4 typedef unsigned long long int ULong
;
5 typedef unsigned int UInt
;
7 __attribute__((noinline
))
8 void do_andn64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
12 "movabsq $0x5555555555555555, %0" "\n\t"
13 "andn %2, %3, %0" "\n\t"
16 : "=&r" (tem
), "=r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
19 *flags
= flag
& 0x8d5;
21 "movabsq $0x5555555555555555, %0" "\n\t"
22 "andn %2, %3, %0" "\n\t"
25 : "=&r" (tem
), "=r" (flag
) : "m" (arg1
), "r" (arg2
) : "cc"
27 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
28 printf ("Difference between r and m variants\n");
31 __attribute__((noinline
))
32 void do_andn32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
36 "movabsq $0x5555555555555555, %0" "\n\t"
37 "andn %2, %3, %k0" "\n\t"
40 : "=&r" (tem
), "=r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
43 *flags
= flag
& 0x8d5;
45 "movabsq $0x5555555555555555, %0" "\n\t"
46 "andn %2, %3, %k0" "\n\t"
49 : "=&r" (tem
), "=r" (flag
) : "m" (arg1
), "r" (arg2
) : "cc"
51 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
52 printf ("Difference between r and m variants\n");
56 __attribute__((noinline
))
57 void do_mulx64 ( /*OUT*/ULong
* res1
, /*OUT*/ULong
* res2
,
58 ULong arg1
, ULong arg2
)
60 ULong tem1
, tem2
, flag1
, flag2
, flag3
, flag4
;
62 "movabsq $0x5555555555555555, %0" "\n\t"
63 "movabsq $0x5555555555555555, %1" "\n\t"
64 "movq %4, %%rdx" "\n\t"
66 "xorq $0x8d5, (%%rsp)" "\n\t"
67 "movq (%%rsp), %2" "\n\t"
69 "mulx %5, %1, %0" "\n\t"
71 "movq (%%rsp), %3" "\n\t"
72 "xorq $0x8d5, (%%rsp)" "\n\t"
74 : "=&r" (tem1
), "=&r" (tem2
), "=&r" (flag1
), "=r" (flag2
)
75 : "g" (arg1
), "r" (arg2
) : "cc", "rdx"
80 "movabsq $0x5555555555555555, %0" "\n\t"
81 "movabsq $0x5555555555555555, %1" "\n\t"
82 "movq %4, %%rdx" "\n\t"
85 "mulx %5, %1, %0" "\n\t"
88 : "=&r" (tem1
), "=&r" (tem2
), "=&r" (flag3
), "=r" (flag4
)
89 : "g" (arg1
), "m" (arg2
) : "cc", "rdx"
91 if (*res1
!= tem1
|| *res2
!= tem2
)
92 printf ("Difference between r and m variants\n");
93 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
94 printf ("Flags changed\n");
97 __attribute__((noinline
))
98 void do_mulx32 ( /*OUT*/ULong
* res1
, /*OUT*/ULong
* res2
,
99 UInt arg1
, UInt arg2
)
101 ULong tem1
, tem2
, flag1
, flag2
, flag3
, flag4
;
102 __asm__
__volatile__(
103 "movabsq $0x5555555555555555, %0" "\n\t"
104 "movabsq $0x5555555555555555, %1" "\n\t"
105 "movl %4, %%edx" "\n\t"
107 "xorq $0x8d5, (%%rsp)" "\n\t"
108 "movq (%%rsp), %2" "\n\t"
110 "mulx %5, %k1, %k0" "\n\t"
112 "movq (%%rsp), %3" "\n\t"
113 "xorq $0x8d5, (%%rsp)" "\n\t"
115 : "=&r" (tem1
), "=&r" (tem2
), "=&r" (flag1
), "=r" (flag2
)
116 : "g" (arg1
), "r" (arg2
) : "cc", "rdx"
120 __asm__
__volatile__(
121 "movabsq $0x5555555555555555, %0" "\n\t"
122 "movabsq $0x5555555555555555, %1" "\n\t"
123 "movl %4, %%edx" "\n\t"
126 "mulx %5, %k1, %k0" "\n\t"
129 : "=&r" (tem1
), "=&r" (tem2
), "=&r" (flag3
), "=r" (flag4
)
130 : "g" (arg1
), "m" (arg2
) : "cc", "rdx"
132 if (*res1
!= tem1
|| *res2
!= tem2
)
133 printf ("Difference between r and m variants\n");
134 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
135 printf ("Flags changed\n");
139 __attribute__((noinline
))
140 void do_sarx64 ( /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
142 ULong tem
, flag1
, flag2
, flag3
, flag4
;
143 __asm__
__volatile__(
144 "movabsq $0x5555555555555555, %0" "\n\t"
146 "xorq $0x8d5, (%%rsp)" "\n\t"
147 "movq (%%rsp), %1" "\n\t"
149 "sarx %3, %4, %0" "\n\t"
151 "movq (%%rsp), %2" "\n\t"
152 "xorq $0x8d5, (%%rsp)" "\n\t"
154 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
155 : "r" (arg1
), "r" (arg2
) : "cc"
158 __asm__
__volatile__(
159 "movabsq $0x5555555555555555, %0" "\n\t"
161 "xorq $0x8d5, (%%rsp)" "\n\t"
162 "movq (%%rsp), %1" "\n\t"
164 "sarx %3, %4, %0" "\n\t"
166 "movq (%%rsp), %2" "\n\t"
167 "xorq $0x8d5, (%%rsp)" "\n\t"
169 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
170 : "r" (arg1
), "m" (arg2
) : "cc"
173 printf ("Difference between r and m variants\n");
174 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
175 printf ("Flags changed\n");
178 __attribute__((noinline
))
179 void do_sarx32 ( /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
181 ULong tem
, flag1
, flag2
, flag3
, flag4
;
182 __asm__
__volatile__(
183 "movabsq $0x5555555555555555, %0" "\n\t"
185 "xorq $0x8d5, (%%rsp)" "\n\t"
186 "movq (%%rsp), %1" "\n\t"
188 "sarx %3, %4, %k0" "\n\t"
190 "movq (%%rsp), %2" "\n\t"
191 "xorq $0x8d5, (%%rsp)" "\n\t"
193 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
194 : "r" (arg1
), "r" (arg2
) : "cc"
197 __asm__
__volatile__(
198 "movabsq $0x5555555555555555, %0" "\n\t"
200 "xorq $0x8d5, (%%rsp)" "\n\t"
201 "movq (%%rsp), %1" "\n\t"
203 "sarx %3, %4, %k0" "\n\t"
205 "movq (%%rsp), %2" "\n\t"
206 "xorq $0x8d5, (%%rsp)" "\n\t"
208 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
209 : "r" (arg1
), "m" (arg2
) : "cc"
212 printf ("Difference between r and m variants\n");
213 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
214 printf ("Flags changed\n");
218 __attribute__((noinline
))
219 void do_shlx64 ( /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
221 ULong tem
, flag1
, flag2
, flag3
, flag4
;
222 __asm__
__volatile__(
223 "movabsq $0x5555555555555555, %0" "\n\t"
225 "xorq $0x8d5, (%%rsp)" "\n\t"
226 "movq (%%rsp), %1" "\n\t"
228 "shlx %3, %4, %0" "\n\t"
230 "movq (%%rsp), %2" "\n\t"
231 "xorq $0x8d5, (%%rsp)" "\n\t"
233 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
234 : "r" (arg1
), "r" (arg2
) : "cc"
237 __asm__
__volatile__(
238 "movabsq $0x5555555555555555, %0" "\n\t"
240 "xorq $0x8d5, (%%rsp)" "\n\t"
241 "movq (%%rsp), %1" "\n\t"
243 "shlx %3, %4, %0" "\n\t"
245 "movq (%%rsp), %2" "\n\t"
246 "xorq $0x8d5, (%%rsp)" "\n\t"
248 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
249 : "r" (arg1
), "m" (arg2
) : "cc"
252 printf ("Difference between r and m variants\n");
253 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
254 printf ("Flags changed\n");
257 __attribute__((noinline
))
258 void do_shlx32 ( /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
260 ULong tem
, flag1
, flag2
, flag3
, flag4
;
261 __asm__
__volatile__(
262 "movabsq $0x5555555555555555, %0" "\n\t"
264 "xorq $0x8d5, (%%rsp)" "\n\t"
265 "movq (%%rsp), %1" "\n\t"
267 "shlx %3, %4, %k0" "\n\t"
269 "movq (%%rsp), %2" "\n\t"
270 "xorq $0x8d5, (%%rsp)" "\n\t"
272 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
273 : "r" (arg1
), "r" (arg2
) : "cc"
276 __asm__
__volatile__(
277 "movabsq $0x5555555555555555, %0" "\n\t"
279 "xorq $0x8d5, (%%rsp)" "\n\t"
280 "movq (%%rsp), %1" "\n\t"
282 "shlx %3, %4, %k0" "\n\t"
284 "movq (%%rsp), %2" "\n\t"
285 "xorq $0x8d5, (%%rsp)" "\n\t"
287 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
288 : "r" (arg1
), "m" (arg2
) : "cc"
291 printf ("Difference between r and m variants\n");
292 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
293 printf ("Flags changed\n");
297 __attribute__((noinline
))
298 void do_shrx64 ( /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
300 ULong tem
, flag1
, flag2
, flag3
, flag4
;
301 __asm__
__volatile__(
302 "movabsq $0x5555555555555555, %0" "\n\t"
304 "xorq $0x8d5, (%%rsp)" "\n\t"
305 "movq (%%rsp), %1" "\n\t"
307 "shrx %3, %4, %0" "\n\t"
309 "movq (%%rsp), %2" "\n\t"
310 "xorq $0x8d5, (%%rsp)" "\n\t"
312 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
313 : "r" (arg1
), "r" (arg2
) : "cc"
316 __asm__
__volatile__(
317 "movabsq $0x5555555555555555, %0" "\n\t"
319 "xorq $0x8d5, (%%rsp)" "\n\t"
320 "movq (%%rsp), %1" "\n\t"
322 "shrx %3, %4, %0" "\n\t"
324 "movq (%%rsp), %2" "\n\t"
325 "xorq $0x8d5, (%%rsp)" "\n\t"
327 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
328 : "r" (arg1
), "m" (arg2
) : "cc"
331 printf ("Difference between r and m variants\n");
332 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
333 printf ("Flags changed\n");
336 __attribute__((noinline
))
337 void do_shrx32 ( /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
339 ULong tem
, flag1
, flag2
, flag3
, flag4
;
340 __asm__
__volatile__(
341 "movabsq $0x5555555555555555, %0" "\n\t"
343 "xorq $0x8d5, (%%rsp)" "\n\t"
344 "movq (%%rsp), %1" "\n\t"
346 "shrx %3, %4, %k0" "\n\t"
348 "movq (%%rsp), %2" "\n\t"
349 "xorq $0x8d5, (%%rsp)" "\n\t"
351 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
352 : "r" (arg1
), "r" (arg2
) : "cc"
355 __asm__
__volatile__(
356 "movabsq $0x5555555555555555, %0" "\n\t"
358 "xorq $0x8d5, (%%rsp)" "\n\t"
359 "movq (%%rsp), %1" "\n\t"
361 "shrx %3, %4, %k0" "\n\t"
363 "movq (%%rsp), %2" "\n\t"
364 "xorq $0x8d5, (%%rsp)" "\n\t"
366 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
367 : "r" (arg1
), "m" (arg2
) : "cc"
370 printf ("Difference between r and m variants\n");
371 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
372 printf ("Flags changed\n");
377 __attribute__((noinline
))
378 void do_rorx64 ( /*OUT*/ULong
* res1
, /*OUT*/ULong
* res2
, ULong arg
)
380 ULong tem
, flag1
, flag2
, flag3
, flag4
, flag5
, flag6
;
381 __asm__
__volatile__(
382 "movabsq $0x5555555555555555, %0" "\n\t"
384 "xorq $0x8d5, (%%rsp)" "\n\t"
385 "movq (%%rsp), %1" "\n\t"
387 "rorx $12, %3, %0" "\n\t"
389 "movq (%%rsp), %2" "\n\t"
390 "xorq $0x8d5, (%%rsp)" "\n\t"
392 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
) : "r" (arg
) : "cc"
395 __asm__
__volatile__(
396 "movabsq $0x5555555555555555, %0" "\n\t"
398 "xorq $0x8d5, (%%rsp)" "\n\t"
399 "movq (%%rsp), %1" "\n\t"
401 "rorx $67, %3, %0" "\n\t"
403 "movq (%%rsp), %2" "\n\t"
404 "xorq $0x8d5, (%%rsp)" "\n\t"
406 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
) : "m" (arg
) : "cc"
409 /* rip-relative memory access */
411 __asm__
__volatile__(
412 "movabsq $0x5555555555555555, %0" "\n\t"
414 "xorq $0x8d5, (%%rsp)" "\n\t"
415 "movq (%%rsp), %1" "\n\t"
417 #if defined(__APPLE__)
418 "rorx $67, _g_ulong_arg(%%rip), %0" "\n\t"
420 "rorx $67, g_ulong_arg(%%rip), %0" "\n\t"
423 "movq (%%rsp), %2" "\n\t"
424 "xorq $0x8d5, (%%rsp)" "\n\t"
426 : "=&r" (tem
), "=&r" (flag5
), "=r" (flag6
) : : "memory", "cc"
429 printf ("Difference between m-variants\n");
430 if (((flag1
^ flag2
) | (flag3
^ flag4
) | (flag5
^ flag6
)) & 0x8d5)
431 printf ("Flags changed\n");
436 __attribute__((noinline
))
437 void do_rorx32 ( /*OUT*/ULong
* res1
, /*OUT*/ULong
* res2
, UInt arg
)
439 ULong tem
, flag1
, flag2
, flag3
, flag4
, flag5
, flag6
;
440 __asm__
__volatile__(
441 "movabsq $0x5555555555555555, %0" "\n\t"
443 "xorq $0x8d5, (%%rsp)" "\n\t"
444 "movq (%%rsp), %1" "\n\t"
446 "rorx $12, %3, %k0" "\n\t"
448 "movq (%%rsp), %2" "\n\t"
449 "xorq $0x8d5, (%%rsp)" "\n\t"
451 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
) : "r" (arg
) : "cc"
454 __asm__
__volatile__(
455 "movabsq $0x5555555555555555, %0" "\n\t"
457 "xorq $0x8d5, (%%rsp)" "\n\t"
458 "movq (%%rsp), %1" "\n\t"
460 "rorx $67, %3, %k0" "\n\t"
462 "movq (%%rsp), %2" "\n\t"
463 "xorq $0x8d5, (%%rsp)" "\n\t"
465 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
) : "m" (arg
) : "cc"
468 /* rip-relative memory access */
470 __asm__
__volatile__(
471 "movabsq $0x5555555555555555, %0" "\n\t"
473 "xorq $0x8d5, (%%rsp)" "\n\t"
474 "movq (%%rsp), %1" "\n\t"
476 #if defined(__APPLE__)
477 "rorx $67, _g_uint_arg(%%rip), %k0" "\n\t"
479 "rorx $67, g_uint_arg(%%rip), %k0" "\n\t"
482 "movq (%%rsp), %2" "\n\t"
483 "xorq $0x8d5, (%%rsp)" "\n\t"
485 : "=&r" (tem
), "=&r" (flag5
), "=r" (flag6
) : : "memory", "cc"
488 printf ("Difference between m-variants\n");
489 if (((flag1
^ flag2
) | (flag3
^ flag4
) | (flag5
^ flag6
)) & 0x8d5)
490 printf ("Flags changed\n");
494 __attribute__((noinline
))
495 void do_blsi64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, ULong arg
)
498 __asm__
__volatile__(
499 "movabsq $0x5555555555555555, %0" "\n\t"
503 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
506 *flags
= flag
& 0x8d5;
507 __asm__
__volatile__(
508 "movabsq $0x5555555555555555, %0" "\n\t"
512 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
514 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
515 printf ("Difference between r and m variants\n");
518 __attribute__((noinline
))
519 void do_blsi32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, UInt arg
)
522 __asm__
__volatile__(
523 "movabsq $0x5555555555555555, %0" "\n\t"
524 "blsi %2, %k0" "\n\t"
527 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
530 *flags
= flag
& 0x8d5;
531 __asm__
__volatile__(
532 "movabsq $0x5555555555555555, %0" "\n\t"
533 "blsi %2, %k0" "\n\t"
536 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
538 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
539 printf ("Difference between r and m variants\n");
543 __attribute__((noinline
))
544 void do_blsmsk64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, ULong arg
)
547 __asm__
__volatile__(
548 "movabsq $0x5555555555555555, %0" "\n\t"
549 "blsmsk %2, %0" "\n\t"
552 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
555 *flags
= flag
& 0x8d5;
556 __asm__
__volatile__(
557 "movabsq $0x5555555555555555, %0" "\n\t"
558 "blsmsk %2, %0" "\n\t"
561 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
563 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
564 printf ("Difference between r and m variants\n");
567 __attribute__((noinline
))
568 void do_blsmsk32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, UInt arg
)
571 __asm__
__volatile__(
572 "movabsq $0x5555555555555555, %0" "\n\t"
573 "blsmsk %2, %k0" "\n\t"
576 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
579 *flags
= flag
& 0x8d5;
580 __asm__
__volatile__(
581 "movabsq $0x5555555555555555, %0" "\n\t"
582 "blsmsk %2, %k0" "\n\t"
585 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
587 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
588 printf ("Difference between r and m variants\n");
592 __attribute__((noinline
))
593 void do_blsr64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, ULong arg
)
596 __asm__
__volatile__(
597 "movabsq $0x5555555555555555, %0" "\n\t"
601 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
604 *flags
= flag
& 0x8d5;
605 __asm__
__volatile__(
606 "movabsq $0x5555555555555555, %0" "\n\t"
610 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
612 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
613 printf ("Difference between r and m variants\n");
616 __attribute__((noinline
))
617 void do_blsr32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
, UInt arg
)
620 __asm__
__volatile__(
621 "movabsq $0x5555555555555555, %0" "\n\t"
622 "blsr %2, %k0" "\n\t"
625 : "=&r" (tem
), "=&r" (flag
) : "r" (arg
) : "cc"
628 *flags
= flag
& 0x8d5;
629 __asm__
__volatile__(
630 "movabsq $0x5555555555555555, %0" "\n\t"
631 "blsr %2, %k0" "\n\t"
634 : "=&r" (tem
), "=&r" (flag
) : "m" (arg
) : "cc"
636 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
637 printf ("Difference between r and m variants\n");
641 __attribute__((noinline
))
642 void do_bextr64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
,
643 ULong arg1
, ULong arg2
)
646 __asm__
__volatile__(
647 "movabsq $0x5555555555555555, %0" "\n\t"
648 "bextr %2, %3, %0" "\n\t"
651 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
654 *flags
= flag
& 0x8d5;
655 __asm__
__volatile__(
656 "movabsq $0x5555555555555555, %0" "\n\t"
657 "bextr %2, %3, %0" "\n\t"
660 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "m" (arg2
) : "cc"
662 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
663 printf ("Difference between r and m variants\n");
666 __attribute__((noinline
))
667 void do_bextr32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
,
668 UInt arg1
, UInt arg2
)
671 __asm__
__volatile__(
672 "movabsq $0x5555555555555555, %0" "\n\t"
673 "bextr %2, %3, %k0" "\n\t"
676 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
679 *flags
= flag
& 0x8d5;
680 __asm__
__volatile__(
681 "movabsq $0x5555555555555555, %0" "\n\t"
682 "bextr %2, %3, %k0" "\n\t"
685 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "m" (arg2
) : "cc"
687 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
688 printf ("Difference between r and m variants\n");
692 __attribute__((noinline
))
693 void do_bzhi64 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
,
694 ULong arg1
, ULong arg2
)
697 __asm__
__volatile__(
698 "movabsq $0x5555555555555555, %0" "\n\t"
699 "bzhi %2, %3, %0" "\n\t"
702 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
705 *flags
= flag
& 0x8d5;
706 __asm__
__volatile__(
707 "movabsq $0x5555555555555555, %0" "\n\t"
708 "bzhi %2, %3, %0" "\n\t"
711 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "m" (arg2
) : "cc"
713 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
714 printf ("Difference between r and m variants\n");
717 __attribute__((noinline
))
718 void do_bzhi32 ( /*OUT*/UInt
* flags
, /*OUT*/ULong
* res
,
719 UInt arg1
, UInt arg2
)
722 __asm__
__volatile__(
723 "movabsq $0x5555555555555555, %0" "\n\t"
724 "bzhi %2, %3, %k0" "\n\t"
727 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "r" (arg2
) : "cc"
730 *flags
= flag
& 0x8d5;
731 __asm__
__volatile__(
732 "movabsq $0x5555555555555555, %0" "\n\t"
733 "bzhi %2, %3, %k0" "\n\t"
736 : "=&r" (tem
), "=&r" (flag
) : "r" (arg1
), "m" (arg2
) : "cc"
738 if (*res
!= tem
|| *flags
!= (flag
& 0x8d5))
739 printf ("Difference between r and m variants\n");
743 __attribute__((noinline
))
744 void do_pdep64 ( /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
746 ULong tem
, flag1
, flag2
, flag3
, flag4
;
747 __asm__
__volatile__(
748 "movabsq $0x5555555555555555, %0" "\n\t"
750 "xorq $0x8d5, (%%rsp)" "\n\t"
751 "movq (%%rsp), %1" "\n\t"
753 "pdep %3, %4, %0" "\n\t"
755 "movq (%%rsp), %2" "\n\t"
756 "xorq $0x8d5, (%%rsp)" "\n\t"
758 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
759 : "r" (arg1
), "r" (arg2
) : "cc"
762 __asm__
__volatile__(
763 "movabsq $0x5555555555555555, %0" "\n\t"
765 "xorq $0x8d5, (%%rsp)" "\n\t"
766 "movq (%%rsp), %1" "\n\t"
768 "pdep %3, %4, %0" "\n\t"
770 "movq (%%rsp), %2" "\n\t"
771 "xorq $0x8d5, (%%rsp)" "\n\t"
773 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
774 : "m" (arg1
), "r" (arg2
) : "cc"
777 printf ("Difference between r and m variants\n");
778 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
779 printf ("Flags changed\n");
782 __attribute__((noinline
))
783 void do_pdep32 ( /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
785 ULong tem
, flag1
, flag2
, flag3
, flag4
;
786 __asm__
__volatile__(
787 "movabsq $0x5555555555555555, %0" "\n\t"
789 "xorq $0x8d5, (%%rsp)" "\n\t"
790 "movq (%%rsp), %1" "\n\t"
792 "pdep %3, %4, %k0" "\n\t"
794 "movq (%%rsp), %2" "\n\t"
795 "xorq $0x8d5, (%%rsp)" "\n\t"
797 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
798 : "r" (arg1
), "r" (arg2
) : "cc"
801 __asm__
__volatile__(
802 "movabsq $0x5555555555555555, %0" "\n\t"
804 "xorq $0x8d5, (%%rsp)" "\n\t"
805 "movq (%%rsp), %1" "\n\t"
807 "pdep %3, %4, %k0" "\n\t"
809 "movq (%%rsp), %2" "\n\t"
810 "xorq $0x8d5, (%%rsp)" "\n\t"
812 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
813 : "m" (arg1
), "r" (arg2
) : "cc"
816 printf ("Difference between r and m variants\n");
817 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
818 printf ("Flags changed\n");
822 __attribute__((noinline
))
823 void do_pext64 ( /*OUT*/ULong
* res
, ULong arg1
, ULong arg2
)
825 ULong tem
, flag1
, flag2
, flag3
, flag4
;
826 __asm__
__volatile__(
827 "movabsq $0x5555555555555555, %0" "\n\t"
829 "xorq $0x8d5, (%%rsp)" "\n\t"
830 "movq (%%rsp), %1" "\n\t"
832 "pext %3, %4, %0" "\n\t"
834 "movq (%%rsp), %2" "\n\t"
835 "xorq $0x8d5, (%%rsp)" "\n\t"
837 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
838 : "r" (arg1
), "r" (arg2
) : "cc"
841 __asm__
__volatile__(
842 "movabsq $0x5555555555555555, %0" "\n\t"
844 "xorq $0x8d5, (%%rsp)" "\n\t"
845 "movq (%%rsp), %1" "\n\t"
847 "pext %3, %4, %0" "\n\t"
849 "movq (%%rsp), %2" "\n\t"
850 "xorq $0x8d5, (%%rsp)" "\n\t"
852 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
853 : "m" (arg1
), "r" (arg2
) : "cc"
856 printf ("Difference between r and m variants\n");
857 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
858 printf ("Flags changed\n");
861 __attribute__((noinline
))
862 void do_pext32 ( /*OUT*/ULong
* res
, UInt arg1
, UInt arg2
)
864 ULong tem
, flag1
, flag2
, flag3
, flag4
;
865 __asm__
__volatile__(
866 "movabsq $0x5555555555555555, %0" "\n\t"
868 "xorq $0x8d5, (%%rsp)" "\n\t"
869 "movq (%%rsp), %1" "\n\t"
871 "pext %3, %4, %k0" "\n\t"
873 "movq (%%rsp), %2" "\n\t"
874 "xorq $0x8d5, (%%rsp)" "\n\t"
876 : "=&r" (tem
), "=&r" (flag1
), "=r" (flag2
)
877 : "r" (arg1
), "r" (arg2
) : "cc"
880 __asm__
__volatile__(
881 "movabsq $0x5555555555555555, %0" "\n\t"
883 "xorq $0x8d5, (%%rsp)" "\n\t"
884 "movq (%%rsp), %1" "\n\t"
886 "pext %3, %4, %k0" "\n\t"
888 "movq (%%rsp), %2" "\n\t"
889 "xorq $0x8d5, (%%rsp)" "\n\t"
891 : "=&r" (tem
), "=&r" (flag3
), "=r" (flag4
)
892 : "m" (arg1
), "r" (arg2
) : "cc"
895 printf ("Difference between r and m variants\n");
896 if (((flag1
^ flag2
) | (flag3
^ flag4
)) & 0x8d5)
897 printf ("Flags changed\n");
905 w1
= 0xFEDC192837475675ULL
;
906 w2
= 0x57657438291CDEF0ULL
;
910 do_andn64(&flags
, &res
, w1
, w2
);
911 printf("andn64 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
913 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
914 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
917 w1
= 0xFEDC192837475675ULL
;
918 w2
= 0x57657438291CDEF0ULL
;
922 do_andn32(&flags
, &res
, w1
, w2
);
923 printf("andn32 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
925 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
926 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
929 w1
= 0xFEDC192837475675ULL
;
930 w2
= 0x57657438291CDEF0ULL
;
933 do_mulx64(&res1
, &res2
, w1
, w2
);
934 printf("mulx64 %016llx %016llx -> %016llx %016llx\n", w1
, w2
, res1
, res2
);
936 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
937 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
940 w1
= 0xFEDC192837475675ULL
;
941 w2
= 0x57657438291CDEF0ULL
;
944 do_mulx32(&res1
, &res2
, w1
, w2
);
945 printf("mulx32 %016llx %016llx -> %016llx %016llx\n", w1
, w2
, res1
, res2
);
947 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
948 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
951 w1
= 0xFEDC192837475675ULL
;
952 w2
= 0x57657438291CDEF0ULL
;
955 do_sarx64(&res
, w1
, w2
);
956 printf("sarx64 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
958 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
959 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
962 w1
= 0xFEDC192837475675ULL
;
963 w2
= 0x57657438291CDEF0ULL
;
966 do_sarx32(&res
, w1
, w2
);
967 printf("sarx32 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
969 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
970 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
973 w1
= 0xFEDC192837475675ULL
;
974 w2
= 0x57657438291CDEF0ULL
;
977 do_shlx64(&res
, w1
, w2
);
978 printf("shlx64 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
980 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
981 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
984 w1
= 0xFEDC192837475675ULL
;
985 w2
= 0x57657438291CDEF0ULL
;
988 do_shlx32(&res
, w1
, w2
);
989 printf("shlx32 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
991 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
992 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
995 w1
= 0xFEDC192837475675ULL
;
996 w2
= 0x57657438291CDEF0ULL
;
999 do_shrx64(&res
, w1
, w2
);
1000 printf("shrx64 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1002 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1003 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1006 w1
= 0xFEDC192837475675ULL
;
1007 w2
= 0x57657438291CDEF0ULL
;
1010 do_shrx32(&res
, w1
, w2
);
1011 printf("shrx32 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1013 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1014 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1017 w1
= 0xFEDC192837475675ULL
;
1020 do_rorx64(&res1
, &res2
, w1
);
1021 printf("rorx64 %016llx -> %016llx %016llx\n", w1
, res1
, res2
);
1023 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1026 w1
= 0xFEDC192837475675ULL
;
1029 do_rorx32(&res1
, &res2
, w1
);
1030 printf("rorx32 %016llx -> %016llx %016llx\n", w1
, res1
, res2
);
1032 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1035 w1
= 0xFEDC192837475675ULL
;
1039 do_blsi64(&flags
, &res
, w1
);
1040 printf("blsi64 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1042 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1045 w1
= 0xFEDC192837475675ULL
;
1049 do_blsi32(&flags
, &res
, w1
);
1050 printf("blsi32 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1052 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1055 w1
= 0xFEDC192837475675ULL
;
1059 do_blsmsk64(&flags
, &res
, w1
);
1060 printf("blsmsk64 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1062 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1065 w1
= 0xFEDC192837475675ULL
;
1069 do_blsmsk32(&flags
, &res
, w1
);
1070 printf("blsmsk32 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1072 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1075 w1
= 0xFEDC192837475675ULL
;
1079 do_blsr64(&flags
, &res
, w1
);
1080 printf("blsr64 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1082 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1085 w1
= 0xFEDC192837475675ULL
;
1089 do_blsr32(&flags
, &res
, w1
);
1090 printf("blsr32 %016llx -> %016llx %04x\n", w1
, res
, flags
);
1092 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1095 w1
= 0xFEDC192837475675ULL
;
1096 w2
= 0x57657438291CDEF0ULL
;
1100 do_bextr64(&flags
, &res
, w1
, w2
);
1101 printf("bextr64 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
1103 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1104 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1107 w1
= 0xFEDC192837475675ULL
;
1108 w2
= 0x57657438291CDEF0ULL
;
1112 do_bextr32(&flags
, &res
, w1
, w2
);
1113 printf("bextr32 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
1115 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1116 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1119 w1
= 0xFEDC192837475675ULL
;
1120 w2
= 0x57657438291CDEF0ULL
;
1124 do_bzhi64(&flags
, &res
, w1
, w2
);
1125 printf("bzhi64 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
1127 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1128 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1131 w1
= 0xFEDC192837475675ULL
;
1132 w2
= 0x57657438291CDEF0ULL
;
1136 do_bzhi32(&flags
, &res
, w1
, w2
);
1137 printf("bzhi32 %016llx %016llx -> %016llx %04x\n", w1
, w2
, res
, flags
);
1139 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1140 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1143 w1
= 0xFEDC192837475675ULL
;
1144 w2
= 0x57657438291CDEF0ULL
;
1147 do_pdep64(&res
, w1
, w2
);
1148 printf("pdep64 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1150 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1151 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1154 w1
= 0xFEDC192837475675ULL
;
1155 w2
= 0x57657438291CDEF0ULL
;
1158 do_pdep32(&res
, w1
, w2
);
1159 printf("pdep32 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1161 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1162 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1165 w1
= 0xFEDC192837475675ULL
;
1166 w2
= 0x57657438291CDEF0ULL
;
1169 do_pext64(&res
, w1
, w2
);
1170 printf("pext64 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1172 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1173 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);
1176 w1
= 0xFEDC192837475675ULL
;
1177 w2
= 0x57657438291CDEF0ULL
;
1180 do_pext32(&res
, w1
, w2
);
1181 printf("pext32 %016llx %016llx -> %016llx\n", w1
, w2
, res
);
1183 w1
= ((w1
>> 2) | (w1
>> 1)) + (w1
/ 17ULL);
1184 w2
= ((w2
>> 2) | (w2
>> 1)) + (w2
/ 17ULL);