1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X86
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2 | FileCheck %s --check-prefixes=CHECK,X64
; Non-uniform 64-bit lane case: each lane of %a0 is shifted left by an
; alternating 31/33 and the matching lane of %a1 is shifted right by the
; complementary 33/31 (every pair sums to the 64-bit lane width); the two
; results are then OR'd. The assertions expect a vpshldvq whose shift-count
; operand is a constant-pool label; the i686 and x86_64 runs differ only in
; the (%rip)-relative form of that operand.
5 define <8 x i64> @avx512_funnel_shift_q_512(<8 x i64> %a0, <8 x i64> %a1) {
6 ; X86-LABEL: avx512_funnel_shift_q_512:
8 ; X86-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
11 ; X64-LABEL: avx512_funnel_shift_q_512:
13 ; X64-NEXT: vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
15 %1 = shl <8 x i64> %a0, <i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33>
16 %2 = lshr <8 x i64> %a1, <i64 33, i64 31, i64 33, i64 31, i64 33, i64 31, i64 33, i64 31>
17 %3 = or <8 x i64> %1, %2
; Uniform (splat) 64-bit lane case: every lane of %a0 is shifted left by 31
; and every lane of %a1 is shifted right by 33 (31 + 33 = 64) before the OR.
; Both targets share one set of assertions, which expect the immediate form
; vpshldq $31 followed by the return instruction.
21 define <8 x i64> @avx512_funnel_shift_q_512_splat(<8 x i64> %a0, <8 x i64> %a1) {
22 ; CHECK-LABEL: avx512_funnel_shift_q_512_splat:
24 ; CHECK-NEXT: vpshldq $31, %zmm1, %zmm0, %zmm0
25 ; CHECK-NEXT: ret{{[l|q]}}
26 %1 = shl <8 x i64> %a0, <i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31, i64 31>
27 %2 = lshr <8 x i64> %a1, <i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33, i64 33>
28 %3 = or <8 x i64> %1, %2
; Non-uniform 32-bit lane case: each lane of %a0 is shifted left by an
; alternating 15/17 and the matching lane of %a1 is shifted right by the
; complementary 17/15 (every pair sums to the 32-bit lane width); the two
; results are then OR'd. The assertions expect a vpshldvd whose shift-count
; operand is a constant-pool label; the i686 and x86_64 runs differ only in
; the (%rip)-relative form of that operand.
32 define <16 x i32> @avx512_funnel_shift_d_512(<16 x i32> %a0, <16 x i32> %a1) {
33 ; X86-LABEL: avx512_funnel_shift_d_512:
35 ; X86-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
38 ; X64-LABEL: avx512_funnel_shift_d_512:
40 ; X64-NEXT: vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
42 %1 = shl <16 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
43 %2 = lshr <16 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
44 %3 = or <16 x i32> %1, %2
; Uniform (splat) 32-bit lane case: every lane of %a0 is shifted left by 15
; and every lane of %a1 is shifted right by 17 (15 + 17 = 32) before the OR.
; Both targets share one set of assertions, which expect the immediate form
; vpshldd $15 followed by the return instruction.
48 define <16 x i32> @avx512_funnel_shift_d_512_splat(<16 x i32> %a0, <16 x i32> %a1) {
49 ; CHECK-LABEL: avx512_funnel_shift_d_512_splat:
51 ; CHECK-NEXT: vpshldd $15, %zmm1, %zmm0, %zmm0
52 ; CHECK-NEXT: ret{{[l|q]}}
53 %1 = shl <16 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
54 %2 = lshr <16 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
55 %3 = or <16 x i32> %1, %2
; Non-uniform 16-bit lane case: each lane of %a0 is shifted left by an
; alternating 7/9 and the matching lane of %a1 is shifted right by the
; complementary 9/7 (every pair sums to the 16-bit lane width); the two
; results are then OR'd. The assertions expect a vpshldvw whose shift-count
; operand is a constant-pool label; the i686 and x86_64 runs differ only in
; the (%rip)-relative form of that operand.
59 define <32 x i16> @avx512_funnel_shift_w_512(<32 x i16> %a0, <32 x i16> %a1) {
60 ; X86-LABEL: avx512_funnel_shift_w_512:
62 ; X86-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}, %zmm1, %zmm0
65 ; X64-LABEL: avx512_funnel_shift_w_512:
67 ; X64-NEXT: vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm1, %zmm0
69 %1 = shl <32 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
70 %2 = lshr <32 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
71 %3 = or <32 x i16> %1, %2
; Uniform (splat) 16-bit lane case: every lane of %a0 is shifted left by 7
; and every lane of %a1 is shifted right by 9 (7 + 9 = 16) before the OR.
; Both targets share one set of assertions, which expect the immediate form
; vpshldw $7 followed by the return instruction.
75 define <32 x i16> @avx512_funnel_shift_w_512_splat(<32 x i16> %a0, <32 x i16> %a1) {
76 ; CHECK-LABEL: avx512_funnel_shift_w_512_splat:
78 ; CHECK-NEXT: vpshldw $7, %zmm1, %zmm0, %zmm0
79 ; CHECK-NEXT: ret{{[l|q]}}
80 %1 = shl <32 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
81 %2 = lshr <32 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
82 %3 = or <32 x i16> %1, %2