; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X86
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512vbmi2,+avx512vl | FileCheck %s --check-prefixes=CHECK,X64

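; Each function below builds a funnel shift by hand as
; (shl %a0, C) | (lshr %a1, EltBits - C). With per-element (non-splat)
; amounts, the expected lowering is the variable-amount VPSHLDV*
; instruction, taking the amount vector from the constant pool (the LCPI
; label); only the X64 run addresses it RIP-relatively.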
define <2 x i64> @avx512_funnel_shift_q_128(<2 x i64> %a0, <2 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <2 x i64> %a0, <i64 31, i64 33>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 31>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256(<4 x i64> %a0, <4 x i64> %a1) {
; X86-LABEL: avx512_funnel_shift_q_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_q_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <4 x i64> %a0, <i64 31, i64 33, i64 31, i64 33>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 31, i64 33, i64 31>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

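; When the shift amount is a uniform splat, the amount should instead fold
; into the immediate form (VPSHLDQ $31), with no constant-pool load, so the
; X86 and X64 bodies merge into shared CHECK lines.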
define <2 x i64> @avx512_funnel_shift_q_128_splat(<2 x i64> %a0, <2 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <2 x i64> %a0, <i64 31, i64 31>
  %2 = lshr <2 x i64> %a1, <i64 33, i64 33>
  %3 = or <2 x i64> %1, %2
  ret <2 x i64> %3
}

define <4 x i64> @avx512_funnel_shift_q_256_splat(<4 x i64> %a0, <4 x i64> %a1) {
; CHECK-LABEL: avx512_funnel_shift_q_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldq $31, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <4 x i64> %a0, <i64 31, i64 31, i64 31, i64 31>
  %2 = lshr <4 x i64> %a1, <i64 33, i64 33, i64 33, i64 33>
  %3 = or <4 x i64> %1, %2
  ret <4 x i64> %3
}

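; Same pattern for i32 elements: amounts 15 and 17 sum to the 32-bit
; element width, so the or-of-shifts combines to VPSHLDVD.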
define <4 x i32> @avx512_funnel_shift_d_128(<4 x i32> %a0, <4 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <4 x i32> %a0, <i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 15, i32 17, i32 15>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256(<8 x i32> %a0, <8 x i32> %a1) {
; X86-LABEL: avx512_funnel_shift_d_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_d_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <8 x i32> %a0, <i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 15, i32 17, i32 15, i32 17, i32 15, i32 17, i32 15>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

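; Splat i32 amounts fold to the immediate form VPSHLDD $15.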
define <4 x i32> @avx512_funnel_shift_d_128_splat(<4 x i32> %a0, <4 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <4 x i32> %a0, <i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <4 x i32> %a1, <i32 17, i32 17, i32 17, i32 17>
  %3 = or <4 x i32> %1, %2
  ret <4 x i32> %3
}

define <8 x i32> @avx512_funnel_shift_d_256_splat(<8 x i32> %a0, <8 x i32> %a1) {
; CHECK-LABEL: avx512_funnel_shift_d_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldd $15, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <8 x i32> %a0, <i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15, i32 15>
  %2 = lshr <8 x i32> %a1, <i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17, i32 17>
  %3 = or <8 x i32> %1, %2
  ret <8 x i32> %3
}

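; i16 elements: amounts 7 and 9 sum to the 16-bit element width,
; selecting VPSHLDVW.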
define <8 x i16> @avx512_funnel_shift_w_128(<8 x i16> %a0, <8 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_128:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}, %xmm1, %xmm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_128:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm1, %xmm0
; X64-NEXT:    retq
  %1 = shl <8 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256(<16 x i16> %a0, <16 x i16> %a1) {
; X86-LABEL: avx512_funnel_shift_w_256:
; X86:       # %bb.0:
; X86-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}, %ymm1, %ymm0
; X86-NEXT:    retl
;
; X64-LABEL: avx512_funnel_shift_w_256:
; X64:       # %bb.0:
; X64-NEXT:    vpshldvw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %ymm1, %ymm0
; X64-NEXT:    retq
  %1 = shl <16 x i16> %a0, <i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7, i16 9, i16 7>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}

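; Splat i16 amounts fold to the immediate form VPSHLDW $7.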
define <8 x i16> @avx512_funnel_shift_w_128_splat(<8 x i16> %a0, <8 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_128_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <8 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <8 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <8 x i16> %1, %2
  ret <8 x i16> %3
}

define <16 x i16> @avx512_funnel_shift_w_256_splat(<16 x i16> %a0, <16 x i16> %a1) {
; CHECK-LABEL: avx512_funnel_shift_w_256_splat:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vpshldw $7, %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    ret{{[l|q]}}
  %1 = shl <16 x i16> %a0, <i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7, i16 7>
  %2 = lshr <16 x i16> %a1, <i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9, i16 9>
  %3 = or <16 x i16> %1, %2
  ret <16 x i16> %3
}