1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 ; Verify that instcombine is able to fold identity shuffles.
; <4 x float> identity case: the control vector is the constant <0, 1, 2, 3>, and the
; expected output is a bare return of %v with the intrinsic call removed.
7 define <4 x float> @identity_test_vpermilvar_ps(<4 x float> %v) {
8 ; CHECK-LABEL: @identity_test_vpermilvar_ps(
9 ; CHECK-NEXT: ret <4 x float> [[V:%.*]]
11 %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 0, i32 1, i32 2, i32 3>)
; <8 x float> identity case: the control vector is the constant <0, 1, ..., 7>, and the
; expected output is a bare return of %v with the intrinsic call removed.
15 define <8 x float> @identity_test_vpermilvar_ps_256(<8 x float> %v) {
16 ; CHECK-LABEL: @identity_test_vpermilvar_ps_256(
17 ; CHECK-NEXT: ret <8 x float> [[V:%.*]]
19 %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
; <16 x float> identity case: the control vector repeats <0, 1, 2, 3> four times, and the
; expected output is still a bare return of %v with the intrinsic call removed.
23 define <16 x float> @identity_test_vpermilvar_ps_512(<16 x float> %v) {
24 ; CHECK-LABEL: @identity_test_vpermilvar_ps_512(
25 ; CHECK-NEXT: ret <16 x float> [[V:%.*]]
27 %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>)
; <2 x double> identity case: the control vector is the constant <0, 2>, and the expected
; output is a bare return of %v with the intrinsic call removed.
; NOTE(review): the second control element is 2 rather than 1, presumably because the pd
; variants take the selector from bit 1 of each i64 - confirm against the intrinsic definition.
31 define <2 x double> @identity_test_vpermilvar_pd(<2 x double> %v) {
32 ; CHECK-LABEL: @identity_test_vpermilvar_pd(
33 ; CHECK-NEXT: ret <2 x double> [[V:%.*]]
35 %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 0, i64 2>)
; <4 x double> identity case: the control vector is the constant <0, 2, 0, 2>, and the
; expected output is a bare return of %v with the intrinsic call removed.
39 define <4 x double> @identity_test_vpermilvar_pd_256(<4 x double> %v) {
40 ; CHECK-LABEL: @identity_test_vpermilvar_pd_256(
41 ; CHECK-NEXT: ret <4 x double> [[V:%.*]]
43 %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 0, i64 2, i64 0, i64 2>)
; <8 x double> identity case: the control vector repeats <0, 2> four times, and the
; expected output is a bare return of %v with the intrinsic call removed.
47 define <8 x double> @identity_test_vpermilvar_pd_512(<8 x double> %v) {
48 ; CHECK-LABEL: @identity_test_vpermilvar_pd_512(
49 ; CHECK-NEXT: ret <8 x double> [[V:%.*]]
51 %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 0, i64 2, i64 0, i64 2, i64 0, i64 2, i64 0, i64 2>)
55 ; Instcombine should be able to fold the following vpermilvar shuffles, whose control
56 ; mask is all zeroes, to a builtin shufflevector that splats element 0 of each 128-bit lane.
; <4 x float> with an all-zero control vector: the expected output replaces the intrinsic
; call with a shufflevector of %v whose mask is zeroinitializer (a splat of element 0).
58 define <4 x float> @zero_test_vpermilvar_ps_zero(<4 x float> %v) {
59 ; CHECK-LABEL: @zero_test_vpermilvar_ps_zero(
60 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> zeroinitializer
61 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
63 %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> zeroinitializer)
; <8 x float> with an all-zero control vector: the expected shufflevector mask is
; <0, 0, 0, 0, 4, 4, 4, 4>, i.e. each group of four floats is filled with its own first element.
67 define <8 x float> @zero_test_vpermilvar_ps_256_zero(<8 x float> %v) {
68 ; CHECK-LABEL: @zero_test_vpermilvar_ps_256_zero(
69 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
70 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
72 %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> zeroinitializer)
; <16 x float> with an all-zero control vector: the expected shufflevector mask fills each
; group of four floats with that group's first element (indices 0, 4, 8 and 12).
76 define <16 x float> @zero_test_vpermilvar_ps_512_zero(<16 x float> %v) {
77 ; CHECK-LABEL: @zero_test_vpermilvar_ps_512_zero(
78 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4, i32 8, i32 8, i32 8, i32 8, i32 12, i32 12, i32 12, i32 12>
79 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
81 %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> zeroinitializer)
; <2 x double> with an all-zero control vector: the expected output replaces the intrinsic
; call with a shufflevector of %v whose mask is zeroinitializer (a splat of element 0).
85 define <2 x double> @zero_test_vpermilvar_pd_zero(<2 x double> %v) {
86 ; CHECK-LABEL: @zero_test_vpermilvar_pd_zero(
87 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> zeroinitializer
88 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
90 %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> zeroinitializer)
; <4 x double> with an all-zero control vector: the expected shufflevector mask is
; <0, 0, 2, 2>, i.e. each pair of doubles is filled with its own first element.
94 define <4 x double> @zero_test_vpermilvar_pd_256_zero(<4 x double> %v) {
95 ; CHECK-LABEL: @zero_test_vpermilvar_pd_256_zero(
96 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
97 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
99 %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> zeroinitializer)
; <8 x double> with an all-zero control vector: the expected shufflevector mask is
; <0, 0, 2, 2, 4, 4, 6, 6>, i.e. each pair of doubles is filled with its own first element.
103 define <8 x double> @zero_test_vpermilvar_pd_512_zero(<8 x double> %v) {
104 ; CHECK-LABEL: @zero_test_vpermilvar_pd_512_zero(
105 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
106 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
108 %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> zeroinitializer)
112 ; Verify that instcombine is able to fold constant shuffles.
; <4 x float> with the constant control <3, 2, 1, 0>: the expected output is a single
; shufflevector of %v using that same mask (a full reversal) in place of the intrinsic call.
114 define <4 x float> @test_vpermilvar_ps(<4 x float> %v) {
115 ; CHECK-LABEL: @test_vpermilvar_ps(
116 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
117 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
119 %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 3, i32 2, i32 1, i32 0>)
; <8 x float> with the constant control <7, 6, ..., 0>: the expected shufflevector mask is
; <3, 2, 1, 0, 7, 6, 5, 4>, so each group of four floats is reversed within itself rather
; than the whole vector being reversed.
123 define <8 x float> @test_vpermilvar_ps_256(<8 x float> %v) {
124 ; CHECK-LABEL: @test_vpermilvar_ps_256(
125 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
126 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
128 %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; <16 x float> with the constant control <15, 14, ..., 0>: the expected shufflevector mask
; reverses each group of four floats within itself (3..0, 7..4, 11..8, 15..12).
132 define <16 x float> @test_vpermilvar_ps_512(<16 x float> %v) {
133 ; CHECK-LABEL: @test_vpermilvar_ps_512(
134 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
135 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
137 %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; <2 x double> with the constant control <2, 0>: the expected output is a single
; shufflevector of %v with mask <1, 0> (the two elements swapped).
141 define <2 x double> @test_vpermilvar_pd(<2 x double> %v) {
142 ; CHECK-LABEL: @test_vpermilvar_pd(
143 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> <i32 1, i32 0>
144 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
146 %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 2, i64 0>)
; <4 x double> with the constant control <3, 1, 2, 0>: the expected shufflevector mask is
; <1, 0, 3, 2>, so each pair of doubles is swapped within itself.
150 define <4 x double> @test_vpermilvar_pd_256(<4 x double> %v) {
151 ; CHECK-LABEL: @test_vpermilvar_pd_256(
152 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
153 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
155 %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 3, i64 1, i64 2, i64 0>)
; <8 x double> with the constant control <3, 1, 2, 0, 7, 5, 6, 4>: the expected
; shufflevector mask is <1, 0, 3, 2, 5, 4, 7, 6>, so each pair of doubles is swapped.
159 define <8 x double> @test_vpermilvar_pd_512(<8 x double> %v) {
160 ; CHECK-LABEL: @test_vpermilvar_pd_512(
161 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
162 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
164 %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 3, i64 1, i64 2, i64 0, i64 7, i64 5, i64 6, i64 4>)
168 ; Verify that instcombine is able to fold constant shuffles with poison mask elements.
; <4 x float> with control <poison, 2, 1, poison>: the expected shufflevector mask is
; <undef, 2, 1, undef>, so poison control elements become undef mask elements.
170 define <4 x float> @poison_test_vpermilvar_ps(<4 x float> %v) {
171 ; CHECK-LABEL: @poison_test_vpermilvar_ps(
172 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[V:%.*]], <4 x float> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 undef>
173 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
175 %a = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %v, <4 x i32> <i32 poison, i32 2, i32 1, i32 poison>)
; <8 x float> with control <poison, 6, 5, poison, 3, 2, 1, 0>: the expected shufflevector
; mask is <undef, 2, 1, undef, 7, 6, 5, 4>; the poison elements become undef and the rest
; select within their own group of four floats.
179 define <8 x float> @poison_test_vpermilvar_ps_256(<8 x float> %v) {
180 ; CHECK-LABEL: @poison_test_vpermilvar_ps_256(
181 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[V:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4>
182 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
184 %a = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %v, <8 x i32> <i32 poison, i32 6, i32 5, i32 poison, i32 3, i32 2, i32 1, i32 0>)
; <16 x float> with the control <poison, 6, 5, poison, 3, 2, 1, 0> repeated twice: the
; expected shufflevector mask keeps undef where the control is poison and otherwise selects
; within each group of four floats.
188 define <16 x float> @poison_test_vpermilvar_ps_512(<16 x float> %v) {
189 ; CHECK-LABEL: @poison_test_vpermilvar_ps_512(
190 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[V:%.*]], <16 x float> poison, <16 x i32> <i32 undef, i32 2, i32 1, i32 undef, i32 7, i32 6, i32 5, i32 4, i32 undef, i32 10, i32 9, i32 undef, i32 15, i32 14, i32 13, i32 12>
191 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
193 %a = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %v, <16 x i32> <i32 poison, i32 6, i32 5, i32 poison, i32 3, i32 2, i32 1, i32 0, i32 poison, i32 6, i32 5, i32 poison, i32 3, i32 2, i32 1, i32 0>)
; <2 x double> with control <poison, 0>: the expected shufflevector mask is <undef, 0>.
197 define <2 x double> @poison_test_vpermilvar_pd(<2 x double> %v) {
198 ; CHECK-LABEL: @poison_test_vpermilvar_pd(
199 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[V:%.*]], <2 x double> poison, <2 x i32> <i32 undef, i32 0>
200 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
202 %a = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %v, <2 x i64> <i64 poison, i64 0>)
; <4 x double> with control <poison, 1, 2, poison>: the expected shufflevector mask is
; <undef, 0, 3, undef>.
206 define <4 x double> @poison_test_vpermilvar_pd_256(<4 x double> %v) {
207 ; CHECK-LABEL: @poison_test_vpermilvar_pd_256(
208 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[V:%.*]], <4 x double> poison, <4 x i32> <i32 undef, i32 0, i32 3, i32 undef>
209 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
211 %a = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %v, <4 x i64> <i64 poison, i64 1, i64 2, i64 poison>)
; <8 x double> with the control <poison, 1, 2, poison> repeated twice: the expected
; shufflevector mask is <undef, 0, 3, undef, undef, 4, 7, undef>.
215 define <8 x double> @poison_test_vpermilvar_pd_512(<8 x double> %v) {
216 ; CHECK-LABEL: @poison_test_vpermilvar_pd_512(
217 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[V:%.*]], <8 x double> poison, <8 x i32> <i32 undef, i32 0, i32 3, i32 undef, i32 undef, i32 4, i32 7, i32 undef>
218 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
220 %a = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %v, <8 x i64> <i64 poison, i64 1, i64 2, i64 poison, i64 poison, i64 1, i64 2, i64 poison>)
224 ; Verify that instcombine simplifies vpermilvar shuffles based on which result elements are demanded.
; The control is the constant <0, 1, 2, 3> with the variable %a1 inserted at index 3, and the
; trailing shufflevector leaves result element 3 as poison. The expected output is a single
; shufflevector of %a0 with mask <0, 1, 2, undef>; neither %a1 nor the intrinsic call remains.
226 define <4 x float> @elts_test_vpermilvar_ps(<4 x float> %a0, i32 %a1) {
227 ; CHECK-LABEL: @elts_test_vpermilvar_ps(
228 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x float> [[A0:%.*]], <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
229 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
231 %1 = insertelement <4 x i32> <i32 0, i32 1, i32 2, i32 3>, i32 %a1, i32 3
232 %2 = tail call <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float> %a0, <4 x i32> %1)
233 %3 = shufflevector <4 x float> %2, <4 x float> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; The control interleaves the first four elements of the variable %a1 (even positions) with
; the first four elements of a constant vector (odd positions). The trailing shufflevector
; keeps only the odd result elements, so only constant control entries are used. The
; expected output is a single shufflevector of %a0 with no reference to %a1.
237 define <8 x float> @elts_test_vpermilvar_ps_256(<8 x float> %a0, <8 x i32> %a1) {
238 ; CHECK-LABEL: @elts_test_vpermilvar_ps_256(
239 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 0, i32 undef, i32 1, i32 undef, i32 6, i32 undef, i32 7>
240 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
242 %1 = shufflevector <8 x i32> %a1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 3, i32 2, i32 1, i32 0>, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
243 %2 = tail call <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float> %a0, <8 x i32> %1)
244 %3 = shufflevector <8 x float> %2, <8 x float> poison, <8 x i32> <i32 poison, i32 1, i32 poison, i32 3, i32 poison, i32 5, i32 poison, i32 7>
; The control is the variable vector %a1 with %a2 inserted at index 0, and the trailing
; shufflevector leaves result element 0 as poison. The expected output drops the
; insertelement and passes %a1 straight to the intrinsic; the call and the trailing
; shufflevector both remain because the control is not constant.
248 define <16 x float> @elts_test_vpermilvar_ps_512(<16 x float> %a0, <16 x i32> %a1, i32 %a2) {
249 ; CHECK-LABEL: @elts_test_vpermilvar_ps_512(
250 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> [[A0:%.*]], <16 x i32> [[A1:%.*]])
251 ; CHECK-NEXT: [[TMP2:%.*]] = shufflevector <16 x float> [[TMP1]], <16 x float> poison, <16 x i32> <i32 undef, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
252 ; CHECK-NEXT: ret <16 x float> [[TMP2]]
254 %1 = insertelement <16 x i32> %a1, i32 %a2, i32 0
255 %2 = tail call <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float> %a0, <16 x i32> %1)
256 %3 = shufflevector <16 x float> %2, <16 x float> poison, <16 x i32> <i32 poison, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; The control is the constant <0, 2> with the variable %a1 inserted at index 1, and the
; trailing shufflevector leaves result element 1 as poison. The expected output is a single
; shufflevector of %a0 with mask <0, undef>; neither %a1 nor the intrinsic call remains.
260 define <2 x double> @elts_test_vpermilvar_pd(<2 x double> %a0, i64 %a1) {
261 ; CHECK-LABEL: @elts_test_vpermilvar_pd(
262 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <2 x double> [[A0:%.*]], <2 x double> poison, <2 x i32> <i32 0, i32 undef>
263 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
265 %1 = insertelement <2 x i64> <i64 0, i64 2>, i64 %a1, i32 1
266 %2 = tail call <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double> %a0, <2 x i64> %1)
267 %3 = shufflevector <2 x double> %2, <2 x double> poison, <2 x i32> <i32 0, i32 poison>
; The control takes elements 1..3 of the constant <0, 2, 0, 2> (values 2, 0, 2) followed by
; element 0 of the variable %a1. The trailing shufflevector leaves result element 3 as
; poison, so the variable control entry is unused. The expected output is a single
; shufflevector of %a0 with mask <1, 0, 3, undef>.
271 define <4 x double> @elts_test_vpermilvar_pd_256(<4 x double> %a0, <4 x i64> %a1) {
272 ; CHECK-LABEL: @elts_test_vpermilvar_pd_256(
273 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 undef>
274 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
276 %1 = shufflevector <4 x i64> <i64 0, i64 2, i64 0, i64 2>, <4 x i64> %a1, <4 x i32> <i32 1, i32 2, i32 3, i32 4>
277 %2 = tail call <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double> %a0, <4 x i64> %1)
278 %3 = shufflevector <4 x double> %2, <4 x double> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 poison>
; The control is the variable vector %a1 with %a2 inserted at index 0, and the trailing
; shufflevector splats result element 0 (zeroinitializer mask). The expected output
; replaces the %a1 base of the insertelement with poison, so only %a2 feeds the control;
; the intrinsic call and the splat both remain.
282 define <8 x double> @elts_test_vpermilvar_pd_512(<8 x double> %a0, <8 x i64> %a1, i64 %a2) {
283 ; CHECK-LABEL: @elts_test_vpermilvar_pd_512(
284 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <8 x i64> poison, i64 [[A2:%.*]], i32 0
285 ; CHECK-NEXT: [[TMP2:%.*]] = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> [[A0:%.*]], <8 x i64> [[TMP1]])
286 ; CHECK-NEXT: [[TMP3:%.*]] = shufflevector <8 x double> [[TMP2]], <8 x double> poison, <8 x i32> zeroinitializer
287 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
289 %1 = insertelement <8 x i64> %a1, i64 %a2, i32 0
290 %2 = tail call <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double> %a0, <8 x i64> %1)
291 %3 = shufflevector <8 x double> %2, <8 x double> poison, <8 x i32> zeroinitializer
; Declarations of the x86 vpermilvar intrinsics called by the tests in this file: the pd
; (double, i64 control) variants first, then the ps (float, i32 control) variants, each for
; 128-, 256- and 512-bit vectors.
295 declare <2 x double> @llvm.x86.avx.vpermilvar.pd(<2 x double>, <2 x i64>)
296 declare <4 x double> @llvm.x86.avx.vpermilvar.pd.256(<4 x double>, <4 x i64>)
297 declare <8 x double> @llvm.x86.avx512.vpermilvar.pd.512(<8 x double>, <8 x i64>)
299 declare <4 x float> @llvm.x86.avx.vpermilvar.ps(<4 x float>, <4 x i32>)
300 declare <8 x float> @llvm.x86.avx.vpermilvar.ps.256(<8 x float>, <8 x i32>)
301 declare <16 x float> @llvm.x86.avx512.vpermilvar.ps.512(<16 x float>, <16 x i32>)