1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked add.ss (mask i8 -1, last operand i32 4) whose second source has
; lanes 1-3 overwritten with constants. The CHECK lines expect the call to be
; replaced by extractelement/fadd/insertelement on lane 0, reading %b directly
; (none of the inserted constants appear in the expected output).
7 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
8 ; CHECK-LABEL: @test_add_ss(
9 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
10 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
11 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
12 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
13 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
15 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
16 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
17 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
18 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked add.ss with last operand i32 8 and a second source whose lanes 1-3
; are overwritten with constants. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement chain.
22 define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
23 ; CHECK-LABEL: @test_add_ss_round(
24 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
25 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
27 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
28 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
29 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
30 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Masked add.ss (variable %mask, last operand i32 4) whose pass-through operand
; has lanes 1-3 overwritten with constants. The CHECK lines expect a lane-0
; fadd, a select between that sum and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
34 define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
35 ; CHECK-LABEL: @test_add_ss_mask(
36 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
37 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
38 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
39 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
40 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
41 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
42 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
43 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
44 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
46 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
47 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
48 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
49 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Masked add.ss with last operand i32 8 and a pass-through operand whose lanes
; 1-3 are overwritten with constants. The CHECK lines expect the intrinsic call
; to remain, with %c passed directly instead of the insertelement chain.
53 define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
54 ; CHECK-LABEL: @test_add_ss_mask_round(
55 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
56 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
58 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
59 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
60 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
61 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both add.ss sources from a scalar in lane 0 plus constants in lanes
; 1-3, then extracts lane 1 of the result. The CHECK lines expect the whole
; function to reduce to `ret float 1.000000e+00`, which is the constant placed
; in lane 1 of the first source operand.
65 define float @test_add_ss_1(float %a, float %b) {
66 ; CHECK-LABEL: @test_add_ss_1(
67 ; CHECK-NEXT: ret float 1.000000e+00
69 %1 = insertelement <4 x float> undef, float %a, i32 0
70 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
71 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
72 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
73 %5 = insertelement <4 x float> undef, float %b, i32 0
74 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
75 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
76 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
77 %9 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
78 %10 = extractelement <4 x float> %9, i32 1
82 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked add.sd (mask i8 -1, last operand i32 4) whose second source has
; lane 1 overwritten with a constant. The CHECK lines expect the call to be
; replaced by extractelement/fadd/insertelement on lane 0, reading %b directly.
84 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
85 ; CHECK-LABEL: @test_add_sd(
86 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
87 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
88 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
89 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
90 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
92 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
93 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked add.sd with last operand i32 8 and a second source whose lane 1 is
; overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement result.
97 define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
98 ; CHECK-LABEL: @test_add_sd_round(
99 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
100 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
102 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
103 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Masked add.sd (variable %mask, last operand i32 4) whose pass-through operand
; has lane 1 overwritten with a constant. The CHECK lines expect a lane-0 fadd,
; a select between that sum and lane 0 of %c keyed on bit 0 of %mask (bitcast
; to <8 x i1>, element 0), and an insert of the result into %a.
107 define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
108 ; CHECK-LABEL: @test_add_sd_mask(
109 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
110 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
111 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
112 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
113 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
114 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
115 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
116 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
117 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
119 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
120 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Masked add.sd with last operand i32 8 and a pass-through operand whose lane 1
; is overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, with %c passed directly instead of the insertelement result.
124 define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
125 ; CHECK-LABEL: @test_add_sd_mask_round(
126 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
127 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
129 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
130 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both add.sd sources from a scalar in lane 0 plus a constant in lane 1,
; then extracts lane 1 of the result. The CHECK lines expect the whole function
; to reduce to `ret double 1.000000e+00`, which is the constant placed in
; lane 1 of the first source operand.
134 define double @test_add_sd_1(double %a, double %b) {
135 ; CHECK-LABEL: @test_add_sd_1(
136 ; CHECK-NEXT: ret double 1.000000e+00
138 %1 = insertelement <2 x double> undef, double %a, i32 0
139 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
140 %3 = insertelement <2 x double> undef, double %b, i32 0
141 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
142 %5 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
143 %6 = extractelement <2 x double> %5, i32 1
147 declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked sub.ss (mask i8 -1, last operand i32 4) whose second source has
; lanes 1-3 overwritten with constants. The CHECK lines expect the call to be
; replaced by extractelement/fsub/insertelement on lane 0, reading %b directly
; (none of the inserted constants appear in the expected output).
149 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
150 ; CHECK-LABEL: @test_sub_ss(
151 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
152 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
153 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
154 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
155 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
157 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
158 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
159 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
160 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked sub.ss with last operand i32 8 and a second source whose lanes 1-3
; are overwritten with constants. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement chain.
164 define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
165 ; CHECK-LABEL: @test_sub_ss_round(
166 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
167 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
169 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
170 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
171 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
172 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Masked sub.ss (variable %mask, last operand i32 4) whose pass-through operand
; has lanes 1-3 overwritten with constants. The CHECK lines expect a lane-0
; fsub, a select between that difference and lane 0 of %c keyed on bit 0 of
; %mask (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
176 define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
177 ; CHECK-LABEL: @test_sub_ss_mask(
178 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
179 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
180 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
181 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
182 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
183 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
184 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
185 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
186 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
188 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
189 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
190 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
191 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Masked sub.ss with last operand i32 8 and a pass-through operand whose lanes
; 1-3 are overwritten with constants. The CHECK lines expect the intrinsic call
; to remain, with %c passed directly instead of the insertelement chain.
195 define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
196 ; CHECK-LABEL: @test_sub_ss_mask_round(
197 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
198 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
200 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
201 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
202 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
203 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both sub.ss sources from a scalar in lane 0 plus constants in lanes
; 1-3, then extracts lane 1 of the result. The CHECK lines expect the whole
; function to reduce to `ret float 1.000000e+00`, which is the constant placed
; in lane 1 of the first source operand.
207 define float @test_sub_ss_1(float %a, float %b) {
208 ; CHECK-LABEL: @test_sub_ss_1(
209 ; CHECK-NEXT: ret float 1.000000e+00
211 %1 = insertelement <4 x float> undef, float %a, i32 0
212 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
213 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
214 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
215 %5 = insertelement <4 x float> undef, float %b, i32 0
216 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
217 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
218 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
219 %9 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
220 %10 = extractelement <4 x float> %9, i32 1
224 declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked sub.sd (mask i8 -1, last operand i32 4) whose second source has
; lane 1 overwritten with a constant. The CHECK lines expect the call to be
; replaced by extractelement/fsub/insertelement on lane 0, reading %b directly.
226 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
227 ; CHECK-LABEL: @test_sub_sd(
228 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
229 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
230 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
231 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
232 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
234 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
235 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked sub.sd with last operand i32 8 and a second source whose lane 1 is
; overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement result.
239 define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
240 ; CHECK-LABEL: @test_sub_sd_round(
241 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
242 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
244 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
245 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Masked sub.sd (variable %mask, last operand i32 4) whose pass-through operand
; has lane 1 overwritten with a constant. The CHECK lines expect a lane-0 fsub,
; a select between that difference and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
249 define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
250 ; CHECK-LABEL: @test_sub_sd_mask(
251 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
252 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
253 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
254 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
255 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
256 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
257 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
258 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
259 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
261 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
262 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Masked sub.sd with last operand i32 8 and a pass-through operand whose lane 1
; is overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, with %c passed directly instead of the insertelement result.
266 define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
267 ; CHECK-LABEL: @test_sub_sd_mask_round(
268 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
269 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
271 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
272 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both sub.sd sources from a scalar in lane 0 plus a constant in lane 1,
; then extracts lane 1 of the result. The CHECK lines expect the whole function
; to reduce to `ret double 1.000000e+00`, which is the constant placed in
; lane 1 of the first source operand.
276 define double @test_sub_sd_1(double %a, double %b) {
277 ; CHECK-LABEL: @test_sub_sd_1(
278 ; CHECK-NEXT: ret double 1.000000e+00
280 %1 = insertelement <2 x double> undef, double %a, i32 0
281 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
282 %3 = insertelement <2 x double> undef, double %b, i32 0
283 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
284 %5 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
285 %6 = extractelement <2 x double> %5, i32 1
289 declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked mul.ss (mask i8 -1, last operand i32 4) whose second source has
; lanes 1-3 overwritten with constants. The CHECK lines expect the call to be
; replaced by extractelement/fmul/insertelement on lane 0, reading %b directly
; (none of the inserted constants appear in the expected output).
291 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
292 ; CHECK-LABEL: @test_mul_ss(
293 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
294 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
295 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
296 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
297 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
299 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
300 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
301 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
302 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked mul.ss with last operand i32 8 and a second source whose lanes 1-3
; are overwritten with constants. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement chain.
306 define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
307 ; CHECK-LABEL: @test_mul_ss_round(
308 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
309 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
311 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
312 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
313 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
314 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Masked mul.ss (variable %mask, last operand i32 4) whose pass-through operand
; has lanes 1-3 overwritten with constants. The CHECK lines expect a lane-0
; fmul, a select between that product and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
318 define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
319 ; CHECK-LABEL: @test_mul_ss_mask(
320 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
321 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
322 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
323 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
324 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
325 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
326 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
327 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
328 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
330 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
331 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
332 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
333 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Masked mul.ss with last operand i32 8 and a pass-through operand whose lanes
; 1-3 are overwritten with constants. The CHECK lines expect the intrinsic call
; to remain, with %c passed directly instead of the insertelement chain.
337 define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
338 ; CHECK-LABEL: @test_mul_ss_mask_round(
339 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
340 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
342 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
343 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
344 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
345 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both mul.ss sources from a scalar in lane 0 plus constants in lanes
; 1-3, then extracts lane 1 of the result. The CHECK lines expect the whole
; function to reduce to `ret float 1.000000e+00`, which is the constant placed
; in lane 1 of the first source operand.
349 define float @test_mul_ss_1(float %a, float %b) {
350 ; CHECK-LABEL: @test_mul_ss_1(
351 ; CHECK-NEXT: ret float 1.000000e+00
353 %1 = insertelement <4 x float> undef, float %a, i32 0
354 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
355 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
356 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
357 %5 = insertelement <4 x float> undef, float %b, i32 0
358 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
359 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
360 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
361 %9 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
362 %10 = extractelement <4 x float> %9, i32 1
366 declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked mul.sd (mask i8 -1, last operand i32 4) whose second source has
; lane 1 overwritten with a constant. The CHECK lines expect the call to be
; replaced by extractelement/fmul/insertelement on lane 0, reading %b directly.
368 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
369 ; CHECK-LABEL: @test_mul_sd(
370 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
371 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
372 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
373 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
374 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
376 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
377 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked mul.sd with last operand i32 8 and a second source whose lane 1 is
; overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement result.
381 define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
382 ; CHECK-LABEL: @test_mul_sd_round(
383 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
384 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
386 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
387 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Masked mul.sd (variable %mask, last operand i32 4) whose pass-through operand
; has lane 1 overwritten with a constant. The CHECK lines expect a lane-0 fmul,
; a select between that product and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
391 define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
392 ; CHECK-LABEL: @test_mul_sd_mask(
393 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
394 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
395 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
396 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
397 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
398 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
399 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
400 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
401 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
403 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
404 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Masked mul.sd with last operand i32 8 and a pass-through operand whose lane 1
; is overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, with %c passed directly instead of the insertelement result.
408 define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
409 ; CHECK-LABEL: @test_mul_sd_mask_round(
410 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
411 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
413 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
414 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both mul.sd sources from a scalar in lane 0 plus a constant in lane 1,
; then extracts lane 1 of the result. The CHECK lines expect the whole function
; to reduce to `ret double 1.000000e+00`, which is the constant placed in
; lane 1 of the first source operand.
418 define double @test_mul_sd_1(double %a, double %b) {
419 ; CHECK-LABEL: @test_mul_sd_1(
420 ; CHECK-NEXT: ret double 1.000000e+00
422 %1 = insertelement <2 x double> undef, double %a, i32 0
423 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
424 %3 = insertelement <2 x double> undef, double %b, i32 0
425 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
426 %5 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
427 %6 = extractelement <2 x double> %5, i32 1
431 declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked div.ss (mask i8 -1, last operand i32 4) whose second source has
; lanes 1-3 overwritten with constants. The CHECK lines expect the call to be
; replaced by extractelement/fdiv/insertelement on lane 0, reading %b directly
; (none of the inserted constants appear in the expected output).
433 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
434 ; CHECK-LABEL: @test_div_ss(
435 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
436 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
437 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
438 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
439 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
441 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
442 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
443 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
444 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked div.ss with last operand i32 8 and a second source whose lanes 1-3
; are overwritten with constants. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement chain.
448 define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
449 ; CHECK-LABEL: @test_div_ss_round(
450 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
451 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
453 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
454 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
455 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
456 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Masked div.ss (variable %mask, last operand i32 4) whose pass-through operand
; has lanes 1-3 overwritten with constants. The CHECK lines expect a lane-0
; fdiv, a select between that quotient and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
460 define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
461 ; CHECK-LABEL: @test_div_ss_mask(
462 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
463 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
464 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
465 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
466 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
467 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
468 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], float [[TMP3]], float [[TMP6]]
469 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
470 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
472 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
473 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
474 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
475 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Masked div.ss with last operand i32 8 and a pass-through operand whose lanes
; 1-3 are overwritten with constants. The CHECK lines expect the intrinsic call
; to remain, with %c passed directly instead of the insertelement chain.
479 define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
480 ; CHECK-LABEL: @test_div_ss_mask_round(
481 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
482 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
484 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
485 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
486 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
487 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both div.ss sources from a scalar in lane 0 plus constants in lanes
; 1-3, then extracts lane 1 of the result. The CHECK lines expect the whole
; function to reduce to `ret float 1.000000e+00`, which is the constant placed
; in lane 1 of the first source operand.
491 define float @test_div_ss_1(float %a, float %b) {
492 ; CHECK-LABEL: @test_div_ss_1(
493 ; CHECK-NEXT: ret float 1.000000e+00
495 %1 = insertelement <4 x float> undef, float %a, i32 0
496 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
497 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
498 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
499 %5 = insertelement <4 x float> undef, float %b, i32 0
500 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
501 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
502 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
503 %9 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
504 %10 = extractelement <4 x float> %9, i32 1
508 declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked div.sd (mask i8 -1, last operand i32 4) whose second source has
; lane 1 overwritten with a constant. The CHECK lines expect the call to be
; replaced by extractelement/fdiv/insertelement on lane 0, reading %b directly.
510 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
511 ; CHECK-LABEL: @test_div_sd(
512 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
513 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
514 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
515 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
516 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
518 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
519 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked div.sd with last operand i32 8 and a second source whose lane 1 is
; overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, but with %b passed directly instead of the insertelement result.
523 define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
524 ; CHECK-LABEL: @test_div_sd_round(
525 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
526 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
528 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
529 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Masked div.sd (variable %mask, last operand i32 4) whose pass-through operand
; has lane 1 overwritten with a constant. The CHECK lines expect a lane-0 fdiv,
; a select between that quotient and lane 0 of %c keyed on bit 0 of %mask
; (bitcast to <8 x i1>, element 0), and an insert of the result into %a.
533 define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
534 ; CHECK-LABEL: @test_div_sd_mask(
535 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
536 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
537 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
538 ; CHECK-NEXT: [[TMP4:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
539 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <8 x i1> [[TMP4]], i64 0
540 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
541 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP5]], double [[TMP3]], double [[TMP6]]
542 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
543 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
545 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
546 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Masked div.sd with last operand i32 8 and a pass-through operand whose lane 1
; is overwritten with a constant. The CHECK lines expect the intrinsic call to
; remain, with %c passed directly instead of the insertelement result.
550 define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
551 ; CHECK-LABEL: @test_div_sd_mask_round(
552 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
553 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
555 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
556 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both div.sd sources from a scalar in lane 0 plus a constant in lane 1,
; then extracts lane 1 of the result. The CHECK lines expect the whole function
; to reduce to `ret double 1.000000e+00`, which is the constant placed in
; lane 1 of the first source operand.
560 define double @test_div_sd_1(double %a, double %b) {
561 ; CHECK-LABEL: @test_div_sd_1(
562 ; CHECK-NEXT: ret double 1.000000e+00
564 %1 = insertelement <2 x double> undef, double %a, i32 0
565 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
566 %3 = insertelement <2 x double> undef, double %b, i32 0
567 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
568 %5 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
569 %6 = extractelement <2 x double> %5, i32 1
573 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked max.ss (mask i8 -1, last operand i32 4) whose second source has
; lanes 1-3 overwritten with constants. Unlike the add/sub/mul/div cases with
; i32 4, the CHECK lines expect the intrinsic call to remain; only the
; insertelement chain disappears and %b is passed directly.
575 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
576 ; CHECK-LABEL: @test_max_ss(
577 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
578 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
580 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
581 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
582 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
583 %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Masked max.ss (variable %mask, last operand i32 4) whose pass-through operand
; has lanes 1-3 overwritten with constants. The CHECK lines expect the
; intrinsic call to remain, with %c passed directly instead of the
; insertelement chain.
587 define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
588 ; CHECK-LABEL: @test_max_ss_mask(
589 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
590 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
592 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
593 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
594 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
595 %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Both max.ss.round operands are built from undef: a function argument in lane 0
; and constants in lanes 1-3 (1.0/2.0/3.0 for the first operand, 4.0/5.0/6.0 for
; the second). Only lane 1 of the result is extracted, and the expected output
; is the constant 1.0 -- the lane-1 value of the first operand -- with no call
; left behind.
599 define float @test_max_ss_1(float %a, float %b) {
600 ; CHECK-LABEL: @test_max_ss_1(
601 ; CHECK-NEXT: ret float 1.000000e+00
603 %1 = insertelement <4 x float> undef, float %a, i32 0
604 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
605 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
606 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
607 %5 = insertelement <4 x float> undef, float %b, i32 0
608 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
609 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
610 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
611 %9 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
612 %10 = extractelement <4 x float> %9, i32 1
616 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; A constant is inserted into lane 1 of %b before it is used as the second
; operand of max.sd.round. The expected output keeps the call but passes %b
; directly, so the insertelement must be removed.
618 define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
619 ; CHECK-LABEL: @test_max_sd(
620 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
621 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
623 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
624 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; A constant is inserted into lane 1 of %c, which is then used as the third
; operand of max.sd.round together with a variable %mask. The expected output
; keeps the call but passes %c directly, so the insertelement must be removed.
628 define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
629 ; CHECK-LABEL: @test_max_sd_mask(
630 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
631 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
633 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
634 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Both max.sd.round operands are built from undef with a function argument in
; lane 0 and a constant in lane 1 (1.0 in the first operand, 2.0 in the second).
; Only lane 1 of the result is extracted; the expected output is the constant
; 1.0, matching lane 1 of the first operand, with the call eliminated.
638 define double @test_max_sd_1(double %a, double %b) {
639 ; CHECK-LABEL: @test_max_sd_1(
640 ; CHECK-NEXT: ret double 1.000000e+00
642 %1 = insertelement <2 x double> undef, double %a, i32 0
643 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
644 %3 = insertelement <2 x double> undef, double %b, i32 0
645 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
646 %5 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
647 %6 = extractelement <2 x double> %5, i32 1
651 declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Constants are inserted into lanes 1-3 of %b before it is used as the second
; operand of min.ss.round. The expected output keeps the intrinsic call but
; passes %b directly, so the three insertelement instructions must disappear.
653 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
654 ; CHECK-LABEL: @test_min_ss(
655 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
656 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
658 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
659 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
660 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
661 %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Constants are inserted into lanes 1-3 of %c, which is then used as the third
; operand of min.ss.round together with a variable %mask. The expected output
; keeps the call but passes %c directly, so the inserts must be removed.
665 define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
666 ; CHECK-LABEL: @test_min_ss_mask(
667 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
668 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
670 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
671 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
672 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
673 %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Both min.ss.round operands are built from undef: a function argument in lane 0
; and constants in lanes 1-3 (1.0/2.0/3.0 for the first operand, 4.0/5.0/6.0 for
; the second). Only lane 1 of the result is extracted, and the expected output
; is the constant 1.0 -- the lane-1 value of the first operand -- with no call
; left behind.
677 define float @test_min_ss_1(float %a, float %b) {
678 ; CHECK-LABEL: @test_min_ss_1(
679 ; CHECK-NEXT: ret float 1.000000e+00
681 %1 = insertelement <4 x float> undef, float %a, i32 0
682 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
683 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
684 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
685 %5 = insertelement <4 x float> undef, float %b, i32 0
686 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
687 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
688 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
689 %9 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
690 %10 = extractelement <4 x float> %9, i32 1
694 declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; A constant is inserted into lane 1 of %b before it is used as the second
; operand of min.sd.round. The expected output keeps the call but passes %b
; directly, so the insertelement must be removed.
696 define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
697 ; CHECK-LABEL: @test_min_sd(
698 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
699 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
701 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
702 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; A constant is inserted into lane 1 of %c, which is then used as the third
; operand of min.sd.round together with a variable %mask. The expected output
; keeps the call but passes %c directly, so the insertelement must be removed.
706 define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
707 ; CHECK-LABEL: @test_min_sd_mask(
708 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
709 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
711 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
712 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Both min.sd.round operands are built from undef with a function argument in
; lane 0 and a constant in lane 1 (1.0 in the first operand, 2.0 in the second).
; Only lane 1 of the result is extracted; the expected output is the constant
; 1.0, matching lane 1 of the first operand, with the call eliminated.
716 define double @test_min_sd_1(double %a, double %b) {
717 ; CHECK-LABEL: @test_min_sd_1(
718 ; CHECK-NEXT: ret double 1.000000e+00
720 %1 = insertelement <2 x double> undef, double %a, i32 0
721 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
722 %3 = insertelement <2 x double> undef, double %b, i32 0
723 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
724 %5 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
725 %6 = extractelement <2 x double> %5, i32 1
729 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
; Constants are inserted into lanes 1-3 of both %a and %b before they are passed
; to mask.cmp.ss. The expected output keeps the call with its i32 3, %mask and
; i32 4 operands unchanged but passes %a and %b directly, so all six
; insertelement instructions must be removed.
731 define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) {
732 ; CHECK-LABEL: @test_cmp_ss(
733 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
734 ; CHECK-NEXT: ret i8 [[TMP1]]
736 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
737 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
738 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
739 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
740 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
741 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
742 %7 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %3, <4 x float> %6, i32 3, i8 %mask, i32 4)
746 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
; A constant is inserted into lane 1 of both %a and %b before they are passed to
; mask.cmp.sd. The expected output keeps the call with its i32 3, %mask and
; i32 4 operands unchanged but passes %a and %b directly, so both insertelement
; instructions must be removed.
748 define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
749 ; CHECK-LABEL: @test_cmp_sd(
750 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
751 ; CHECK-NEXT: ret i8 [[TMP1]]
753 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
754 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
755 %3 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %1, <2 x double> %2, i32 3, i8 %mask, i32 4)
; Calls eight llvm.x86.avx512 intrinsics (vcvtss2si32/64, cvttss2si/64,
; vcvtsd2si32/64, cvttsd2si/64), each on a freshly built vector holding %f or %d
; in lane 0 and 0.0 in every other lane, then sums the i32 and i64 results.
; The expected output keeps only the lane-0 insertelement into undef for each
; call; the inserts of 0.0 into the remaining lanes must be removed. The final
; add is also expected with its operands in the opposite order from the input.
759 define i64 @test(float %f, double %d) {
760 ; CHECK-LABEL: @test(
761 ; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
762 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4)
763 ; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
764 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4)
765 ; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
766 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4)
767 ; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
768 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4)
769 ; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
770 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4)
771 ; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
772 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4)
773 ; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
774 ; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4)
775 ; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
776 ; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4)
777 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
778 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
779 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
780 ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
781 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
782 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
783 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
784 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
785 ; CHECK-NEXT: ret i64 [[TMP15]]
; Four calls on <4 x float> vectors built from %f.
787 %v00 = insertelement <4 x float> undef, float %f, i32 0
788 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
789 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
790 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
791 %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %v03, i32 4)
792 %v10 = insertelement <4 x float> undef, float %f, i32 0
793 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
794 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
795 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
796 %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %v13, i32 4)
797 %v20 = insertelement <4 x float> undef, float %f, i32 0
798 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
799 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
800 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
801 %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v23, i32 4)
802 %v30 = insertelement <4 x float> undef, float %f, i32 0
803 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
804 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
805 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
806 %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %v33, i32 4)
; Four calls on <2 x double> vectors built from %d.
807 %v40 = insertelement <2 x double> undef, double %d, i32 0
808 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
809 %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %v41, i32 4)
810 %v50 = insertelement <2 x double> undef, double %d, i32 0
811 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
812 %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %v51, i32 4)
813 %v60 = insertelement <2 x double> undef, double %d, i32 0
814 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
815 %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %v61, i32 4)
816 %v70 = insertelement <2 x double> undef, double %d, i32 0
817 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
818 %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %v71, i32 4)
; Combine every result so that none of the eight calls is dead.
819 %tmp8 = add i32 %tmp0, %tmp2
820 %tmp9 = add i32 %tmp4, %tmp6
821 %tmp10 = add i32 %tmp8, %tmp9
822 %tmp11 = sext i32 %tmp10 to i64
823 %tmp12 = add i64 %tmp1, %tmp3
824 %tmp13 = add i64 %tmp5, %tmp7
825 %tmp14 = add i64 %tmp12, %tmp13
826 %tmp15 = add i64 %tmp11, %tmp14
; Declarations of the eight llvm.x86.avx512 intrinsics called by @test. Each
; takes one vector (<4 x float> or <2 x double>) plus an i32 operand and returns
; either i32 or i64.
830 declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32)
831 declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32)
832 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32)
833 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32)
834 declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32)
835 declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32)
836 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32)
837 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32)
; Calls eight llvm.x86.avx512 intrinsics with "usi" in their names
; (vcvtss2usi32/64, cvttss2usi/64, vcvtsd2usi32/64, cvttsd2usi/64), each on a
; freshly built vector holding %f or %d in lane 0 and 0.0 in every other lane,
; then sums the i32 and i64 results. The expected output keeps only the lane-0
; insertelement into undef for each call; the inserts of 0.0 into the remaining
; lanes must be removed. The final add is also expected with its operands in
; the opposite order from the input.
839 define i64 @test2(float %f, double %d) {
840 ; CHECK-LABEL: @test2(
841 ; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> undef, float [[F:%.*]], i32 0
842 ; CHECK-NEXT: [[TMP0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4)
843 ; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
844 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4)
845 ; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
846 ; CHECK-NEXT: [[TMP2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4)
847 ; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> undef, float [[F]], i32 0
848 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V33]], i32 4)
849 ; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> undef, double [[D:%.*]], i32 0
850 ; CHECK-NEXT: [[TMP4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4)
851 ; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
852 ; CHECK-NEXT: [[TMP5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4)
853 ; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
854 ; CHECK-NEXT: [[TMP6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4)
855 ; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> undef, double [[D]], i32 0
856 ; CHECK-NEXT: [[TMP7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4)
857 ; CHECK-NEXT: [[TMP8:%.*]] = add i32 [[TMP0]], [[TMP2]]
858 ; CHECK-NEXT: [[TMP9:%.*]] = add i32 [[TMP4]], [[TMP6]]
859 ; CHECK-NEXT: [[TMP10:%.*]] = add i32 [[TMP8]], [[TMP9]]
860 ; CHECK-NEXT: [[TMP11:%.*]] = sext i32 [[TMP10]] to i64
861 ; CHECK-NEXT: [[TMP12:%.*]] = add i64 [[TMP1]], [[TMP3]]
862 ; CHECK-NEXT: [[TMP13:%.*]] = add i64 [[TMP5]], [[TMP7]]
863 ; CHECK-NEXT: [[TMP14:%.*]] = add i64 [[TMP12]], [[TMP13]]
864 ; CHECK-NEXT: [[TMP15:%.*]] = add i64 [[TMP14]], [[TMP11]]
865 ; CHECK-NEXT: ret i64 [[TMP15]]
; Four calls on <4 x float> vectors built from %f.
867 %v00 = insertelement <4 x float> undef, float %f, i32 0
868 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
869 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
870 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
871 %tmp0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %v03, i32 4)
872 %v10 = insertelement <4 x float> undef, float %f, i32 0
873 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
874 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
875 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
876 %tmp1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %v13, i32 4)
877 %v20 = insertelement <4 x float> undef, float %f, i32 0
878 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
879 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
880 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
881 %tmp2 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %v23, i32 4)
882 %v30 = insertelement <4 x float> undef, float %f, i32 0
883 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
884 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
885 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
886 %tmp3 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %v33, i32 4)
; Four calls on <2 x double> vectors built from %d.
887 %v40 = insertelement <2 x double> undef, double %d, i32 0
888 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
889 %tmp4 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %v41, i32 4)
890 %v50 = insertelement <2 x double> undef, double %d, i32 0
891 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
892 %tmp5 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %v51, i32 4)
893 %v60 = insertelement <2 x double> undef, double %d, i32 0
894 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
895 %tmp6 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %v61, i32 4)
896 %v70 = insertelement <2 x double> undef, double %d, i32 0
897 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
898 %tmp7 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %v71, i32 4)
; Combine every result so that none of the eight calls is dead.
899 %tmp8 = add i32 %tmp0, %tmp2
900 %tmp9 = add i32 %tmp4, %tmp6
901 %tmp10 = add i32 %tmp8, %tmp9
902 %tmp11 = sext i32 %tmp10 to i64
903 %tmp12 = add i64 %tmp1, %tmp3
904 %tmp13 = add i64 %tmp5, %tmp7
905 %tmp14 = add i64 %tmp12, %tmp13
906 %tmp15 = add i64 %tmp11, %tmp14
; Declarations of the eight llvm.x86.avx512 "usi" intrinsics called by @test2.
; Each takes one vector (<4 x float> or <2 x double>) plus an i32 operand and
; returns either i32 or i64.
910 declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32)
911 declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32)
912 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32)
913 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32)
914 declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32)
915 declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32)
916 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
917 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
919 declare float @llvm.fma.f32(float, float, float) #1
; Scalar fma written in generic IR: lane 0 of %a, %b and %c feed llvm.fma.f32,
; bit 0 of %mask selects between the fma result and lane 0 of %a, and the
; selected value is inserted back into lane 0 of %a.
; Before the extracts, constants are inserted into lanes 1-3 of %b and %c. The
; expected output extracts lane 0 straight from %b and %c, so those six
; insertelement instructions must be removed. Note that the expected extracts
; from %b and %c use an i32 index while the input uses i64 throughout.
921 define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
922 ; CHECK-LABEL: @test_mask_vfmadd_ss(
923 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
924 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
925 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
926 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
927 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
928 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
929 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
930 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
931 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
933 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
934 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
935 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
936 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
937 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
938 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
939 %7 = extractelement <4 x float> %a, i64 0
940 %8 = extractelement <4 x float> %3, i64 0
941 %9 = extractelement <4 x float> %6, i64 0
942 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
943 %11 = bitcast i8 %mask to <8 x i1>
944 %12 = extractelement <8 x i1> %11, i64 0
945 %13 = select i1 %12, float %10, float %7
946 %14 = insertelement <4 x float> %a, float %13, i64 0
; Same masked scalar fma pattern, but constants are inserted into lanes 1-3 of
; %a and only lane 0 of the final vector is extracted. The expected output
; returns the select result directly: the inserts into %a and the final
; insertelement/extractelement pair must all be removed, and lane 0 is
; extracted straight from %a.
950 define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
951 ; CHECK-LABEL: @test_mask_vfmadd_ss_0(
952 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
953 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
954 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
955 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
956 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
957 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
958 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP1]]
959 ; CHECK-NEXT: ret float [[TMP7]]
961 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
962 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
963 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
964 %4 = extractelement <4 x float> %3, i64 0
965 %5 = extractelement <4 x float> %b, i64 0
966 %6 = extractelement <4 x float> %c, i64 0
967 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
968 %8 = bitcast i8 %mask to <8 x i1>
969 %9 = extractelement <8 x i1> %8, i64 0
970 %10 = select i1 %9, float %7, float %4
971 %11 = insertelement <4 x float> %3, float %10, i64 0
972 %12 = extractelement <4 x float> %11, i32 0
; Same input as the lane-0 variant, except that lane 1 of the final vector is
; extracted. Lane 1 was set to the constant 1.0 by the first insertelement and
; is not touched by the lane-0 insert, so the expected output is just that
; constant with the whole fma/select chain removed.
976 define float @test_mask_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
977 ; CHECK-LABEL: @test_mask_vfmadd_ss_1(
978 ; CHECK-NEXT: ret float 1.000000e+00
980 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
981 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
982 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
983 %4 = extractelement <4 x float> %3, i64 0
984 %5 = extractelement <4 x float> %b, i64 0
985 %6 = extractelement <4 x float> %c, i64 0
986 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
987 %8 = bitcast i8 %mask to <8 x i1>
988 %9 = extractelement <8 x i1> %8, i64 0
989 %10 = select i1 %9, float %7, float %4
990 %11 = insertelement <4 x float> %3, float %10, i64 0
991 %12 = extractelement <4 x float> %11, i32 1
995 declare double @llvm.fma.f64(double, double, double) #1
; Double-precision masked scalar fma in generic IR: lane 0 of %a, %b and %c feed
; llvm.fma.f64, bit 0 of %mask selects between the fma result and lane 0 of %a,
; and the selected value is inserted back into lane 0 of %a.
; A constant is inserted into lane 1 of %b and of %c beforehand; the expected
; output extracts lane 0 straight from %b and %c, so both inserts must go.
997 define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
998 ; CHECK-LABEL: @test_mask_vfmadd_sd(
999 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1000 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1001 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1002 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1003 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1004 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1005 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
1006 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
1007 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
1009 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
1010 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
1011 %3 = extractelement <2 x double> %a, i64 0
1012 %4 = extractelement <2 x double> %1, i64 0
1013 %5 = extractelement <2 x double> %2, i64 0
1014 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1015 %7 = bitcast i8 %mask to <8 x i1>
1016 %8 = extractelement <8 x i1> %7, i64 0
1017 %9 = select i1 %8, double %6, double %3
1018 %10 = insertelement <2 x double> %a, double %9, i64 0
1019 ret <2 x double> %10
; Double-precision masked scalar fma where a constant is inserted into lane 1 of
; %a and only lane 0 of the final vector is extracted. The expected output
; returns the select result directly: the insert into %a and the final
; insertelement/extractelement pair must be removed.
1022 define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1023 ; CHECK-LABEL: @test_mask_vfmadd_sd_0(
1024 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1025 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1026 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1027 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1028 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1029 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1030 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP1]]
1031 ; CHECK-NEXT: ret double [[TMP7]]
1033 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1034 %2 = extractelement <2 x double> %1, i64 0
1035 %3 = extractelement <2 x double> %b, i64 0
1036 %4 = extractelement <2 x double> %c, i64 0
1037 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1038 %6 = bitcast i8 %mask to <8 x i1>
1039 %7 = extractelement <8 x i1> %6, i64 0
1040 %8 = select i1 %7, double %5, double %2
1041 %9 = insertelement <2 x double> %1, double %8, i64 0
1042 %10 = extractelement <2 x double> %9, i32 0
; Same input as the lane-0 variant, except that lane 1 of the final vector is
; extracted. Lane 1 was set to the constant 1.0 by the first insertelement and
; is not touched by the lane-0 insert, so the expected output is just that
; constant with the whole fma/select chain removed.
1046 define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1047 ; CHECK-LABEL: @test_mask_vfmadd_sd_1(
1048 ; CHECK-NEXT: ret double 1.000000e+00
1050 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1051 %2 = extractelement <2 x double> %1, i64 0
1052 %3 = extractelement <2 x double> %b, i64 0
1053 %4 = extractelement <2 x double> %c, i64 0
1054 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1055 %6 = bitcast i8 %mask to <8 x i1>
1056 %7 = extractelement <8 x i1> %6, i64 0
1057 %8 = select i1 %7, double %5, double %2
1058 %9 = insertelement <2 x double> %1, double %8, i64 0
1059 %10 = extractelement <2 x double> %9, i32 1
; Zero-masking form of the scalar fma pattern: lane 0 of %a, %b and %c feed
; llvm.fma.f32, bit 0 of %mask selects between the fma result and the constant
; 0.0, and the selected value is inserted into lane 0 of %a.
; Constants are inserted into lanes 1-3 of %b and %c beforehand; the expected
; output extracts lane 0 straight from %b and %c, so those six insertelement
; instructions must be removed. The expected extracts from %b and %c use an i32
; index while the input uses i64 throughout.
1063 define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1064 ; CHECK-LABEL: @test_maskz_vfmadd_ss(
1065 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1066 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
1067 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
1068 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1069 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1070 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1071 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
1072 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[A]], float [[TMP7]], i64 0
1073 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
1075 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
1076 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1077 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1078 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
1079 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1080 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1081 %7 = extractelement <4 x float> %a, i64 0
1082 %8 = extractelement <4 x float> %3, i64 0
1083 %9 = extractelement <4 x float> %6, i64 0
1084 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
1085 %11 = bitcast i8 %mask to <8 x i1>
1086 %12 = extractelement <8 x i1> %11, i64 0
1087 %13 = select i1 %12, float %10, float 0.000000e+00
1088 %14 = insertelement <4 x float> %a, float %13, i64 0
; Zero-masking scalar fma where constants are inserted into lanes 1-3 of %a and
; only lane 0 of the final vector is extracted. The expected output returns the
; select (fma result vs. 0.0) directly: the inserts into %a and the final
; insertelement/extractelement pair must all be removed.
1092 define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1093 ; CHECK-LABEL: @test_maskz_vfmadd_ss_0(
1094 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
1095 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1096 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1097 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1098 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1099 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1100 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float 0.000000e+00
1101 ; CHECK-NEXT: ret float [[TMP7]]
1103 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1104 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1105 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1106 %4 = extractelement <4 x float> %3, i64 0
1107 %5 = extractelement <4 x float> %b, i64 0
1108 %6 = extractelement <4 x float> %c, i64 0
1109 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1110 %8 = bitcast i8 %mask to <8 x i1>
1111 %9 = extractelement <8 x i1> %8, i64 0
1112 %10 = select i1 %9, float %7, float 0.000000e+00
1113 %11 = insertelement <4 x float> %3, float %10, i64 0
1114 %12 = extractelement <4 x float> %11, i32 0
; Same input as the lane-0 zero-masking variant, except that lane 1 of the final
; vector is extracted. Lane 1 was set to the constant 1.0 by the first
; insertelement and is not touched by the lane-0 insert, so the expected output
; is just that constant with the whole fma/select chain removed.
1118 define float @test_maskz_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1119 ; CHECK-LABEL: @test_maskz_vfmadd_ss_1(
1120 ; CHECK-NEXT: ret float 1.000000e+00
1122 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1123 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1124 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1125 %4 = extractelement <4 x float> %3, i64 0
1126 %5 = extractelement <4 x float> %b, i64 0
1127 %6 = extractelement <4 x float> %c, i64 0
1128 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1129 %8 = bitcast i8 %mask to <8 x i1>
1130 %9 = extractelement <8 x i1> %8, i64 0
1131 %10 = select i1 %9, float %7, float 0.000000e+00
1132 %11 = insertelement <4 x float> %3, float %10, i64 0
1133 %12 = extractelement <4 x float> %11, i32 1
; Double-precision zero-masking scalar fma: lane 0 of %a, %b and %c feed
; llvm.fma.f64, bit 0 of %mask selects between the fma result and the constant
; 0.0, and the selected value is inserted into lane 0 of %a.
; A constant is inserted into lane 1 of %b and of %c beforehand; the expected
; output extracts lane 0 straight from %b and %c, so both inserts must go.
1137 define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1138 ; CHECK-LABEL: @test_maskz_vfmadd_sd(
1139 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1140 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1141 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1142 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1143 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1144 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1145 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
1146 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[A]], double [[TMP7]], i64 0
1147 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
1149 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
1150 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
1151 %3 = extractelement <2 x double> %a, i64 0
1152 %4 = extractelement <2 x double> %1, i64 0
1153 %5 = extractelement <2 x double> %2, i64 0
1154 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1155 %7 = bitcast i8 %mask to <8 x i1>
1156 %8 = extractelement <8 x i1> %7, i64 0
1157 %9 = select i1 %8, double %6, double 0.000000e+00
1158 %10 = insertelement <2 x double> %a, double %9, i64 0
1159 ret <2 x double> %10
; Double-precision zero-masking scalar fma where a constant is inserted into
; lane 1 of %a and only lane 0 of the final vector is extracted. The expected
; output returns the select (fma result vs. 0.0) directly: the insert into %a
; and the final insertelement/extractelement pair must be removed.
1162 define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1163 ; CHECK-LABEL: @test_maskz_vfmadd_sd_0(
1164 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1165 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1166 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1167 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1168 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1169 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1170 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double 0.000000e+00
1171 ; CHECK-NEXT: ret double [[TMP7]]
1173 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1174 %2 = extractelement <2 x double> %1, i64 0
1175 %3 = extractelement <2 x double> %b, i64 0
1176 %4 = extractelement <2 x double> %c, i64 0
1177 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1178 %6 = bitcast i8 %mask to <8 x i1>
1179 %7 = extractelement <8 x i1> %6, i64 0
1180 %8 = select i1 %7, double %5, double 0.000000e+00
1181 %9 = insertelement <2 x double> %1, double %8, i64 0
1182 %10 = extractelement <2 x double> %9, i32 0
; Same zero-masked scalar double FMA sequence, but lane 1 is read back. That
; lane only ever holds the inserted 1.0, so the expected output is the
; constant and the FMA/select chain disappears.
1186 define double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1187 ; CHECK-LABEL: @test_maskz_vfmadd_sd_1(
1188 ; CHECK-NEXT: ret double 1.000000e+00
1190 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1191 %2 = extractelement <2 x double> %1, i64 0
1192 %3 = extractelement <2 x double> %b, i64 0
1193 %4 = extractelement <2 x double> %c, i64 0
1194 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1195 %6 = bitcast i8 %mask to <8 x i1>
1196 %7 = extractelement <8 x i1> %6, i64 0
1197 %8 = select i1 %7, double %5, double 0.000000e+00
1198 %9 = insertelement <2 x double> %1, double %8, i64 0
1199 %10 = extractelement <2 x double> %9, i32 1
; Scalar float FMA whose masked-off result falls back to lane 0 of %c, with
; the result written into lane 0 of %c. Lanes 1-3 of %a and %b are overwritten
; with constants but only lane 0 is consumed, so the expected output extracts
; lane 0 straight from the arguments.
1203 define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1204 ; CHECK-LABEL: @test_mask3_vfmadd_ss(
1205 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
1206 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
1207 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1208 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1209 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1210 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1211 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
1212 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <4 x float> [[C]], float [[TMP7]], i64 0
1213 ; CHECK-NEXT: ret <4 x float> [[TMP8]]
1215 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1216 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1217 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1218 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1219 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1220 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1221 %7 = extractelement <4 x float> %3, i64 0
1222 %8 = extractelement <4 x float> %6, i64 0
1223 %9 = extractelement <4 x float> %c, i64 0
1224 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
1225 %11 = bitcast i8 %mask to <8 x i1>
1226 %12 = extractelement <8 x i1> %11, i64 0
1227 %13 = select i1 %12, float %10, float %9
1228 %14 = insertelement <4 x float> %c, float %13, i64 0
; Scalar float FMA with %c as both addend and masked-off fallback, reading
; lane 0 of the result. The constants inserted into lanes 1-3 of %c do not
; affect lane 0, so the expected output uses %c directly and yields the select.
1232 define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1233 ; CHECK-LABEL: @test_mask3_vfmadd_ss_0(
1234 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1235 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1236 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
1237 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1238 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1239 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1240 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], float [[TMP4]], float [[TMP3]]
1241 ; CHECK-NEXT: ret float [[TMP7]]
1243 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1244 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1245 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1246 %4 = extractelement <4 x float> %a, i64 0
1247 %5 = extractelement <4 x float> %b, i64 0
1248 %6 = extractelement <4 x float> %3, i64 0
1249 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1250 %8 = bitcast i8 %mask to <8 x i1>
1251 %9 = extractelement <8 x i1> %8, i64 0
1252 %10 = select i1 %9, float %7, float %6
1253 %11 = insertelement <4 x float> %3, float %10, i64 0
1254 %12 = extractelement <4 x float> %11, i32 0
; Same scalar float FMA sequence, but lane 1 is read back. The FMA result
; only lands in lane 0, so lane 1 still holds the inserted 1.0 and the
; expected output is that constant.
1258 define float @test_mask3_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1259 ; CHECK-LABEL: @test_mask3_vfmadd_ss_1(
1260 ; CHECK-NEXT: ret float 1.000000e+00
1262 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1263 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1264 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1265 %4 = extractelement <4 x float> %a, i64 0
1266 %5 = extractelement <4 x float> %b, i64 0
1267 %6 = extractelement <4 x float> %3, i64 0
1268 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1269 %8 = bitcast i8 %mask to <8 x i1>
1270 %9 = extractelement <8 x i1> %8, i64 0
1271 %10 = select i1 %9, float %7, float %6
1272 %11 = insertelement <4 x float> %3, float %10, i64 0
1273 %12 = extractelement <4 x float> %11, i32 1
; Scalar double FMA whose masked-off result falls back to lane 0 of %c, with
; the result written into lane 0 of %c. The constants inserted into lane 1 of
; %a and %b are never consumed, so the expected output extracts lane 0 from
; the arguments directly.
1277 define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1278 ; CHECK-LABEL: @test_mask3_vfmadd_sd(
1279 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1280 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1281 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1282 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1283 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1284 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1285 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
1286 ; CHECK-NEXT: [[TMP8:%.*]] = insertelement <2 x double> [[C]], double [[TMP7]], i64 0
1287 ; CHECK-NEXT: ret <2 x double> [[TMP8]]
1289 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1290 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1291 %3 = extractelement <2 x double> %1, i64 0
1292 %4 = extractelement <2 x double> %2, i64 0
1293 %5 = extractelement <2 x double> %c, i64 0
1294 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1295 %7 = bitcast i8 %mask to <8 x i1>
1296 %8 = extractelement <8 x i1> %7, i64 0
1297 %9 = select i1 %8, double %6, double %5
1298 %10 = insertelement <2 x double> %c, double %9, i64 0
1299 ret <2 x double> %10
; Scalar double FMA with %c as both addend and masked-off fallback, reading
; lane 0 of the result. The constant inserted into lane 1 of %c does not
; affect lane 0, so the expected output uses %c directly and yields the select.
1302 define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1303 ; CHECK-LABEL: @test_mask3_vfmadd_sd_0(
1304 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1305 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1306 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1307 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1308 ; CHECK-NEXT: [[TMP5:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1309 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <8 x i1> [[TMP5]], i64 0
1310 ; CHECK-NEXT: [[TMP7:%.*]] = select i1 [[TMP6]], double [[TMP4]], double [[TMP3]]
1311 ; CHECK-NEXT: ret double [[TMP7]]
1313 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1314 %2 = extractelement <2 x double> %a, i64 0
1315 %3 = extractelement <2 x double> %b, i64 0
1316 %4 = extractelement <2 x double> %1, i64 0
1317 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1318 %6 = bitcast i8 %mask to <8 x i1>
1319 %7 = extractelement <8 x i1> %6, i64 0
1320 %8 = select i1 %7, double %5, double %4
1321 %9 = insertelement <2 x double> %1, double %8, i64 0
1322 %10 = extractelement <2 x double> %9, i32 0
; Same scalar double FMA sequence, but lane 1 is read back. The FMA result
; only lands in lane 0, so lane 1 still holds the inserted 1.0 and the
; expected output is that constant.
1326 define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1327 ; CHECK-LABEL: @test_mask3_vfmadd_sd_1(
1328 ; CHECK-NEXT: ret double 1.000000e+00
1330 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1331 %2 = extractelement <2 x double> %a, i64 0
1332 %3 = extractelement <2 x double> %b, i64 0
1333 %4 = extractelement <2 x double> %1, i64 0
1334 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1335 %6 = bitcast i8 %mask to <8 x i1>
1336 %7 = extractelement <8 x i1> %6, i64 0
1337 %8 = select i1 %7, double %5, double %4
1338 %9 = insertelement <2 x double> %1, double %8, i64 0
1339 %10 = extractelement <2 x double> %9, i32 1
; Scalar float fused multiply-subtract: the addend is %c negated through a
; vector `fsub -0.0, %c`, while the masked-off fallback is the un-negated
; lane 0 of %c. Lanes 1-3 of %a and %b are overwritten but unused; the
; expected output negates only the extracted scalar.
1343 define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1344 ; CHECK-LABEL: @test_mask3_vfmsub_ss(
1345 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i32 0
1346 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
1347 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1348 ; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
1349 ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
1350 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
1351 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1352 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
1353 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
1354 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
1355 ; CHECK-NEXT: ret <4 x float> [[TMP10]]
1357 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1358 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1359 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1360 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1361 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1362 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1363 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
1364 %8 = extractelement <4 x float> %3, i64 0
1365 %9 = extractelement <4 x float> %6, i64 0
1366 %10 = extractelement <4 x float> %7, i64 0
1367 %11 = call float @llvm.fma.f32(float %8, float %9, float %10)
1368 %12 = extractelement <4 x float> %c, i64 0
1369 %13 = bitcast i8 %mask to <8 x i1>
1370 %14 = extractelement <8 x i1> %13, i64 0
1371 %15 = select i1 %14, float %11, float %12
1372 %16 = insertelement <4 x float> %c, float %15, i64 0
; Scalar float fused multiply-subtract with %c (lanes 1-3 overwritten) as the
; negated addend and as the masked-off fallback, reading lane 0 of the result.
; The inserted constants do not affect lane 0, so the expected output works
; on %c directly and yields the select.
1376 define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1377 ; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
1378 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1379 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1380 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
1381 ; CHECK-NEXT: [[TMP4:%.*]] = fsub float -0.000000e+00, [[TMP3]]
1382 ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
1383 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i32 0
1384 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1385 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
1386 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], float [[TMP5]], float [[TMP6]]
1387 ; CHECK-NEXT: ret float [[TMP9]]
1389 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1390 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1391 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1392 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1393 %5 = extractelement <4 x float> %a, i64 0
1394 %6 = extractelement <4 x float> %b, i64 0
1395 %7 = extractelement <4 x float> %4, i64 0
1396 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1397 %9 = extractelement <4 x float> %3, i64 0
1398 %10 = bitcast i8 %mask to <8 x i1>
1399 %11 = extractelement <8 x i1> %10, i64 0
1400 %12 = select i1 %11, float %8, float %9
1401 %13 = insertelement <4 x float> %3, float %12, i64 0
1402 %14 = extractelement <4 x float> %13, i32 0
; Same scalar float multiply-subtract sequence, but lane 1 is read back. The
; FMA result only lands in lane 0, so lane 1 still holds the inserted 1.0 and
; the expected output is that constant.
1406 define float @test_mask3_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1407 ; CHECK-LABEL: @test_mask3_vfmsub_ss_1(
1408 ; CHECK-NEXT: ret float 1.000000e+00
1410 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1411 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1412 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1413 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1414 %5 = extractelement <4 x float> %a, i64 0
1415 %6 = extractelement <4 x float> %b, i64 0
1416 %7 = extractelement <4 x float> %4, i64 0
1417 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1418 %9 = extractelement <4 x float> %3, i64 0
1419 %10 = bitcast i8 %mask to <8 x i1>
1420 %11 = extractelement <8 x i1> %10, i64 0
1421 %12 = select i1 %11, float %8, float %9
1422 %13 = insertelement <4 x float> %3, float %12, i64 0
1423 %14 = extractelement <4 x float> %13, i32 1
; Variant of the lane-1 float multiply-subtract test that negates the addend
; with the unary `fneg` instruction instead of `fsub -0.0, x`. The expected
; output is the same constant 1.0 that was inserted into lane 1.
1427 define float @test_mask3_vfmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1428 ; CHECK-LABEL: @test_mask3_vfmsub_ss_1_unary_fneg(
1429 ; CHECK-NEXT: ret float 1.000000e+00
1431 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1432 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1433 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1434 %4 = fneg <4 x float> %3
1435 %5 = extractelement <4 x float> %a, i64 0
1436 %6 = extractelement <4 x float> %b, i64 0
1437 %7 = extractelement <4 x float> %4, i64 0
1438 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1439 %9 = extractelement <4 x float> %3, i64 0
1440 %10 = bitcast i8 %mask to <8 x i1>
1441 %11 = extractelement <8 x i1> %10, i64 0
1442 %12 = select i1 %11, float %8, float %9
1443 %13 = insertelement <4 x float> %3, float %12, i64 0
1444 %14 = extractelement <4 x float> %13, i32 1
; Scalar double fused multiply-subtract: the addend is %c negated through a
; vector `fsub -0.0, %c`, while the masked-off fallback is the un-negated
; lane 0 of %c. The constants inserted into lane 1 of %a and %b are unused;
; the expected output negates only the extracted scalar.
1448 define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1449 ; CHECK-LABEL: @test_mask3_vfmsub_sd(
1450 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1451 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1452 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1453 ; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
1454 ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
1455 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
1456 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1457 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
1458 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
1459 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
1460 ; CHECK-NEXT: ret <2 x double> [[TMP10]]
1462 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1463 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1464 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
1465 %4 = extractelement <2 x double> %1, i64 0
1466 %5 = extractelement <2 x double> %2, i64 0
1467 %6 = extractelement <2 x double> %3, i64 0
1468 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1469 %8 = extractelement <2 x double> %c, i64 0
1470 %9 = bitcast i8 %mask to <8 x i1>
1471 %10 = extractelement <8 x i1> %9, i64 0
1472 %11 = select i1 %10, double %7, double %8
1473 %12 = insertelement <2 x double> %c, double %11, i64 0
1474 ret <2 x double> %12
; Scalar double fused multiply-subtract with %c (lane 1 overwritten) as the
; negated addend and as the masked-off fallback, reading lane 0 of the result.
; The inserted constant does not affect lane 0, so the expected output works
; on %c directly and yields the select.
1477 define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1478 ; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
1479 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1480 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1481 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1482 ; CHECK-NEXT: [[TMP4:%.*]] = fsub double -0.000000e+00, [[TMP3]]
1483 ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
1484 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
1485 ; CHECK-NEXT: [[TMP7:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1486 ; CHECK-NEXT: [[TMP8:%.*]] = extractelement <8 x i1> [[TMP7]], i64 0
1487 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[TMP8]], double [[TMP5]], double [[TMP6]]
1488 ; CHECK-NEXT: ret double [[TMP9]]
1490 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1491 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1492 %3 = extractelement <2 x double> %a, i64 0
1493 %4 = extractelement <2 x double> %b, i64 0
1494 %5 = extractelement <2 x double> %2, i64 0
1495 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1496 %7 = extractelement <2 x double> %1, i64 0
1497 %8 = bitcast i8 %mask to <8 x i1>
1498 %9 = extractelement <8 x i1> %8, i64 0
1499 %10 = select i1 %9, double %6, double %7
1500 %11 = insertelement <2 x double> %1, double %10, i64 0
1501 %12 = extractelement <2 x double> %11, i32 0
; Same scalar double multiply-subtract sequence, but lane 1 is read back. The
; FMA result only lands in lane 0, so lane 1 still holds the inserted 1.0 and
; the expected output is that constant.
1505 define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1506 ; CHECK-LABEL: @test_mask3_vfmsub_sd_1(
1507 ; CHECK-NEXT: ret double 1.000000e+00
1509 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1510 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1511 %3 = extractelement <2 x double> %a, i64 0
1512 %4 = extractelement <2 x double> %b, i64 0
1513 %5 = extractelement <2 x double> %2, i64 0
1514 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1515 %7 = extractelement <2 x double> %1, i64 0
1516 %8 = bitcast i8 %mask to <8 x i1>
1517 %9 = extractelement <8 x i1> %8, i64 0
1518 %10 = select i1 %9, double %6, double %7
1519 %11 = insertelement <2 x double> %1, double %10, i64 0
1520 %12 = extractelement <2 x double> %11, i32 1
; Variant of the lane-1 double multiply-subtract test that negates the addend
; with the unary `fneg` instruction instead of `fsub -0.0, x`. The expected
; output is the same constant 1.0 that was inserted into lane 1.
1524 define double @test_mask3_vfmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1525 ; CHECK-LABEL: @test_mask3_vfmsub_sd_1_unary_fneg(
1526 ; CHECK-NEXT: ret double 1.000000e+00
1528 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1529 %2 = fneg <2 x double> %1
1530 %3 = extractelement <2 x double> %a, i64 0
1531 %4 = extractelement <2 x double> %b, i64 0
1532 %5 = extractelement <2 x double> %2, i64 0
1533 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1534 %7 = extractelement <2 x double> %1, i64 0
1535 %8 = bitcast i8 %mask to <8 x i1>
1536 %9 = extractelement <8 x i1> %8, i64 0
1537 %10 = select i1 %9, double %6, double %7
1538 %11 = insertelement <2 x double> %1, double %10, i64 0
1539 %12 = extractelement <2 x double> %11, i32 1
; Scalar float negated multiply-subtract: both the first multiplicand (%a
; with lanes 1-3 overwritten) and the addend %c are negated through vector
; `fsub -0.0, x`; the masked-off fallback is the un-negated lane 0 of %c. The
; expected output negates only the two extracted lane-0 scalars.
1543 define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1544 ; CHECK-LABEL: @test_mask3_vfnmsub_ss(
1545 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1546 ; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
1547 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i32 0
1548 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1549 ; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
1550 ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
1551 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
1552 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1553 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
1554 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
1555 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <4 x float> [[C]], float [[TMP10]], i64 0
1556 ; CHECK-NEXT: ret <4 x float> [[TMP11]]
1558 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1559 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1560 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1561 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1562 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1563 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1564 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1565 %8 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
1566 %9 = extractelement <4 x float> %7, i64 0
1567 %10 = extractelement <4 x float> %6, i64 0
1568 %11 = extractelement <4 x float> %8, i64 0
1569 %12 = call float @llvm.fma.f32(float %9, float %10, float %11)
1570 %13 = extractelement <4 x float> %c, i64 0
1571 %14 = bitcast i8 %mask to <8 x i1>
1572 %15 = extractelement <8 x i1> %14, i64 0
1573 %16 = select i1 %15, float %12, float %13
1574 %17 = insertelement <4 x float> %c, float %16, i64 0
; Scalar float negated multiply-subtract (both %a and the addend negated)
; with %c (lanes 1-3 overwritten) as addend and masked-off fallback, reading
; lane 0 of the result. The inserted constants do not affect lane 0, so the
; expected output works on %c directly and yields the select.
1578 define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1579 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
1580 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1581 ; CHECK-NEXT: [[TMP2:%.*]] = fsub float -0.000000e+00, [[TMP1]]
1582 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1583 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i32 0
1584 ; CHECK-NEXT: [[TMP5:%.*]] = fsub float -0.000000e+00, [[TMP4]]
1585 ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
1586 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i32 0
1587 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1588 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
1589 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], float [[TMP6]], float [[TMP7]]
1590 ; CHECK-NEXT: ret float [[TMP10]]
1592 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1593 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1594 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1595 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1596 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1597 %6 = extractelement <4 x float> %4, i64 0
1598 %7 = extractelement <4 x float> %b, i64 0
1599 %8 = extractelement <4 x float> %5, i64 0
1600 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1601 %10 = extractelement <4 x float> %3, i64 0
1602 %11 = bitcast i8 %mask to <8 x i1>
1603 %12 = extractelement <8 x i1> %11, i64 0
1604 %13 = select i1 %12, float %9, float %10
1605 %14 = insertelement <4 x float> %3, float %13, i64 0
1606 %15 = extractelement <4 x float> %14, i32 0
; Same scalar float negated multiply-subtract sequence, but lane 1 is read
; back. The FMA result only lands in lane 0, so lane 1 still holds the
; inserted 1.0 and the expected output is that constant.
1610 define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1611 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_1(
1612 ; CHECK-NEXT: ret float 1.000000e+00
1614 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1615 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1616 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1617 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1618 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1619 %6 = extractelement <4 x float> %4, i64 0
1620 %7 = extractelement <4 x float> %b, i64 0
1621 %8 = extractelement <4 x float> %5, i64 0
1622 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1623 %10 = extractelement <4 x float> %3, i64 0
1624 %11 = bitcast i8 %mask to <8 x i1>
1625 %12 = extractelement <8 x i1> %11, i64 0
1626 %13 = select i1 %12, float %9, float %10
1627 %14 = insertelement <4 x float> %3, float %13, i64 0
1628 %15 = extractelement <4 x float> %14, i32 1
; Variant of the lane-1 float negated multiply-subtract test that performs
; both negations with the unary `fneg` instruction instead of `fsub -0.0, x`.
; The expected output is the same constant 1.0 that was inserted into lane 1.
1632 define float @test_mask3_vfnmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1633 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_1_unary_fneg(
1634 ; CHECK-NEXT: ret float 1.000000e+00
1636 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1637 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1638 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1639 %4 = fneg <4 x float> %a
1640 %5 = fneg <4 x float> %3
1641 %6 = extractelement <4 x float> %4, i64 0
1642 %7 = extractelement <4 x float> %b, i64 0
1643 %8 = extractelement <4 x float> %5, i64 0
1644 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1645 %10 = extractelement <4 x float> %3, i64 0
1646 %11 = bitcast i8 %mask to <8 x i1>
1647 %12 = extractelement <8 x i1> %11, i64 0
1648 %13 = select i1 %12, float %9, float %10
1649 %14 = insertelement <4 x float> %3, float %13, i64 0
1650 %15 = extractelement <4 x float> %14, i32 1
; Scalar double negated multiply-subtract: both the first multiplicand (%a
; with lane 1 overwritten) and the addend %c are negated through vector
; `fsub -0.0, x`; the masked-off fallback is the un-negated lane 0 of %c. The
; expected output negates only the two extracted lane-0 scalars.
1654 define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1655 ; CHECK-LABEL: @test_mask3_vfnmsub_sd(
1656 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1657 ; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
1658 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1659 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1660 ; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
1661 ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
1662 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
1663 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1664 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
1665 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
1666 ; CHECK-NEXT: [[TMP11:%.*]] = insertelement <2 x double> [[C]], double [[TMP10]], i64 0
1667 ; CHECK-NEXT: ret <2 x double> [[TMP11]]
1669 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1670 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1671 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1672 %4 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
1673 %5 = extractelement <2 x double> %3, i64 0
1674 %6 = extractelement <2 x double> %2, i64 0
1675 %7 = extractelement <2 x double> %4, i64 0
1676 %8 = call double @llvm.fma.f64(double %5, double %6, double %7)
1677 %9 = extractelement <2 x double> %c, i64 0
1678 %10 = bitcast i8 %mask to <8 x i1>
1679 %11 = extractelement <8 x i1> %10, i64 0
1680 %12 = select i1 %11, double %8, double %9
1681 %13 = insertelement <2 x double> %c, double %12, i64 0
1682 ret <2 x double> %13
; Scalar double negated multiply-subtract (both %a and the addend negated)
; with %c (lane 1 overwritten) as addend and masked-off fallback, reading
; lane 0 of the result. The inserted constant does not affect lane 0, so the
; expected output works on %c directly and yields the select.
1685 define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1686 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
1687 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1688 ; CHECK-NEXT: [[TMP2:%.*]] = fsub double -0.000000e+00, [[TMP1]]
1689 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1690 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1691 ; CHECK-NEXT: [[TMP5:%.*]] = fsub double -0.000000e+00, [[TMP4]]
1692 ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
1693 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
1694 ; CHECK-NEXT: [[TMP8:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1695 ; CHECK-NEXT: [[TMP9:%.*]] = extractelement <8 x i1> [[TMP8]], i64 0
1696 ; CHECK-NEXT: [[TMP10:%.*]] = select i1 [[TMP9]], double [[TMP6]], double [[TMP7]]
1697 ; CHECK-NEXT: ret double [[TMP10]]
1699 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1700 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
1701 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1702 %4 = extractelement <2 x double> %2, i64 0
1703 %5 = extractelement <2 x double> %b, i64 0
1704 %6 = extractelement <2 x double> %3, i64 0
1705 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1706 %8 = extractelement <2 x double> %1, i64 0
1707 %9 = bitcast i8 %mask to <8 x i1>
1708 %10 = extractelement <8 x i1> %9, i64 0
1709 %11 = select i1 %10, double %7, double %8
1710 %12 = insertelement <2 x double> %1, double %11, i64 0
1711 %13 = extractelement <2 x double> %12, i32 0
; Same scalar double negated multiply-subtract sequence, but lane 1 is read
; back. The FMA result only lands in lane 0, so lane 1 still holds the
; inserted 1.0 and the expected output is that constant.
1715 define double @test_mask3_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1716 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_1(
1717 ; CHECK-NEXT: ret double 1.000000e+00
1719 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1720 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
1721 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1722 %4 = extractelement <2 x double> %2, i64 0
1723 %5 = extractelement <2 x double> %b, i64 0
1724 %6 = extractelement <2 x double> %3, i64 0
1725 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1726 %8 = extractelement <2 x double> %1, i64 0
1727 %9 = bitcast i8 %mask to <8 x i1>
1728 %10 = extractelement <8 x i1> %9, i64 0
1729 %11 = select i1 %10, double %7, double %8
1730 %12 = insertelement <2 x double> %1, double %11, i64 0
1731 %13 = extractelement <2 x double> %12, i32 1
; Variant of the lane-1 double negated multiply-subtract test that performs
; both negations with the unary `fneg` instruction instead of `fsub -0.0, x`.
; The expected output is the same constant 1.0 that was inserted into lane 1.
1735 define double @test_mask3_vfnmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1736 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_1_unary_fneg(
1737 ; CHECK-NEXT: ret double 1.000000e+00
1739 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1740 %2 = fneg <2 x double> %a
1741 %3 = fneg <2 x double> %1
1742 %4 = extractelement <2 x double> %2, i64 0
1743 %5 = extractelement <2 x double> %b, i64 0
1744 %6 = extractelement <2 x double> %3, i64 0
1745 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1746 %8 = extractelement <2 x double> %1, i64 0
1747 %9 = bitcast i8 %mask to <8 x i1>
1748 %10 = extractelement <8 x i1> %9, i64 0
1749 %11 = select i1 %10, double %7, double %8
1750 %12 = insertelement <2 x double> %1, double %11, i64 0
1751 %13 = extractelement <2 x double> %12, i32 1
1755 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
; permd with the constant index vector <0, 1, ..., 7> selects every lane from
; its own position; the expected output is the source operand itself.
1757 define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
1758 ; CHECK-LABEL: @identity_test_permvar_si_256(
1759 ; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]
1761 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
; Identity permd followed by a per-lane select against %passthru under an
; 8-bit mask. The expected output drops the permd and selects between %a0 and
; %passthru directly.
1765 define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1766 ; CHECK-LABEL: @identity_test_permvar_si_256_mask(
1767 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1768 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
1769 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
1771 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
1772 %2 = bitcast i8 %mask to <8 x i1>
1773 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; permd with an all-zero index vector; the expected output is a generic
; shufflevector of %a0 with a zeroinitializer mask instead of the intrinsic.
1777 define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
1778 ; CHECK-LABEL: @zero_test_permvar_si_256(
1779 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
1780 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1782 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
; All-zero-index permd followed by a masked select against %passthru. The
; expected output replaces the intrinsic with a zeroinitializer-mask
; shufflevector and keeps the bitcast + select unchanged.
1786 define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1787 ; CHECK-LABEL: @zero_test_permvar_si_256_mask(
1788 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> zeroinitializer
1789 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1790 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1791 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1793 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
1794 %2 = bitcast i8 %mask to <8 x i1>
1795 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; Constant reversing index vector <7, 6, ..., 0>: the permd call is expected to
; become a shufflevector of %a0 that uses the same indices as its mask.
1799 define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
1800 ; CHECK-LABEL: @shuffle_test_permvar_si_256(
1801 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1802 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1804 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <7, ..., 0>; the bitcast of %mask and the select
; against %passthru are kept.
1808 define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1809 ; CHECK-LABEL: @shuffle_test_permvar_si_256_mask(
1810 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1811 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1812 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1813 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1815 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1816 %2 = bitcast i8 %mask to <8 x i1>
1817 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 6..0.
1821 define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
1822 ; CHECK-LABEL: @undef_test_permvar_si_256(
1823 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1824 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1826 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane; the
; bitcast of %mask and the select against %passthru are kept.
1830 define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1831 ; CHECK-LABEL: @undef_test_permvar_si_256_mask(
1832 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1833 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1834 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1835 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1837 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1838 %2 = bitcast i8 %mask to <8 x i1>
1839 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; x86 AVX2 permps intrinsic. The tests that follow pass the <8 x float> source
; vector as the first operand and a constant <8 x i32> index vector as the second.
1843 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
; Identity index vector <0, 1, ..., 7>: the expected output contains no permps
; call and returns %a0 unchanged.
1845 define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
1846 ; CHECK-LABEL: @identity_test_permvar_sf_256(
1847 ; CHECK-NEXT: ret <8 x float> [[A0:%.*]]
1849 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
; Identity index vector combined with a masked blend: %mask is bitcast to
; <8 x i1> and selects between the permps result and %passthru. The expected
; output drops the permps call and lets the select read %a0 directly.
1853 define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1854 ; CHECK-LABEL: @identity_test_permvar_sf_256_mask(
1855 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1856 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
1857 ; CHECK-NEXT: ret <8 x float> [[TMP2]]
1859 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
1860 %2 = bitcast i8 %mask to <8 x i1>
1861 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; All-zero index vector: the permps call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
1865 define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
1866 ; CHECK-LABEL: @zero_test_permvar_sf_256(
1867 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
1868 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
1870 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0, while the bitcast of %mask and the
; select against %passthru remain.
1874 define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1875 ; CHECK-LABEL: @zero_test_permvar_sf_256_mask(
1876 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> zeroinitializer
1877 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1878 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
1879 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
1881 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
1882 %2 = bitcast i8 %mask to <8 x i1>
1883 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; Constant reversing index vector <7, 6, ..., 0>: the permps call is expected to
; become a shufflevector of %a0 that uses the same indices as its mask.
1887 define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
1888 ; CHECK-LABEL: @shuffle_test_permvar_sf_256(
1889 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1890 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
1892 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <7, ..., 0>; the bitcast of %mask and the select
; against %passthru are kept.
1896 define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1897 ; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
1898 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1899 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1900 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
1901 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
1903 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1904 %2 = bitcast i8 %mask to <8 x i1>
1905 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 6..0.
1909 define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
1910 ; CHECK-LABEL: @undef_test_permvar_sf_256(
1911 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1912 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
1914 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane; the
; bitcast of %mask and the select against %passthru are kept.
1918 define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1919 ; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
1920 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1921 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1922 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
1923 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
1925 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1926 %2 = bitcast i8 %mask to <8 x i1>
1927 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; x86 AVX-512 permvar.di.256 intrinsic. The tests that follow pass the <4 x i64>
; source vector as the first operand and a constant <4 x i64> index vector as
; the second.
1931 declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
; Identity index vector <0, 1, 2, 3>: the expected output contains no permvar
; call and returns %a0 unchanged.
1933 define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
1934 ; CHECK-LABEL: @identity_test_permvar_di_256(
1935 ; CHECK-NEXT: ret <4 x i64> [[A0:%.*]]
1937 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; Identity index vector with a masked blend. %mask is bitcast to <8 x i1> and
; its first four lanes are extracted by a shufflevector to form the <4 x i1>
; select condition. The expected output drops the permvar call, lets the select
; read %a0 directly, and shows undef as the extract's second operand.
1941 define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
1942 ; CHECK-LABEL: @identity_test_permvar_di_256_mask(
1943 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1944 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1945 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
1946 ; CHECK-NEXT: ret <4 x i64> [[TMP2]]
1948 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
1949 %2 = bitcast i8 %mask to <8 x i1>
1950 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1951 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; All-zero index vector: the permvar call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
1955 define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
1956 ; CHECK-LABEL: @zero_test_permvar_di_256(
1957 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
1958 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1960 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0. The first four lanes of the
; bitcast %mask still drive the select against %passthru.
1964 define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
1965 ; CHECK-LABEL: @zero_test_permvar_di_256_mask(
1966 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> zeroinitializer
1967 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1968 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1969 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
1970 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
1972 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
1973 %2 = bitcast i8 %mask to <8 x i1>
1974 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1975 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; Constant reversing index vector <3, 2, 1, 0>: the permvar call is expected to
; become a shufflevector of %a0 with the same indices (as i32) in its mask.
1979 define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
1980 ; CHECK-LABEL: @shuffle_test_permvar_di_256(
1981 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1982 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
1984 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <3, 2, 1, 0>. The first four lanes of the bitcast
; %mask still drive the select against %passthru.
1988 define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
1989 ; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
1990 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
1991 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1992 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1993 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
1994 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
1996 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
1997 %2 = bitcast i8 %mask to <8 x i1>
1998 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
1999 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 2, 1, 0.
2003 define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
2004 ; CHECK-LABEL: @undef_test_permvar_di_256(
2005 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2006 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2008 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane. The first
; four lanes of the bitcast %mask still drive the select against %passthru.
2012 define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
2013 ; CHECK-LABEL: @undef_test_permvar_di_256_mask(
2014 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2015 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2016 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2017 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
2018 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2020 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
2021 %2 = bitcast i8 %mask to <8 x i1>
2022 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2023 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; x86 AVX-512 permvar.df.256 intrinsic. The tests that follow pass the
; <4 x double> source vector as the first operand and a constant <4 x i64>
; index vector as the second.
2027 declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
; Identity index vector <0, 1, 2, 3>: the expected output contains no permvar
; call and returns %a0 unchanged.
2029 define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
2030 ; CHECK-LABEL: @identity_test_permvar_df_256(
2031 ; CHECK-NEXT: ret <4 x double> [[A0:%.*]]
2033 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; Identity index vector with a masked blend. %mask is bitcast to <8 x i1> and
; its first four lanes are extracted by a shufflevector to form the <4 x i1>
; select condition. The expected output drops the permvar call, lets the select
; read %a0 directly, and shows undef as the extract's second operand.
2037 define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2038 ; CHECK-LABEL: @identity_test_permvar_df_256_mask(
2039 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2040 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2041 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
2042 ; CHECK-NEXT: ret <4 x double> [[TMP2]]
2044 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
2045 %2 = bitcast i8 %mask to <8 x i1>
2046 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2047 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; All-zero index vector: the permvar call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
2051 define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
2052 ; CHECK-LABEL: @zero_test_permvar_df_256(
2053 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
2054 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2056 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0. The first four lanes of the
; bitcast %mask still drive the select against %passthru.
2060 define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2061 ; CHECK-LABEL: @zero_test_permvar_df_256_mask(
2062 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> zeroinitializer
2063 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2064 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2065 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2066 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2068 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
2069 %2 = bitcast i8 %mask to <8 x i1>
2070 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2071 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; Constant reversing index vector <3, 2, 1, 0>: the permvar call is expected to
; become a shufflevector of %a0 with the same indices (as i32) in its mask.
2075 define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
2076 ; CHECK-LABEL: @shuffle_test_permvar_df_256(
2077 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2078 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2080 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <3, 2, 1, 0>. The first four lanes of the bitcast
; %mask still drive the select against %passthru.
2084 define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2085 ; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
2086 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2087 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2088 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2089 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2090 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2092 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
2093 %2 = bitcast i8 %mask to <8 x i1>
2094 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2095 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 2, 1, 0.
2099 define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
2100 ; CHECK-LABEL: @undef_test_permvar_df_256(
2101 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2102 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2104 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane. The first
; four lanes of the bitcast %mask still drive the select against %passthru.
2108 define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2109 ; CHECK-LABEL: @undef_test_permvar_df_256_mask(
2110 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> undef, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2111 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2112 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2113 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2114 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2116 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
2117 %2 = bitcast i8 %mask to <8 x i1>
2118 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2119 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; x86 AVX-512 permvar.si.512 intrinsic. The tests that follow pass the
; <16 x i32> source vector as the first operand and a constant <16 x i32> index
; vector as the second.
2123 declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
; Identity index vector <0, 1, ..., 15>: the expected output contains no
; permvar call and returns %a0 unchanged.
2125 define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
2126 ; CHECK-LABEL: @identity_test_permvar_si_512(
2127 ; CHECK-NEXT: ret <16 x i32> [[A0:%.*]]
2129 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
; Identity index vector combined with a masked blend: the i16 %mask is bitcast
; to <16 x i1> and selects between the permvar result and %passthru. The
; expected output drops the call and lets the select read %a0 directly.
2133 define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2134 ; CHECK-LABEL: @identity_test_permvar_si_512_mask(
2135 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2136 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
2137 ; CHECK-NEXT: ret <16 x i32> [[TMP2]]
2139 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
2140 %2 = bitcast i16 %mask to <16 x i1>
2141 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; All-zero index vector: the permvar call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
2145 define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
2146 ; CHECK-LABEL: @zero_test_permvar_si_512(
2147 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
2148 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2150 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0, while the bitcast of the i16 %mask
; and the select against %passthru remain.
2154 define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2155 ; CHECK-LABEL: @zero_test_permvar_si_512_mask(
2156 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> zeroinitializer
2157 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2158 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2159 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2161 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
2162 %2 = bitcast i16 %mask to <16 x i1>
2163 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; Constant reversing index vector <15, 14, ..., 0>: the permvar call is expected
; to become a shufflevector of %a0 that uses the same indices as its mask.
2167 define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
2168 ; CHECK-LABEL: @shuffle_test_permvar_si_512(
2169 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2170 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2172 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <15, ..., 0>; the bitcast of the i16 %mask and the
; select against %passthru are kept.
2176 define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2177 ; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
2178 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2179 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2180 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2181 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2183 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2184 %2 = bitcast i16 %mask to <16 x i1>
2185 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 14..0.
2189 define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
2190 ; CHECK-LABEL: @undef_test_permvar_si_512(
2191 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2192 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2194 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane; the
; bitcast of the i16 %mask and the select against %passthru are kept.
2198 define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2199 ; CHECK-LABEL: @undef_test_permvar_si_512_mask(
2200 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2201 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2202 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2203 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2205 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2206 %2 = bitcast i16 %mask to <16 x i1>
2207 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; x86 AVX-512 permvar.sf.512 intrinsic. The tests that follow pass the
; <16 x float> source vector as the first operand and a constant <16 x i32>
; index vector as the second.
2211 declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
; Identity index vector <0, 1, ..., 15>: the expected output contains no
; permvar call and returns %a0 unchanged.
2213 define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
2214 ; CHECK-LABEL: @identity_test_permvar_sf_512(
2215 ; CHECK-NEXT: ret <16 x float> [[A0:%.*]]
2217 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
; Identity index vector combined with a masked blend: the i16 %mask is bitcast
; to <16 x i1> and selects between the permvar result and %passthru. The
; expected output drops the call and lets the select read %a0 directly.
2221 define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2222 ; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
2223 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2224 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
2225 ; CHECK-NEXT: ret <16 x float> [[TMP2]]
2227 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
2228 %2 = bitcast i16 %mask to <16 x i1>
2229 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; All-zero index vector: the permvar call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
2233 define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
2234 ; CHECK-LABEL: @zero_test_permvar_sf_512(
2235 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
2236 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2238 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0, while the bitcast of the i16 %mask
; and the select against %passthru remain.
2242 define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2243 ; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
2244 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> zeroinitializer
2245 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2246 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2247 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2249 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
2250 %2 = bitcast i16 %mask to <16 x i1>
2251 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; Constant reversing index vector <15, 14, ..., 0>: the permvar call is expected
; to become a shufflevector of %a0 that uses the same indices as its mask.
2255 define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
2256 ; CHECK-LABEL: @shuffle_test_permvar_sf_512(
2257 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2258 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2260 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <15, ..., 0>; the bitcast of the i16 %mask and the
; select against %passthru are kept.
2264 define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2265 ; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
2266 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2267 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2268 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2269 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2271 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2272 %2 = bitcast i16 %mask to <16 x i1>
2273 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 14..0.
2277 define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
2278 ; CHECK-LABEL: @undef_test_permvar_sf_512(
2279 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2280 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2282 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Index vector with an undef first element plus a masked blend: the call is
; expected to become a shufflevector whose mask keeps the undef lane; the
; bitcast of the i16 %mask and the select against %passthru are kept.
2286 define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2287 ; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
2288 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2289 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2290 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2291 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2293 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2294 %2 = bitcast i16 %mask to <16 x i1>
2295 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; x86 AVX-512 permvar.di.512 intrinsic. The tests that follow pass the <8 x i64>
; source vector as the first operand and a constant <8 x i64> index vector as
; the second.
2299 declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
; Identity index vector <0, 1, ..., 7>: the expected output contains no permvar
; call and returns %a0 unchanged.
2301 define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
2302 ; CHECK-LABEL: @identity_test_permvar_di_512(
2303 ; CHECK-NEXT: ret <8 x i64> [[A0:%.*]]
2305 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
; Identity index vector combined with a masked blend: %mask is bitcast to
; <8 x i1> and selects between the permvar result and %passthru. The expected
; output drops the call and lets the select read %a0 directly.
2309 define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2310 ; CHECK-LABEL: @identity_test_permvar_di_512_mask(
2311 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2312 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
2313 ; CHECK-NEXT: ret <8 x i64> [[TMP2]]
2315 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
2316 %2 = bitcast i8 %mask to <8 x i1>
2317 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; All-zero index vector: the permvar call is expected to be rewritten as a
; shufflevector of %a0 with a zeroinitializer mask.
2321 define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
2322 ; CHECK-LABEL: @zero_test_permvar_di_512(
2323 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
2324 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2326 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
; All-zero index vector with a masked blend: the call is expected to become a
; zeroinitializer-mask shufflevector of %a0, while the bitcast of %mask and the
; select against %passthru remain.
2330 define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2331 ; CHECK-LABEL: @zero_test_permvar_di_512_mask(
2332 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> zeroinitializer
2333 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2334 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2335 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2337 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
2338 %2 = bitcast i8 %mask to <8 x i1>
2339 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; Constant reversing index vector <7, 6, ..., 0>: the permvar call is expected
; to become a shufflevector of %a0 with the same indices (as i32) in its mask.
2343 define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
2344 ; CHECK-LABEL: @shuffle_test_permvar_di_512(
2345 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2346 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2348 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Reversing index vector with a masked blend: the call is expected to become a
; shufflevector with mask <7, ..., 0>; the bitcast of %mask and the select
; against %passthru are kept.
2352 define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2353 ; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
2354 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2355 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2356 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2357 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2359 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2360 %2 = bitcast i8 %mask to <8 x i1>
2361 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; Reversing index vector whose first element is undef: the expected
; shufflevector mask keeps that first lane as undef and the rest as 6..0.
2365 define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
2366 ; CHECK-LABEL: @undef_test_permvar_di_512(
2367 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2368 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2370 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2374 define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2375 ; CHECK-LABEL: @undef_test_permvar_di_512_mask(
2376 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2377 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2378 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2379 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2381 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2382 %2 = bitcast i8 %mask to <8 x i1>
2383 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
2387 declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
; Ascending index vector <0..7> selects every lane in place: the CHECK lines expect the permvar.df.512 call to fold away so %a0 is returned directly.
2389 define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
2390 ; CHECK-LABEL: @identity_test_permvar_df_512(
2391 ; CHECK-NEXT: ret <8 x double> [[A0:%.*]]
2393 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of %mask and a select between %a0 and %passthru.
2397 define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2398 ; CHECK-LABEL: @identity_test_permvar_df_512_mask(
2399 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2400 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
2401 ; CHECK-NEXT: ret <8 x double> [[TMP2]]
2403 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
2404 %2 = bitcast i8 %mask to <8 x i1>
2405 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2409 define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
2410 ; CHECK-LABEL: @zero_test_permvar_df_512(
2411 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
2412 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2414 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2418 define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2419 ; CHECK-LABEL: @zero_test_permvar_df_512_mask(
2420 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> zeroinitializer
2421 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2422 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2423 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2425 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
2426 %2 = bitcast i8 %mask to <8 x i1>
2427 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.df.512 call to be replaced by a shufflevector with the same reversed mask.
2431 define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
2432 ; CHECK-LABEL: @shuffle_test_permvar_df_512(
2433 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2434 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2436 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Lane-reversing permvar.df.512 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2440 define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2441 ; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
2442 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2443 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2444 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2445 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2447 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2448 %2 = bitcast i8 %mask to <8 x i1>
2449 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2453 define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
2454 ; CHECK-LABEL: @undef_test_permvar_df_512(
2455 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2456 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2458 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2462 define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2463 ; CHECK-LABEL: @undef_test_permvar_df_512_mask(
2464 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2465 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2466 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2467 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2469 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2470 %2 = bitcast i8 %mask to <8 x i1>
2471 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
2475 declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
; Ascending index vector <0..7> selects every lane in place: the CHECK lines expect the permvar.hi.128 call to fold away so %a0 is returned directly.
2477 define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
2478 ; CHECK-LABEL: @identity_test_permvar_hi_128(
2479 ; CHECK-NEXT: ret <8 x i16> [[A0:%.*]]
2481 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of %mask and a select between %a0 and %passthru.
2485 define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2486 ; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
2487 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2488 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
2489 ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
2491 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
2492 %2 = bitcast i8 %mask to <8 x i1>
2493 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2497 define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
2498 ; CHECK-LABEL: @zero_test_permvar_hi_128(
2499 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
2500 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2502 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2506 define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2507 ; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
2508 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> zeroinitializer
2509 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2510 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2511 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2513 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
2514 %2 = bitcast i8 %mask to <8 x i1>
2515 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.hi.128 call to be replaced by a shufflevector with the same reversed mask.
2519 define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
2520 ; CHECK-LABEL: @shuffle_test_permvar_hi_128(
2521 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2522 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2524 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Lane-reversing permvar.hi.128 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2528 define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2529 ; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
2530 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2531 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2532 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2533 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2535 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2536 %2 = bitcast i8 %mask to <8 x i1>
2537 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2541 define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
2542 ; CHECK-LABEL: @undef_test_permvar_hi_128(
2543 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2544 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2546 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2550 define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2551 ; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
2552 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> undef, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2553 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2554 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2555 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2557 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2558 %2 = bitcast i8 %mask to <8 x i1>
2559 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
2563 declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
; Ascending index vector <0..15> selects every lane in place: the CHECK lines expect the permvar.hi.256 call to fold away so %a0 is returned directly.
2565 define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
2566 ; CHECK-LABEL: @identity_test_permvar_hi_256(
2567 ; CHECK-NEXT: ret <16 x i16> [[A0:%.*]]
2569 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of the i16 %mask and a select between %a0 and %passthru.
2573 define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2574 ; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
2575 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2576 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
2577 ; CHECK-NEXT: ret <16 x i16> [[TMP2]]
2579 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
2580 %2 = bitcast i16 %mask to <16 x i1>
2581 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2585 define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
2586 ; CHECK-LABEL: @zero_test_permvar_hi_256(
2587 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
2588 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2590 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2594 define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2595 ; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
2596 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> zeroinitializer
2597 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2598 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2599 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2601 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
2602 %2 = bitcast i16 %mask to <16 x i1>
2603 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.hi.256 call to be replaced by a shufflevector with the same reversed mask.
2607 define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
2608 ; CHECK-LABEL: @shuffle_test_permvar_hi_256(
2609 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2610 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2612 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Lane-reversing permvar.hi.256 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2616 define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2617 ; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
2618 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2619 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2620 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2621 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2623 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2624 %2 = bitcast i16 %mask to <16 x i1>
2625 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2629 define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
2630 ; CHECK-LABEL: @undef_test_permvar_hi_256(
2631 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2632 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2634 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2638 define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2639 ; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
2640 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2641 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2642 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2643 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2645 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2646 %2 = bitcast i16 %mask to <16 x i1>
2647 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
2651 declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
; Ascending index vector <0..31> selects every lane in place: the CHECK lines expect the permvar.hi.512 call to fold away so %a0 is returned directly.
2653 define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
2654 ; CHECK-LABEL: @identity_test_permvar_hi_512(
2655 ; CHECK-NEXT: ret <32 x i16> [[A0:%.*]]
2657 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of the i32 %mask and a select between %a0 and %passthru.
2661 define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2662 ; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
2663 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2664 ; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
2665 ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
2667 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
2668 %2 = bitcast i32 %mask to <32 x i1>
2669 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2673 define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
2674 ; CHECK-LABEL: @zero_test_permvar_hi_512(
2675 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
2676 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2678 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2682 define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2683 ; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
2684 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> zeroinitializer
2685 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2686 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2687 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2689 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
2690 %2 = bitcast i32 %mask to <32 x i1>
2691 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.hi.512 call to be replaced by a shufflevector with the same reversed mask.
2695 define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
2696 ; CHECK-LABEL: @shuffle_test_permvar_hi_512(
2697 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2698 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2700 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Lane-reversing permvar.hi.512 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2704 define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2705 ; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
2706 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2707 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2708 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2709 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2711 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2712 %2 = bitcast i32 %mask to <32 x i1>
2713 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2717 define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
2718 ; CHECK-LABEL: @undef_test_permvar_hi_512(
2719 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2720 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2722 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2726 define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2727 ; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
2728 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2729 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2730 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2731 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2733 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2734 %2 = bitcast i32 %mask to <32 x i1>
2735 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
2739 declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
; Ascending index vector <0..15> selects every lane in place: the CHECK lines expect the permvar.qi.128 call to fold away so %a0 is returned directly.
2741 define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
2742 ; CHECK-LABEL: @identity_test_permvar_qi_128(
2743 ; CHECK-NEXT: ret <16 x i8> [[A0:%.*]]
2745 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of the i16 %mask and a select between %a0 and %passthru.
2749 define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2750 ; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
2751 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2752 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
2753 ; CHECK-NEXT: ret <16 x i8> [[TMP2]]
2755 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
2756 %2 = bitcast i16 %mask to <16 x i1>
2757 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2761 define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
2762 ; CHECK-LABEL: @zero_test_permvar_qi_128(
2763 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
2764 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2766 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2770 define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2771 ; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
2772 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> zeroinitializer
2773 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2774 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
2775 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
2777 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
2778 %2 = bitcast i16 %mask to <16 x i1>
2779 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.qi.128 call to be replaced by a shufflevector with the same reversed mask.
2783 define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
2784 ; CHECK-LABEL: @shuffle_test_permvar_qi_128(
2785 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2786 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2788 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Lane-reversing permvar.qi.128 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2792 define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2793 ; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
2794 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2795 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2796 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
2797 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
2799 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2800 %2 = bitcast i16 %mask to <16 x i1>
2801 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2805 define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
2806 ; CHECK-LABEL: @undef_test_permvar_qi_128(
2807 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2808 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2810 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked form of the undef-index case: the shufflevector mask keeps the undef lane, and the bitcast/select on %mask and %passthru stay in place.
2814 define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2815 ; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
2816 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> undef, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2817 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2818 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
2819 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
2821 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2822 %2 = bitcast i16 %mask to <16 x i1>
2823 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
2827 declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
; Ascending index vector <0..31> selects every lane in place: the CHECK lines expect the permvar.qi.256 call to fold away so %a0 is returned directly.
2829 define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
2830 ; CHECK-LABEL: @identity_test_permvar_qi_256(
2831 ; CHECK-NEXT: ret <32 x i8> [[A0:%.*]]
2833 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
; Identity permute feeding a mask-driven select: the call should disappear, leaving only the bitcast of the i32 %mask and a select between %a0 and %passthru.
2837 define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
2838 ; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
2839 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2840 ; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
2841 ; CHECK-NEXT: ret <32 x i8> [[TMP2]]
2843 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
2844 %2 = bitcast i32 %mask to <32 x i1>
2845 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; All-zero index vector: the CHECK lines expect a shufflevector with a zeroinitializer mask, i.e. every result lane takes element 0 of %a0.
2849 define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
2850 ; CHECK-LABEL: @zero_test_permvar_qi_256(
2851 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
2852 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
2854 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
; Masked form of the all-zero index case: the call becomes a zero-mask shufflevector, and the bitcast/select on %mask and %passthru are preserved.
2858 define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
2859 ; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
2860 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> zeroinitializer
2861 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2862 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
2863 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
2865 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
2866 %2 = bitcast i32 %mask to <32 x i1>
2867 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; A constant lane-reversing index vector: the CHECK lines expect the permvar.qi.256 call to be replaced by a shufflevector with the same reversed mask.
2871 define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
2872 ; CHECK-LABEL: @shuffle_test_permvar_qi_256(
2873 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2874 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
2876 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Lane-reversing permvar.qi.256 feeding a mask-driven select: the call should become a shufflevector while the bitcast of %mask and the select against %passthru are kept.
2880 define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
2881 ; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
2882 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2883 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2884 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
2885 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
2887 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2888 %2 = bitcast i32 %mask to <32 x i1>
2889 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; Index vector with an undef in lane 0: the CHECK lines expect a shufflevector whose mask keeps that lane as undef and reverses the remaining lanes.
2893 define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
2894 ; CHECK-LABEL: @undef_test_permvar_qi_256(
2895 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2896 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
2898 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked form of the undef-index case: the permvar.qi.256 call should turn
; into a shufflevector with an undef first mask element, while the bitcast of
; %mask and the select against %passthru are expected to remain.
2902 define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
2903 ; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
2904 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> undef, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2905 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2906 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
2907 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
2909 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2910 %2 = bitcast i32 %mask to <32 x i1>
2911 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; 64-byte variable permute. In the tests that follow, the first operand is the
; data vector and the second is the constant per-lane index vector.
2915 declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
; Indices 0..63 pick every byte from its own position, so the permute is
; expected to fold away and the function to return %a0 directly.
2917 define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
2918 ; CHECK-LABEL: @identity_test_permvar_qi_512(
2919 ; CHECK-NEXT: ret <64 x i8> [[A0:%.*]]
2921 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
; Masked identity permute: the call is expected to disappear, leaving only the
; bitcast of %mask and a select that chooses between %a0 and %passthru.
2925 define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
2926 ; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
2927 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
2928 ; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
2929 ; CHECK-NEXT: ret <64 x i8> [[TMP2]]
2931 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
2932 %2 = bitcast i64 %mask to <64 x i1>
2933 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; An all-zero index vector makes every lane read element 0 of %a0; the call is
; expected to become a shufflevector with a zeroinitializer mask.
2937 define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
2938 ; CHECK-LABEL: @zero_test_permvar_qi_512(
2939 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
2940 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
2942 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
; Masked all-zero-index permute: the call should become a shufflevector with a
; zeroinitializer mask, with the bitcast of %mask and the select against
; %passthru left in place.
2946 define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
2947 ; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
2948 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> zeroinitializer
2949 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
2950 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
2951 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
2953 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
2954 %2 = bitcast i64 %mask to <64 x i1>
2955 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; A constant index vector counting 63 down to 0 should let instcombine replace
; the permvar.qi.512 call with a byte-reversing shufflevector of %a0.
2959 define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
2960 ; CHECK-LABEL: @shuffle_test_permvar_qi_512(
2961 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2962 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
2964 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked byte-reversing permute: the call should become a shufflevector with
; mask 63..0, while the bitcast of %mask and the select against %passthru are
; expected to remain.
2968 define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
2969 ; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
2970 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2971 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
2972 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
2973 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
2975 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2976 %2 = bitcast i64 %mask to <64 x i1>
2977 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; Reversing index vector whose first element is undef: the call should still
; become a shufflevector, with the undef index carried over as an undef
; element of the shuffle mask.
2981 define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
2982 ; CHECK-LABEL: @undef_test_permvar_qi_512(
2983 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2984 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
2986 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked form of the undef-index case: the permvar.qi.512 call should turn
; into a shufflevector with an undef first mask element, while the bitcast of
; %mask and the select against %passthru are expected to remain.
2990 define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
2991 ; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
2992 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> undef, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2993 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
2994 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
2995 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
2997 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2998 %2 = bitcast i64 %mask to <64 x i1>
2999 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; Packed <16 x float> add with a trailing i32 immediate. The tests that follow
; pass 4 (call expected to fold to fadd) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector, with 4
; meaning "use the current rounding direction" - confirm against the backend.
3003 declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
; With the i32 operand set to 4, the add.ps.512 call is expected to fold to a
; plain vector fadd.
3005 define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
3006 ; CHECK-LABEL: @test_add_ps(
3007 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
3008 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3010 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; With the i32 operand set to 8, the add.ps.512 call is expected to survive
; instcombine unchanged.
3014 define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
3015 ; CHECK-LABEL: @test_add_ps_round(
3016 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3017 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3019 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fadd, while
; the bitcast of %mask and the select against %c are expected to remain.
3023 define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3024 ; CHECK-LABEL: @test_add_ps_mask(
3025 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
3026 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3027 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3028 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3030 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3031 %2 = bitcast i16 %mask to <16 x i1>
3032 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3036 define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3037 ; CHECK-LABEL: @test_add_ps_mask_round(
3038 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3039 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3040 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3041 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3043 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3044 %2 = bitcast i16 %mask to <16 x i1>
3045 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Packed <8 x double> add with a trailing i32 immediate. The tests that follow
; pass 4 (call expected to fold to fadd) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3049 declare <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double>, <8 x double>, i32)
; With the i32 operand set to 4, the add.pd.512 call is expected to fold to a
; plain vector fadd.
3051 define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
3052 ; CHECK-LABEL: @test_add_pd(
3053 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
3054 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3056 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; With the i32 operand set to 8, the add.pd.512 call is expected to survive
; instcombine unchanged.
3060 define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
3061 ; CHECK-LABEL: @test_add_pd_round(
3062 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3063 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3065 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fadd, while
; the bitcast of %mask and the select against %c are expected to remain.
3069 define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3070 ; CHECK-LABEL: @test_add_pd_mask(
3071 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
3072 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3073 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3074 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3076 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3077 %2 = bitcast i8 %mask to <8 x i1>
3078 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3082 define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3083 ; CHECK-LABEL: @test_add_pd_mask_round(
3084 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3085 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3086 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3087 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3089 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3090 %2 = bitcast i8 %mask to <8 x i1>
3091 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Packed <16 x float> subtract with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fsub) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3095 declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
; With the i32 operand set to 4, the sub.ps.512 call is expected to fold to a
; plain vector fsub.
3097 define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
3098 ; CHECK-LABEL: @test_sub_ps(
3099 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
3100 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3102 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; With the i32 operand set to 8, the sub.ps.512 call is expected to survive
; instcombine unchanged.
3106 define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
3107 ; CHECK-LABEL: @test_sub_ps_round(
3108 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3109 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3111 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fsub, while
; the bitcast of %mask and the select against %c are expected to remain.
3115 define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3116 ; CHECK-LABEL: @test_sub_ps_mask(
3117 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
3118 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3119 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3120 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3122 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3123 %2 = bitcast i16 %mask to <16 x i1>
3124 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3128 define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3129 ; CHECK-LABEL: @test_sub_ps_mask_round(
3130 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3131 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3132 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3133 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3135 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3136 %2 = bitcast i16 %mask to <16 x i1>
3137 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Packed <8 x double> subtract with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fsub) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3141 declare <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double>, <8 x double>, i32)
; With the i32 operand set to 4, the sub.pd.512 call is expected to fold to a
; plain vector fsub.
3143 define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
3144 ; CHECK-LABEL: @test_sub_pd(
3145 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
3146 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3148 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; With the i32 operand set to 8, the sub.pd.512 call is expected to survive
; instcombine unchanged.
3152 define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
3153 ; CHECK-LABEL: @test_sub_pd_round(
3154 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3155 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3157 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fsub, while
; the bitcast of %mask and the select against %c are expected to remain.
3161 define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3162 ; CHECK-LABEL: @test_sub_pd_mask(
3163 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
3164 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3165 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3166 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3168 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3169 %2 = bitcast i8 %mask to <8 x i1>
3170 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3174 define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3175 ; CHECK-LABEL: @test_sub_pd_mask_round(
3176 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3177 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3178 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3179 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3181 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3182 %2 = bitcast i8 %mask to <8 x i1>
3183 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Packed <16 x float> multiply with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fmul) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3187 declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
; With the i32 operand set to 4, the mul.ps.512 call is expected to fold to a
; plain vector fmul.
3189 define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
3190 ; CHECK-LABEL: @test_mul_ps(
3191 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
3192 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3194 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; With the i32 operand set to 8, the mul.ps.512 call is expected to survive
; instcombine unchanged.
3198 define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
3199 ; CHECK-LABEL: @test_mul_ps_round(
3200 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3201 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3203 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fmul, while
; the bitcast of %mask and the select against %c are expected to remain.
3207 define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3208 ; CHECK-LABEL: @test_mul_ps_mask(
3209 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
3210 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3211 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3212 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3214 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3215 %2 = bitcast i16 %mask to <16 x i1>
3216 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3220 define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3221 ; CHECK-LABEL: @test_mul_ps_mask_round(
3222 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3223 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3224 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3225 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3227 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3228 %2 = bitcast i16 %mask to <16 x i1>
3229 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Packed <8 x double> multiply with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fmul) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3233 declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
; With the i32 operand set to 4, the mul.pd.512 call is expected to fold to a
; plain vector fmul.
3235 define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
3236 ; CHECK-LABEL: @test_mul_pd(
3237 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
3238 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3240 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; With the i32 operand set to 8, the mul.pd.512 call is expected to survive
; instcombine unchanged.
3244 define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
3245 ; CHECK-LABEL: @test_mul_pd_round(
3246 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3247 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3249 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fmul, while
; the bitcast of %mask and the select against %c are expected to remain.
3253 define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3254 ; CHECK-LABEL: @test_mul_pd_mask(
3255 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
3256 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3257 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3258 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3260 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3261 %2 = bitcast i8 %mask to <8 x i1>
3262 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3266 define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3267 ; CHECK-LABEL: @test_mul_pd_mask_round(
3268 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3269 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3270 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3271 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3273 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3274 %2 = bitcast i8 %mask to <8 x i1>
3275 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Packed <16 x float> divide with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fdiv) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3279 declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
; With the i32 operand set to 4, the div.ps.512 call is expected to fold to a
; plain vector fdiv.
3281 define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
3282 ; CHECK-LABEL: @test_div_ps(
3283 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
3284 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3286 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; With the i32 operand set to 8, the div.ps.512 call is expected to survive
; instcombine unchanged.
3290 define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
3291 ; CHECK-LABEL: @test_div_ps_round(
3292 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3293 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3295 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fdiv, while
; the bitcast of %mask and the select against %c are expected to remain.
3299 define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3300 ; CHECK-LABEL: @test_div_ps_mask(
3301 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
3302 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3303 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3304 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3306 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3307 %2 = bitcast i16 %mask to <16 x i1>
3308 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3312 define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3313 ; CHECK-LABEL: @test_div_ps_mask_round(
3314 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3315 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3316 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3317 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3319 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3320 %2 = bitcast i16 %mask to <16 x i1>
3321 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Packed <8 x double> divide with a trailing i32 immediate. The tests that
; follow pass 4 (call expected to fold to fdiv) and 8 (call expected to be kept).
; NOTE(review): the immediate is presumably a rounding-mode selector - confirm.
3325 declare <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double>, <8 x double>, i32)
; With the i32 operand set to 4, the div.pd.512 call is expected to fold to a
; plain vector fdiv.
3327 define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
3328 ; CHECK-LABEL: @test_div_pd(
3329 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
3330 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3332 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; With the i32 operand set to 8, the div.pd.512 call is expected to survive
; instcombine unchanged.
3336 define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
3337 ; CHECK-LABEL: @test_div_pd_round(
3338 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3339 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3341 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; Operand-4 case followed by a mask select: the call should become fdiv, while
; the bitcast of %mask and the select against %c are expected to remain.
3345 define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3346 ; CHECK-LABEL: @test_div_pd_mask(
3347 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
3348 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3349 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3350 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3352 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3353 %2 = bitcast i8 %mask to <8 x i1>
3354 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Operand-8 case followed by a mask select: the call, the bitcast of %mask and
; the select against %c are all expected to be preserved.
3358 define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3359 ; CHECK-LABEL: @test_div_pd_mask_round(
3360 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3361 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3362 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3363 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3365 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3366 %2 = bitcast i8 %mask to <8 x i1>
3367 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Compare intrinsic over two <4 x float> operands plus two i32 immediates,
; returning i32. The test that follows passes 0 and 4 for the immediates.
3371 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
; Both operands are built by inserting a scalar into lane 0 and constants into
; lanes 1-3. The expected output keeps only the lane-0 inserts on undef
; vectors, i.e. the upper-lane inserts feeding vcomi.ss are removed.
3373 define i32 @test_comi_ss_0(float %a, float %b) {
3374 ; CHECK-LABEL: @test_comi_ss_0(
3375 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> undef, float [[A:%.*]], i32 0
3376 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> undef, float [[B:%.*]], i32 0
3377 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
3378 ; CHECK-NEXT: ret i32 [[TMP3]]
3380 %1 = insertelement <4 x float> undef, float %a, i32 0
3381 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
3382 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
3383 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
3384 %5 = insertelement <4 x float> undef, float %b, i32 0
3385 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
3386 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
3387 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
3388 %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4)
; Compare intrinsic over two <2 x double> operands plus two i32 immediates,
; returning i32. The test that follows passes 0 and 4 for the immediates.
3392 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
; Both operands are built by inserting a scalar into lane 0 and a constant
; into lane 1. The expected output keeps only the lane-0 inserts on undef
; vectors, i.e. the lane-1 inserts feeding vcomi.sd are removed.
3394 define i32 @test_comi_sd_0(double %a, double %b) {
3395 ; CHECK-LABEL: @test_comi_sd_0(
3396 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> undef, double [[A:%.*]], i32 0
3397 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> undef, double [[B:%.*]], i32 0
3398 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
3399 ; CHECK-NEXT: ret i32 [[TMP3]]
3401 %1 = insertelement <2 x double> undef, double %a, i32 0
3402 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
3403 %3 = insertelement <2 x double> undef, double %b, i32 0
3404 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
3405 %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4)