1 ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py
2 ; RUN: opt < %s -instcombine -mtriple=x86_64-unknown-unknown -S | FileCheck %s
3 target datalayout = "e-m:e-i64:64-f80:128-n8:16:32:64-S128"
5 declare <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked scalar float add (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fadd/insertelement sequence on lane 0, and the inserts into
; lanes 1-3 of %b to disappear.
7 define <4 x float> @test_add_ss(<4 x float> %a, <4 x float> %b) {
9 ; CHECK-LABEL: @test_add_ss(
10 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
11 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
12 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
13 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
14 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
16 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
17 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
18 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
19 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked scalar float add with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the inserts into lanes 1-3 of %b are expected to be dropped.
23 define <4 x float> @test_add_ss_round(<4 x float> %a, <4 x float> %b) {
25 ; CHECK-LABEL: @test_add_ss_round(
26 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
27 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
29 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
30 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
31 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
32 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Scalar float add with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fadd on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the sum when it is set. The
; selected value is inserted into lane 0 of %a. The inserts into lanes 1-3 of
; the passthrough %c are expected to disappear.
36 define <4 x float> @test_add_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
38 ; CHECK-LABEL: @test_add_ss_mask(
39 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
40 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
41 ; CHECK-NEXT: [[TMP3:%.*]] = fadd float [[TMP1]], [[TMP2]]
42 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
43 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
44 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
45 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
46 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
47 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
49 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
50 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
51 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
52 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Scalar float add with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the inserts into lanes 1-3 of %c are expected to be dropped.
56 define <4 x float> @test_add_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
58 ; CHECK-LABEL: @test_add_ss_mask_round(
59 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
60 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
62 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
63 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
64 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
65 %4 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; float add intrinsic, and reads lane 1 of the result into %10.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %10 is what gets returned; the ret instruction is
; not visible in this excerpt.
69 define float @test_add_ss_1(float %a, float %b) {
71 ; CHECK-LABEL: @test_add_ss_1(
72 ; CHECK-NEXT: ret float 1.000000e+00
74 %1 = insertelement <4 x float> undef, float %a, i32 0
75 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
76 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
77 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
78 %5 = insertelement <4 x float> undef, float %b, i32 0
79 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
80 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
81 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
82 %9 = tail call <4 x float> @llvm.x86.avx512.mask.add.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
83 %10 = extractelement <4 x float> %9, i32 1
87 declare <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked scalar double add (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fadd/insertelement sequence on lane 0, and the insert into
; lane 1 of %b to disappear.
89 define <2 x double> @test_add_sd(<2 x double> %a, <2 x double> %b) {
91 ; CHECK-LABEL: @test_add_sd(
92 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
93 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
94 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
95 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
96 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
98 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
99 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked scalar double add with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the insert into lane 1 of %b is expected to be dropped.
103 define <2 x double> @test_add_sd_round(<2 x double> %a, <2 x double> %b) {
105 ; CHECK-LABEL: @test_add_sd_round(
106 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
107 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
109 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
110 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Scalar double add with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fadd on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the sum when it is set. The
; selected value is inserted into lane 0 of %a. The insert into lane 1 of the
; passthrough %c is expected to disappear.
114 define <2 x double> @test_add_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
116 ; CHECK-LABEL: @test_add_sd_mask(
117 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
118 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
119 ; CHECK-NEXT: [[TMP3:%.*]] = fadd double [[TMP1]], [[TMP2]]
120 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
121 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
122 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
123 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
124 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
125 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
127 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
128 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Scalar double add with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the insert into lane 1 of %c is expected to be dropped.
132 define <2 x double> @test_add_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
134 ; CHECK-LABEL: @test_add_sd_mask_round(
135 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
136 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
138 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
139 %2 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; double add intrinsic, and reads lane 1 of the result into %6.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %6 is what gets returned; the ret instruction is
; not visible in this excerpt.
143 define double @test_add_sd_1(double %a, double %b) {
145 ; CHECK-LABEL: @test_add_sd_1(
146 ; CHECK-NEXT: ret double 1.000000e+00
148 %1 = insertelement <2 x double> undef, double %a, i32 0
149 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
150 %3 = insertelement <2 x double> undef, double %b, i32 0
151 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
152 %5 = tail call <2 x double> @llvm.x86.avx512.mask.add.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
153 %6 = extractelement <2 x double> %5, i32 1
157 declare <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked scalar float subtract (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fsub/insertelement sequence on lane 0, and the inserts into
; lanes 1-3 of %b to disappear.
159 define <4 x float> @test_sub_ss(<4 x float> %a, <4 x float> %b) {
161 ; CHECK-LABEL: @test_sub_ss(
162 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
163 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
164 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
165 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
166 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
168 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
169 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
170 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
171 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked scalar float subtract with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the inserts into lanes 1-3 of %b are expected to be dropped.
175 define <4 x float> @test_sub_ss_round(<4 x float> %a, <4 x float> %b) {
177 ; CHECK-LABEL: @test_sub_ss_round(
178 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
179 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
181 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
182 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
183 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
184 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Scalar float subtract with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fsub on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the difference when it is set.
; The selected value is inserted into lane 0 of %a. The inserts into lanes 1-3
; of the passthrough %c are expected to disappear.
188 define <4 x float> @test_sub_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
190 ; CHECK-LABEL: @test_sub_ss_mask(
191 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
192 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
193 ; CHECK-NEXT: [[TMP3:%.*]] = fsub float [[TMP1]], [[TMP2]]
194 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
195 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
196 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
197 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
198 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
199 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
201 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
202 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
203 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
204 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Scalar float subtract with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the inserts into lanes 1-3 of %c are expected to be dropped.
208 define <4 x float> @test_sub_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
210 ; CHECK-LABEL: @test_sub_ss_mask_round(
211 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
212 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
214 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
215 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
216 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
217 %4 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; float subtract intrinsic, and reads lane 1 of the result into %10.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %10 is what gets returned; the ret instruction is
; not visible in this excerpt.
221 define float @test_sub_ss_1(float %a, float %b) {
223 ; CHECK-LABEL: @test_sub_ss_1(
224 ; CHECK-NEXT: ret float 1.000000e+00
226 %1 = insertelement <4 x float> undef, float %a, i32 0
227 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
228 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
229 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
230 %5 = insertelement <4 x float> undef, float %b, i32 0
231 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
232 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
233 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
234 %9 = tail call <4 x float> @llvm.x86.avx512.mask.sub.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
235 %10 = extractelement <4 x float> %9, i32 1
239 declare <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked scalar double subtract (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fsub/insertelement sequence on lane 0, and the insert into
; lane 1 of %b to disappear.
241 define <2 x double> @test_sub_sd(<2 x double> %a, <2 x double> %b) {
243 ; CHECK-LABEL: @test_sub_sd(
244 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
245 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
246 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
247 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
248 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
250 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
251 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked scalar double subtract with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the insert into lane 1 of %b is expected to be dropped.
255 define <2 x double> @test_sub_sd_round(<2 x double> %a, <2 x double> %b) {
257 ; CHECK-LABEL: @test_sub_sd_round(
258 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
259 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
261 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
262 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Scalar double subtract with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fsub on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the difference when it is set.
; The selected value is inserted into lane 0 of %a. The insert into lane 1 of
; the passthrough %c is expected to disappear.
266 define <2 x double> @test_sub_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
268 ; CHECK-LABEL: @test_sub_sd_mask(
269 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
270 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
271 ; CHECK-NEXT: [[TMP3:%.*]] = fsub double [[TMP1]], [[TMP2]]
272 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
273 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
274 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
275 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
276 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
277 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
279 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
280 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Scalar double subtract with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the insert into lane 1 of %c is expected to be dropped.
284 define <2 x double> @test_sub_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
286 ; CHECK-LABEL: @test_sub_sd_mask_round(
287 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
288 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
290 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
291 %2 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; double subtract intrinsic, and reads lane 1 of the result into %6.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %6 is what gets returned; the ret instruction is
; not visible in this excerpt.
295 define double @test_sub_sd_1(double %a, double %b) {
297 ; CHECK-LABEL: @test_sub_sd_1(
298 ; CHECK-NEXT: ret double 1.000000e+00
300 %1 = insertelement <2 x double> undef, double %a, i32 0
301 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
302 %3 = insertelement <2 x double> undef, double %b, i32 0
303 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
304 %5 = tail call <2 x double> @llvm.x86.avx512.mask.sub.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
305 %6 = extractelement <2 x double> %5, i32 1
309 declare <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked scalar float multiply (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fmul/insertelement sequence on lane 0, and the inserts into
; lanes 1-3 of %b to disappear.
311 define <4 x float> @test_mul_ss(<4 x float> %a, <4 x float> %b) {
313 ; CHECK-LABEL: @test_mul_ss(
314 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
315 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
316 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
317 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
318 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
320 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
321 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
322 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
323 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked scalar float multiply with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the inserts into lanes 1-3 of %b are expected to be dropped.
327 define <4 x float> @test_mul_ss_round(<4 x float> %a, <4 x float> %b) {
329 ; CHECK-LABEL: @test_mul_ss_round(
330 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
331 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
333 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
334 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
335 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
336 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Scalar float multiply with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fmul on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the product when it is set.
; The selected value is inserted into lane 0 of %a. The inserts into lanes 1-3
; of the passthrough %c are expected to disappear.
340 define <4 x float> @test_mul_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
342 ; CHECK-LABEL: @test_mul_ss_mask(
343 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
344 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
345 ; CHECK-NEXT: [[TMP3:%.*]] = fmul float [[TMP1]], [[TMP2]]
346 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
347 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
348 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
349 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
350 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
351 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
353 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
354 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
355 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
356 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Scalar float multiply with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the inserts into lanes 1-3 of %c are expected to be dropped.
360 define <4 x float> @test_mul_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
362 ; CHECK-LABEL: @test_mul_ss_mask_round(
363 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
364 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
366 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
367 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
368 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
369 %4 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; float multiply intrinsic, and reads lane 1 of the result into %10.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %10 is what gets returned; the ret instruction is
; not visible in this excerpt.
373 define float @test_mul_ss_1(float %a, float %b) {
375 ; CHECK-LABEL: @test_mul_ss_1(
376 ; CHECK-NEXT: ret float 1.000000e+00
378 %1 = insertelement <4 x float> undef, float %a, i32 0
379 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
380 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
381 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
382 %5 = insertelement <4 x float> undef, float %b, i32 0
383 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
384 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
385 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
386 %9 = tail call <4 x float> @llvm.x86.avx512.mask.mul.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
387 %10 = extractelement <4 x float> %9, i32 1
391 declare <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked scalar double multiply (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fmul/insertelement sequence on lane 0, and the insert into
; lane 1 of %b to disappear.
393 define <2 x double> @test_mul_sd(<2 x double> %a, <2 x double> %b) {
395 ; CHECK-LABEL: @test_mul_sd(
396 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
397 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
398 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
399 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
400 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
402 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
403 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked scalar double multiply with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the insert into lane 1 of %b is expected to be dropped.
407 define <2 x double> @test_mul_sd_round(<2 x double> %a, <2 x double> %b) {
409 ; CHECK-LABEL: @test_mul_sd_round(
410 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
411 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
413 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
414 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Scalar double multiply with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fmul on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the product when it is set.
; The selected value is inserted into lane 0 of %a. The insert into lane 1 of
; the passthrough %c is expected to disappear.
418 define <2 x double> @test_mul_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
420 ; CHECK-LABEL: @test_mul_sd_mask(
421 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
422 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
423 ; CHECK-NEXT: [[TMP3:%.*]] = fmul double [[TMP1]], [[TMP2]]
424 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
425 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
426 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
427 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
428 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
429 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
431 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
432 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Scalar double multiply with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the insert into lane 1 of %c is expected to be dropped.
436 define <2 x double> @test_mul_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
438 ; CHECK-LABEL: @test_mul_sd_mask_round(
439 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
440 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
442 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
443 %2 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; double multiply intrinsic, and reads lane 1 of the result into %6.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %6 is what gets returned; the ret instruction is
; not visible in this excerpt.
447 define double @test_mul_sd_1(double %a, double %b) {
449 ; CHECK-LABEL: @test_mul_sd_1(
450 ; CHECK-NEXT: ret double 1.000000e+00
452 %1 = insertelement <2 x double> undef, double %a, i32 0
453 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
454 %3 = insertelement <2 x double> undef, double %b, i32 0
455 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
456 %5 = tail call <2 x double> @llvm.x86.avx512.mask.mul.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
457 %6 = extractelement <2 x double> %5, i32 1
461 declare <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked scalar float divide (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fdiv/insertelement sequence on lane 0, and the inserts into
; lanes 1-3 of %b to disappear.
463 define <4 x float> @test_div_ss(<4 x float> %a, <4 x float> %b) {
465 ; CHECK-LABEL: @test_div_ss(
466 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
467 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
468 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
469 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <4 x float> [[A]], float [[TMP3]], i64 0
470 ; CHECK-NEXT: ret <4 x float> [[TMP4]]
472 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
473 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
474 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
475 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Unmasked scalar float divide with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the inserts into lanes 1-3 of %b are expected to be dropped.
479 define <4 x float> @test_div_ss_round(<4 x float> %a, <4 x float> %b) {
481 ; CHECK-LABEL: @test_div_ss_round(
482 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 8)
483 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
485 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
486 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
487 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
488 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 8)
; Scalar float divide with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fdiv on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the quotient when it is set.
; The selected value is inserted into lane 0 of %a. The inserts into lanes 1-3
; of the passthrough %c are expected to disappear.
492 define <4 x float> @test_div_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
494 ; CHECK-LABEL: @test_div_ss_mask(
495 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
496 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
497 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv float [[TMP1]], [[TMP2]]
498 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
499 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
500 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
501 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP5]], float [[TMP3]]
502 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
503 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
505 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
506 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
507 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
508 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Scalar float divide with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the inserts into lanes 1-3 of %c are expected to be dropped.
512 define <4 x float> @test_div_ss_mask_round(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
514 ; CHECK-LABEL: @test_div_ss_mask_round(
515 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
516 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
518 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
519 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
520 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
521 %4 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; float divide intrinsic, and reads lane 1 of the result into %10.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %10 is what gets returned; the ret instruction is
; not visible in this excerpt.
525 define float @test_div_ss_1(float %a, float %b) {
527 ; CHECK-LABEL: @test_div_ss_1(
528 ; CHECK-NEXT: ret float 1.000000e+00
530 %1 = insertelement <4 x float> undef, float %a, i32 0
531 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
532 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
533 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
534 %5 = insertelement <4 x float> undef, float %b, i32 0
535 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
536 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
537 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
538 %9 = tail call <4 x float> @llvm.x86.avx512.mask.div.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
539 %10 = extractelement <4 x float> %9, i32 1
543 declare <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Unmasked scalar double divide (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be replaced by an
; extractelement/fdiv/insertelement sequence on lane 0, and the insert into
; lane 1 of %b to disappear.
545 define <2 x double> @test_div_sd(<2 x double> %a, <2 x double> %b) {
547 ; CHECK-LABEL: @test_div_sd(
548 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
549 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
550 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
551 ; CHECK-NEXT: [[TMP4:%.*]] = insertelement <2 x double> [[A]], double [[TMP3]], i64 0
552 ; CHECK-NEXT: ret <2 x double> [[TMP4]]
554 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
555 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Unmasked scalar double divide with rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, but with %b passed
; directly: the insert into lane 1 of %b is expected to be dropped.
559 define <2 x double> @test_div_sd_round(<2 x double> %a, <2 x double> %b) {
561 ; CHECK-LABEL: @test_div_sd_round(
562 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 8)
563 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
565 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
566 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 8)
; Scalar double divide with a variable mask and rounding operand i32 4.
; The CHECK lines expect a scalar fdiv on lane 0 followed by a select on bit 0
; of %mask: lane 0 of %c when the bit is clear, the quotient when it is set.
; The selected value is inserted into lane 0 of %a. The insert into lane 1 of
; the passthrough %c is expected to disappear.
570 define <2 x double> @test_div_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
572 ; CHECK-LABEL: @test_div_sd_mask(
573 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
574 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
575 ; CHECK-NEXT: [[TMP3:%.*]] = fdiv double [[TMP1]], [[TMP2]]
576 ; CHECK-NEXT: [[TMP4:%.*]] = and i8 [[MASK:%.*]], 1
577 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP4]], 0
578 ; CHECK-NEXT: [[TMP5:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
579 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP5]], double [[TMP3]]
580 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
581 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
583 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
584 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Scalar double divide with a variable mask and rounding operand i32 8.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the insert into lane 1 of %c is expected to be dropped.
588 define <2 x double> @test_div_sd_mask_round(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
590 ; CHECK-LABEL: @test_div_sd_mask_round(
591 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 8)
592 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
594 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
595 %2 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 8)
; Builds both vector operands from scalars plus constants, calls the scalar
; double divide intrinsic, and reads lane 1 of the result into %6.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %6 is what gets returned; the ret instruction is
; not visible in this excerpt.
599 define double @test_div_sd_1(double %a, double %b) {
601 ; CHECK-LABEL: @test_div_sd_1(
602 ; CHECK-NEXT: ret double 1.000000e+00
604 %1 = insertelement <2 x double> undef, double %a, i32 0
605 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
606 %3 = insertelement <2 x double> undef, double %b, i32 0
607 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
608 %5 = tail call <2 x double> @llvm.x86.avx512.mask.div.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
609 %6 = extractelement <2 x double> %5, i32 1
613 declare <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Unmasked scalar float max (mask i8 -1, rounding operand i32 4).
; The CHECK lines expect the intrinsic call to be kept as a call (no scalar
; replacement), but with %b passed directly: the inserts into lanes 1-3 of %b
; are expected to be dropped.
615 define <4 x float> @test_max_ss(<4 x float> %a, <4 x float> %b) {
617 ; CHECK-LABEL: @test_max_ss(
618 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
619 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
621 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
622 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
623 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
624 %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Scalar float max with a variable mask and rounding operand i32 4.
; The CHECK lines expect the intrinsic call to be kept, with %c passed directly
; as the passthrough: the inserts into lanes 1-3 of %c are expected to be dropped.
628 define <4 x float> @test_max_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
630 ; CHECK-LABEL: @test_max_ss_mask(
631 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
632 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
634 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
635 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
636 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
637 %4 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Builds both vector operands from scalars plus constants, calls the scalar
; float max intrinsic, and reads lane 1 of the result into %10.
; The CHECK lines expect the whole body to fold to the constant 1.0, which is
; the value inserted into lane 1 of the first operand (%2).
; NOTE(review): presumably %10 is what gets returned; the ret instruction is
; not visible in this excerpt.
641 define float @test_max_ss_1(float %a, float %b) {
643 ; CHECK-LABEL: @test_max_ss_1(
644 ; CHECK-NEXT: ret float 1.000000e+00
646 %1 = insertelement <4 x float> undef, float %a, i32 0
647 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
648 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
649 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
650 %5 = insertelement <4 x float> undef, float %b, i32 0
651 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
652 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
653 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
654 %9 = tail call <4 x float> @llvm.x86.avx512.mask.max.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
655 %10 = extractelement <4 x float> %9, i32 1
659 declare <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Lane 1 of %b is overwritten with a constant before %b is passed as the
; second operand of the masked scalar-double max intrinsic. The expected
; output drops the insertelement and calls the intrinsic on %b directly.
661 define <2 x double> @test_max_sd(<2 x double> %a, <2 x double> %b) {
663 ; CHECK-LABEL: @test_max_sd(
664 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
665 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
667 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
668 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Lane 1 of %c is overwritten with a constant before %c is passed as the
; third vector operand of the masked scalar-double max intrinsic, with a
; variable %mask. The expected output passes %c directly.
672 define <2 x double> @test_max_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
674 ; CHECK-LABEL: @test_max_sd_mask(
675 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
676 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
678 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
679 %2 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Builds both vector operands from a scalar in lane 0 plus a constant in lane
; 1, calls the masked scalar-double max intrinsic (last argument i32 8), and
; extracts lane 1 of the result. The expected output is the constant 1.0,
; which is the value inserted into lane 1 of the first vector operand.
683 define double @test_max_sd_1(double %a, double %b) {
685 ; CHECK-LABEL: @test_max_sd_1(
686 ; CHECK-NEXT: ret double 1.000000e+00
688 %1 = insertelement <2 x double> undef, double %a, i32 0
689 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
690 %3 = insertelement <2 x double> undef, double %b, i32 0
691 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
692 %5 = tail call <2 x double> @llvm.x86.avx512.mask.max.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
693 %6 = extractelement <2 x double> %5, i32 1
697 declare <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float>, <4 x float>, <4 x float>, i8, i32)
; Lanes 1-3 of %b are overwritten with constants before %b is passed as the
; second operand of the masked scalar min intrinsic. The expected output
; drops the three insertelements and calls the intrinsic on %b directly.
699 define <4 x float> @test_min_ss(<4 x float> %a, <4 x float> %b) {
701 ; CHECK-LABEL: @test_min_ss(
702 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> undef, i8 -1, i32 4)
703 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
705 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
706 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
707 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
708 %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %3, <4 x float> undef, i8 -1, i32 4)
; Lanes 1-3 of %c are overwritten with constants before %c is passed as the
; third vector operand of the masked scalar min intrinsic, with a variable
; %mask. The expected output drops the insertelements and passes %c directly.
712 define <4 x float> @test_min_ss_mask(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
714 ; CHECK-LABEL: @test_min_ss_mask(
715 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], <4 x float> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
716 ; CHECK-NEXT: ret <4 x float> [[TMP1]]
718 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
719 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
720 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
721 %4 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %a, <4 x float> %b, <4 x float> %3, i8 %mask, i32 4)
; Builds both vector operands from a scalar in lane 0 plus constants in lanes
; 1-3, calls the masked scalar min intrinsic (last argument i32 8), and
; extracts lane 1 of the result. The expected output is the constant 1.0,
; which is the value inserted into lane 1 of the first vector operand.
725 define float @test_min_ss_1(float %a, float %b) {
727 ; CHECK-LABEL: @test_min_ss_1(
728 ; CHECK-NEXT: ret float 1.000000e+00
730 %1 = insertelement <4 x float> undef, float %a, i32 0
731 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
732 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
733 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
734 %5 = insertelement <4 x float> undef, float %b, i32 0
735 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
736 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
737 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
738 %9 = tail call <4 x float> @llvm.x86.avx512.mask.min.ss.round(<4 x float> %4, <4 x float> %8, <4 x float> undef, i8 -1, i32 8)
739 %10 = extractelement <4 x float> %9, i32 1
743 declare <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double>, <2 x double>, <2 x double>, i8, i32)
; Lane 1 of %b is overwritten with a constant before %b is passed as the
; second operand of the masked scalar-double min intrinsic. The expected
; output drops the insertelement and calls the intrinsic on %b directly.
745 define <2 x double> @test_min_sd(<2 x double> %a, <2 x double> %b) {
747 ; CHECK-LABEL: @test_min_sd(
748 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> undef, i8 -1, i32 4)
749 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
751 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
752 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %1, <2 x double> undef, i8 -1, i32 4)
; Lane 1 of %c is overwritten with a constant before %c is passed as the
; third vector operand of the masked scalar-double min intrinsic, with a
; variable %mask. The expected output passes %c directly.
756 define <2 x double> @test_min_sd_mask(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
758 ; CHECK-LABEL: @test_min_sd_mask(
759 ; CHECK-NEXT: [[TMP1:%.*]] = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], <2 x double> [[C:%.*]], i8 [[MASK:%.*]], i32 4)
760 ; CHECK-NEXT: ret <2 x double> [[TMP1]]
762 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
763 %2 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %a, <2 x double> %b, <2 x double> %1, i8 %mask, i32 4)
; Builds both vector operands from a scalar in lane 0 plus a constant in lane
; 1, calls the masked scalar-double min intrinsic (last argument i32 8), and
; extracts lane 1 of the result. The expected output is the constant 1.0,
; which is the value inserted into lane 1 of the first vector operand.
767 define double @test_min_sd_1(double %a, double %b) {
769 ; CHECK-LABEL: @test_min_sd_1(
770 ; CHECK-NEXT: ret double 1.000000e+00
772 %1 = insertelement <2 x double> undef, double %a, i32 0
773 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
774 %3 = insertelement <2 x double> undef, double %b, i32 0
775 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
776 %5 = tail call <2 x double> @llvm.x86.avx512.mask.min.sd.round(<2 x double> %2, <2 x double> %4, <2 x double> undef, i8 -1, i32 8)
777 %6 = extractelement <2 x double> %5, i32 1
781 declare i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float>, <4 x float>, i32, i8, i32)
; Lanes 1-3 of both %a and %b are overwritten with constants before the
; masked scalar compare intrinsic is called. The expected output drops all
; six insertelements and calls the intrinsic on %a and %b directly.
783 define i8 @test_cmp_ss(<4 x float> %a, <4 x float> %b, i8 %mask) {
785 ; CHECK-LABEL: @test_cmp_ss(
786 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> [[A:%.*]], <4 x float> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
787 ; CHECK-NEXT: ret i8 [[TMP1]]
789 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
790 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
791 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
792 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
793 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
794 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
795 %7 = tail call i8 @llvm.x86.avx512.mask.cmp.ss(<4 x float> %3, <4 x float> %6, i32 3, i8 %mask, i32 4)
799 declare i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double>, <2 x double>, i32, i8, i32)
; Lane 1 of both %a and %b is overwritten with a constant before the masked
; scalar-double compare intrinsic is called. The expected output drops both
; insertelements and calls the intrinsic on %a and %b directly.
801 define i8 @test_cmp_sd(<2 x double> %a, <2 x double> %b, i8 %mask) {
803 ; CHECK-LABEL: @test_cmp_sd(
804 ; CHECK-NEXT: [[TMP1:%.*]] = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> [[A:%.*]], <2 x double> [[B:%.*]], i32 3, i8 [[MASK:%.*]], i32 4)
805 ; CHECK-NEXT: ret i8 [[TMP1]]
807 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
808 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
809 %3 = tail call i8 @llvm.x86.avx512.mask.cmp.sd(<2 x double> %1, <2 x double> %2, i32 3, i8 %mask, i32 4)
; For each of eight scalar float/double-to-integer conversion intrinsics,
; builds a vector with %f (or %d) in lane 0 and 0.0 in every other lane, then
; calls the intrinsic. The expected output keeps only a single insertelement
; into poison at lane 0 per call; the inserts into the upper lanes are
; dropped. The i32 results are summed and sign-extended, then added to the
; sum of the i64 results; the expected output returns that total.
813 define i64 @test(float %f, double %d) {
815 ; CHECK-LABEL: @test(
816 ; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> poison, float [[F:%.*]], i64 0
817 ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> [[V03]], i32 4)
818 ; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
819 ; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> [[V13]], i32 4)
820 ; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
821 ; CHECK-NEXT: [[T2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> [[V23]], i32 4)
822 ; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
823 ; CHECK-NEXT: [[T3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> [[V33]], i32 4)
824 ; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> poison, double [[D:%.*]], i64 0
825 ; CHECK-NEXT: [[T4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> [[V41]], i32 4)
826 ; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
827 ; CHECK-NEXT: [[T5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> [[V51]], i32 4)
828 ; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
829 ; CHECK-NEXT: [[T6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> [[V61]], i32 4)
830 ; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
831 ; CHECK-NEXT: [[T7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> [[V71]], i32 4)
832 ; CHECK-NEXT: [[T8:%.*]] = add i32 [[T0]], [[T2]]
833 ; CHECK-NEXT: [[T9:%.*]] = add i32 [[T4]], [[T6]]
834 ; CHECK-NEXT: [[T10:%.*]] = add i32 [[T8]], [[T9]]
835 ; CHECK-NEXT: [[T11:%.*]] = sext i32 [[T10]] to i64
836 ; CHECK-NEXT: [[T12:%.*]] = add i64 [[T1]], [[T3]]
837 ; CHECK-NEXT: [[T13:%.*]] = add i64 [[T5]], [[T7]]
838 ; CHECK-NEXT: [[T14:%.*]] = add i64 [[T12]], [[T13]]
839 ; CHECK-NEXT: [[T15:%.*]] = add i64 [[T14]], [[T11]]
840 ; CHECK-NEXT: ret i64 [[T15]]
842 %v00 = insertelement <4 x float> undef, float %f, i32 0
843 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
844 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
845 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
846 %t0 = tail call i32 @llvm.x86.avx512.vcvtss2si32(<4 x float> %v03, i32 4)
847 %v10 = insertelement <4 x float> undef, float %f, i32 0
848 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
849 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
850 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
851 %t1 = tail call i64 @llvm.x86.avx512.vcvtss2si64(<4 x float> %v13, i32 4)
852 %v20 = insertelement <4 x float> undef, float %f, i32 0
853 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
854 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
855 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
856 %t2 = tail call i32 @llvm.x86.avx512.cvttss2si(<4 x float> %v23, i32 4)
857 %v30 = insertelement <4 x float> undef, float %f, i32 0
858 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
859 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
860 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
861 %t3 = tail call i64 @llvm.x86.avx512.cvttss2si64(<4 x float> %v33, i32 4)
862 %v40 = insertelement <2 x double> undef, double %d, i32 0
863 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
864 %t4 = tail call i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double> %v41, i32 4)
865 %v50 = insertelement <2 x double> undef, double %d, i32 0
866 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
867 %t5 = tail call i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double> %v51, i32 4)
868 %v60 = insertelement <2 x double> undef, double %d, i32 0
869 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
870 %t6 = tail call i32 @llvm.x86.avx512.cvttsd2si(<2 x double> %v61, i32 4)
871 %v70 = insertelement <2 x double> undef, double %d, i32 0
872 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
873 %t7 = tail call i64 @llvm.x86.avx512.cvttsd2si64(<2 x double> %v71, i32 4)
874 %t8 = add i32 %t0, %t2
875 %t9 = add i32 %t4, %t6
876 %t10 = add i32 %t8, %t9
877 %t11 = sext i32 %t10 to i64
878 %t12 = add i64 %t1, %t3
879 %t13 = add i64 %t5, %t7
880 %t14 = add i64 %t12, %t13
881 %t15 = add i64 %t11, %t14
885 declare i32 @llvm.x86.avx512.vcvtss2si32(<4 x float>, i32)
886 declare i64 @llvm.x86.avx512.vcvtss2si64(<4 x float>, i32)
887 declare i32 @llvm.x86.avx512.cvttss2si(<4 x float>, i32)
888 declare i64 @llvm.x86.avx512.cvttss2si64(<4 x float>, i32)
889 declare i32 @llvm.x86.avx512.vcvtsd2si32(<2 x double>, i32)
890 declare i64 @llvm.x86.avx512.vcvtsd2si64(<2 x double>, i32)
891 declare i32 @llvm.x86.avx512.cvttsd2si(<2 x double>, i32)
892 declare i64 @llvm.x86.avx512.cvttsd2si64(<2 x double>, i32)
; Same structure as the @test function, but calling the *2usi* conversion
; intrinsics. Each call receives a vector with %f (or %d) in lane 0 and 0.0
; in every other lane; the expected output keeps only a single insertelement
; into poison at lane 0 per call. The i32 results are summed and
; sign-extended, then added to the sum of the i64 results; the expected
; output returns that total.
894 define i64 @test2(float %f, double %d) {
896 ; CHECK-LABEL: @test2(
897 ; CHECK-NEXT: [[V03:%.*]] = insertelement <4 x float> poison, float [[F:%.*]], i64 0
898 ; CHECK-NEXT: [[T0:%.*]] = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> [[V03]], i32 4)
899 ; CHECK-NEXT: [[V13:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
900 ; CHECK-NEXT: [[T1:%.*]] = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> [[V13]], i32 4)
901 ; CHECK-NEXT: [[V23:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
902 ; CHECK-NEXT: [[T2:%.*]] = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> [[V23]], i32 4)
903 ; CHECK-NEXT: [[V33:%.*]] = insertelement <4 x float> poison, float [[F]], i64 0
904 ; CHECK-NEXT: [[T3:%.*]] = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> [[V33]], i32 4)
905 ; CHECK-NEXT: [[V41:%.*]] = insertelement <2 x double> poison, double [[D:%.*]], i64 0
906 ; CHECK-NEXT: [[T4:%.*]] = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> [[V41]], i32 4)
907 ; CHECK-NEXT: [[V51:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
908 ; CHECK-NEXT: [[T5:%.*]] = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> [[V51]], i32 4)
909 ; CHECK-NEXT: [[V61:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
910 ; CHECK-NEXT: [[T6:%.*]] = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> [[V61]], i32 4)
911 ; CHECK-NEXT: [[V71:%.*]] = insertelement <2 x double> poison, double [[D]], i64 0
912 ; CHECK-NEXT: [[T7:%.*]] = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> [[V71]], i32 4)
913 ; CHECK-NEXT: [[T8:%.*]] = add i32 [[T0]], [[T2]]
914 ; CHECK-NEXT: [[T9:%.*]] = add i32 [[T4]], [[T6]]
915 ; CHECK-NEXT: [[T10:%.*]] = add i32 [[T8]], [[T9]]
916 ; CHECK-NEXT: [[T11:%.*]] = sext i32 [[T10]] to i64
917 ; CHECK-NEXT: [[T12:%.*]] = add i64 [[T1]], [[T3]]
918 ; CHECK-NEXT: [[T13:%.*]] = add i64 [[T5]], [[T7]]
919 ; CHECK-NEXT: [[T14:%.*]] = add i64 [[T12]], [[T13]]
920 ; CHECK-NEXT: [[T15:%.*]] = add i64 [[T14]], [[T11]]
921 ; CHECK-NEXT: ret i64 [[T15]]
923 %v00 = insertelement <4 x float> undef, float %f, i32 0
924 %v01 = insertelement <4 x float> %v00, float 0.000000e+00, i32 1
925 %v02 = insertelement <4 x float> %v01, float 0.000000e+00, i32 2
926 %v03 = insertelement <4 x float> %v02, float 0.000000e+00, i32 3
927 %t0 = tail call i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float> %v03, i32 4)
928 %v10 = insertelement <4 x float> undef, float %f, i32 0
929 %v11 = insertelement <4 x float> %v10, float 0.000000e+00, i32 1
930 %v12 = insertelement <4 x float> %v11, float 0.000000e+00, i32 2
931 %v13 = insertelement <4 x float> %v12, float 0.000000e+00, i32 3
932 %t1 = tail call i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float> %v13, i32 4)
933 %v20 = insertelement <4 x float> undef, float %f, i32 0
934 %v21 = insertelement <4 x float> %v20, float 0.000000e+00, i32 1
935 %v22 = insertelement <4 x float> %v21, float 0.000000e+00, i32 2
936 %v23 = insertelement <4 x float> %v22, float 0.000000e+00, i32 3
937 %t2 = tail call i32 @llvm.x86.avx512.cvttss2usi(<4 x float> %v23, i32 4)
938 %v30 = insertelement <4 x float> undef, float %f, i32 0
939 %v31 = insertelement <4 x float> %v30, float 0.000000e+00, i32 1
940 %v32 = insertelement <4 x float> %v31, float 0.000000e+00, i32 2
941 %v33 = insertelement <4 x float> %v32, float 0.000000e+00, i32 3
942 %t3 = tail call i64 @llvm.x86.avx512.cvttss2usi64(<4 x float> %v33, i32 4)
943 %v40 = insertelement <2 x double> undef, double %d, i32 0
944 %v41 = insertelement <2 x double> %v40, double 0.000000e+00, i32 1
945 %t4 = tail call i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double> %v41, i32 4)
946 %v50 = insertelement <2 x double> undef, double %d, i32 0
947 %v51 = insertelement <2 x double> %v50, double 0.000000e+00, i32 1
948 %t5 = tail call i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double> %v51, i32 4)
949 %v60 = insertelement <2 x double> undef, double %d, i32 0
950 %v61 = insertelement <2 x double> %v60, double 0.000000e+00, i32 1
951 %t6 = tail call i32 @llvm.x86.avx512.cvttsd2usi(<2 x double> %v61, i32 4)
952 %v70 = insertelement <2 x double> undef, double %d, i32 0
953 %v71 = insertelement <2 x double> %v70, double 0.000000e+00, i32 1
954 %t7 = tail call i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double> %v71, i32 4)
955 %t8 = add i32 %t0, %t2
956 %t9 = add i32 %t4, %t6
957 %t10 = add i32 %t8, %t9
958 %t11 = sext i32 %t10 to i64
959 %t12 = add i64 %t1, %t3
960 %t13 = add i64 %t5, %t7
961 %t14 = add i64 %t12, %t13
962 %t15 = add i64 %t11, %t14
966 declare i32 @llvm.x86.avx512.vcvtss2usi32(<4 x float>, i32)
967 declare i64 @llvm.x86.avx512.vcvtss2usi64(<4 x float>, i32)
968 declare i32 @llvm.x86.avx512.cvttss2usi(<4 x float>, i32)
969 declare i64 @llvm.x86.avx512.cvttss2usi64(<4 x float>, i32)
970 declare i32 @llvm.x86.avx512.vcvtsd2usi32(<2 x double>, i32)
971 declare i64 @llvm.x86.avx512.vcvtsd2usi64(<2 x double>, i32)
972 declare i32 @llvm.x86.avx512.cvttsd2usi(<2 x double>, i32)
973 declare i64 @llvm.x86.avx512.cvttsd2usi64(<2 x double>, i32)
975 declare float @llvm.fma.f32(float, float, float) #1
; Scalar FMA on lane 0 of %a, %b and %c. Bit 0 of %mask selects between the
; FMA result and lane 0 of %a, and the chosen value is inserted into lane 0
; of %a. Lanes 1-3 of %b and %c are first overwritten with constants; the
; expected output drops those inserts and extracts straight from %b and %c.
; It also rewrites the bitcast/extractelement mask test as
; `and i8 %mask, 1` + `icmp eq ..., 0`, with the select arms swapped.
977 define <4 x float> @test_mask_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
979 ; CHECK-LABEL: @test_mask_vfmadd_ss(
980 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
981 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
982 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
983 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
984 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
985 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
986 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
987 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
988 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
990 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
991 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
992 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
993 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
994 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
995 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
996 %7 = extractelement <4 x float> %a, i64 0
997 %8 = extractelement <4 x float> %3, i64 0
998 %9 = extractelement <4 x float> %6, i64 0
999 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
1000 %11 = bitcast i8 %mask to <8 x i1>
1001 %12 = extractelement <8 x i1> %11, i64 0
1002 %13 = select i1 %12, float %10, float %7
1003 %14 = insertelement <4 x float> %a, float %13, i64 0
; Masked scalar FMA whose first operand is %a with lanes 1-3 overwritten by
; constants; only lane 0 of the final vector is extracted. The expected
; output returns the selected scalar directly, reading lane 0 from %a with
; no insertelement left.
1007 define float @test_mask_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1009 ; CHECK-LABEL: @test_mask_vfmadd_ss_0(
1010 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1011 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1012 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1013 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1014 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1015 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1016 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP1]], float [[TMP4]]
1017 ; CHECK-NEXT: ret float [[TMP6]]
1019 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1020 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1021 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1022 %4 = extractelement <4 x float> %3, i64 0
1023 %5 = extractelement <4 x float> %b, i64 0
1024 %6 = extractelement <4 x float> %c, i64 0
1025 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1026 %8 = bitcast i8 %mask to <8 x i1>
1027 %9 = extractelement <8 x i1> %8, i64 0
1028 %10 = select i1 %9, float %7, float %4
1029 %11 = insertelement <4 x float> %3, float %10, i64 0
1030 %12 = extractelement <4 x float> %11, i32 1
; Same input as the _0 variant, but lane 1 of the final vector is extracted.
; The FMA/select result is only written to lane 0, so the expected output is
; the constant 1.0 that was inserted into lane 1 of %a.
1034 define float @test_mask_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1036 ; CHECK-LABEL: @test_mask_vfmadd_ss_1(
1037 ; CHECK-NEXT: ret float 1.000000e+00
1039 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1040 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1041 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1042 %4 = extractelement <4 x float> %3, i64 0
1043 %5 = extractelement <4 x float> %b, i64 0
1044 %6 = extractelement <4 x float> %c, i64 0
1045 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1046 %8 = bitcast i8 %mask to <8 x i1>
1047 %9 = extractelement <8 x i1> %8, i64 0
1048 %10 = select i1 %9, float %7, float %4
1049 %11 = insertelement <4 x float> %3, float %10, i64 0
1050 %12 = extractelement <4 x float> %11, i32 1
1054 declare double @llvm.fma.f64(double, double, double) #1
; Scalar-double FMA on lane 0 of %a, %b and %c. Bit 0 of %mask selects
; between the FMA result and lane 0 of %a, and the chosen value is inserted
; into lane 0 of %a. Lane 1 of %b and %c is first overwritten with a
; constant; the expected output drops those inserts and extracts straight
; from %b and %c. The mask test becomes `and i8 %mask, 1` +
; `icmp eq ..., 0`, with the select arms swapped.
1056 define <2 x double> @test_mask_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1058 ; CHECK-LABEL: @test_mask_vfmadd_sd(
1059 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1060 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1061 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1062 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1063 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1064 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1065 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
1066 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
1067 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
1069 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
1070 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
1071 %3 = extractelement <2 x double> %a, i64 0
1072 %4 = extractelement <2 x double> %1, i64 0
1073 %5 = extractelement <2 x double> %2, i64 0
1074 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1075 %7 = bitcast i8 %mask to <8 x i1>
1076 %8 = extractelement <8 x i1> %7, i64 0
1077 %9 = select i1 %8, double %6, double %3
1078 %10 = insertelement <2 x double> %a, double %9, i64 0
1079 ret <2 x double> %10
; Masked scalar-double FMA whose first operand is %a with lane 1 overwritten
; by a constant; only lane 0 of the final vector is extracted. The expected
; output returns the selected scalar directly, reading lane 0 from %a with
; no insertelement left.
1082 define double @test_mask_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1084 ; CHECK-LABEL: @test_mask_vfmadd_sd_0(
1085 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1086 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1087 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1088 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1089 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1090 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1091 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP1]], double [[TMP4]]
1092 ; CHECK-NEXT: ret double [[TMP6]]
1094 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1095 %2 = extractelement <2 x double> %1, i64 0
1096 %3 = extractelement <2 x double> %b, i64 0
1097 %4 = extractelement <2 x double> %c, i64 0
1098 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1099 %6 = bitcast i8 %mask to <8 x i1>
1100 %7 = extractelement <8 x i1> %6, i64 0
1101 %8 = select i1 %7, double %5, double %2
1102 %9 = insertelement <2 x double> %1, double %8, i64 0
1103 %10 = extractelement <2 x double> %9, i32 0
; Same input as the _0 variant, but lane 1 of the final vector is extracted.
; The FMA/select result is only written to lane 0, so the expected output is
; the constant 1.0 that was inserted into lane 1 of %a.
1107 define double @test_mask_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1109 ; CHECK-LABEL: @test_mask_vfmadd_sd_1(
1110 ; CHECK-NEXT: ret double 1.000000e+00
1112 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1113 %2 = extractelement <2 x double> %1, i64 0
1114 %3 = extractelement <2 x double> %b, i64 0
1115 %4 = extractelement <2 x double> %c, i64 0
1116 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1117 %6 = bitcast i8 %mask to <8 x i1>
1118 %7 = extractelement <8 x i1> %6, i64 0
1119 %8 = select i1 %7, double %5, double %2
1120 %9 = insertelement <2 x double> %1, double %8, i64 0
1121 %10 = extractelement <2 x double> %9, i32 1
; Scalar FMA on lane 0 of %a, %b and %c. Bit 0 of %mask selects between the
; FMA result and 0.0, and the chosen value is inserted into lane 0 of %a.
; Lanes 1-3 of %b and %c are first overwritten with constants; the expected
; output drops those inserts and extracts straight from %b and %c. The mask
; test becomes `and i8 %mask, 1` + `icmp eq ..., 0`, with the select arms
; swapped.
1125 define <4 x float> @test_maskz_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1127 ; CHECK-LABEL: @test_maskz_vfmadd_ss(
1128 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1129 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1130 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1131 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1132 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1133 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1134 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
1135 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[A]], float [[TMP6]], i64 0
1136 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
1138 %1 = insertelement <4 x float> %b, float 1.000000e+00, i32 1
1139 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1140 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1141 %4 = insertelement <4 x float> %c, float 4.000000e+00, i32 1
1142 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1143 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1144 %7 = extractelement <4 x float> %a, i64 0
1145 %8 = extractelement <4 x float> %3, i64 0
1146 %9 = extractelement <4 x float> %6, i64 0
1147 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
1148 %11 = bitcast i8 %mask to <8 x i1>
1149 %12 = extractelement <8 x i1> %11, i64 0
1150 %13 = select i1 %12, float %10, float 0.000000e+00
1151 %14 = insertelement <4 x float> %a, float %13, i64 0
; Zero-masked scalar FMA whose first operand is %a with lanes 1-3 overwritten
; by constants; only lane 0 of the final vector is extracted. The expected
; output returns the selected scalar (FMA result or 0.0) directly, reading
; lane 0 from %a with no insertelement left.
1155 define float @test_maskz_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1157 ; CHECK-LABEL: @test_maskz_vfmadd_ss_0(
1158 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1159 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1160 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1161 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1162 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1163 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1164 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float 0.000000e+00, float [[TMP4]]
1165 ; CHECK-NEXT: ret float [[TMP6]]
1167 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1168 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1169 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1170 %4 = extractelement <4 x float> %3, i64 0
1171 %5 = extractelement <4 x float> %b, i64 0
1172 %6 = extractelement <4 x float> %c, i64 0
1173 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1174 %8 = bitcast i8 %mask to <8 x i1>
1175 %9 = extractelement <8 x i1> %8, i64 0
1176 %10 = select i1 %9, float %7, float 0.000000e+00
1177 %11 = insertelement <4 x float> %3, float %10, i64 0
1178 %12 = extractelement <4 x float> %11, i32 0
; Same input as the _0 variant, but lane 1 of the final vector is extracted.
; The FMA/select result is only written to lane 0, so the expected output is
; the constant 1.0 that was inserted into lane 1 of %a.
1182 define float @test_maskz_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1184 ; CHECK-LABEL: @test_maskz_vfmadd_ss_1(
1185 ; CHECK-NEXT: ret float 1.000000e+00
1187 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1188 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1189 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1190 %4 = extractelement <4 x float> %3, i64 0
1191 %5 = extractelement <4 x float> %b, i64 0
1192 %6 = extractelement <4 x float> %c, i64 0
1193 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1194 %8 = bitcast i8 %mask to <8 x i1>
1195 %9 = extractelement <8 x i1> %8, i64 0
1196 %10 = select i1 %9, float %7, float 0.000000e+00
1197 %11 = insertelement <4 x float> %3, float %10, i64 0
1198 %12 = extractelement <4 x float> %11, i32 1
; Scalar-double FMA on lane 0 of %a, %b and %c. Bit 0 of %mask selects
; between the FMA result and 0.0, and the chosen value is inserted into lane
; 0 of %a. Lane 1 of %b and %c is first overwritten with a constant; the
; expected output drops those inserts and extracts straight from %b and %c.
; The mask test becomes `and i8 %mask, 1` + `icmp eq ..., 0`, with the
; select arms swapped.
1202 define <2 x double> @test_maskz_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1204 ; CHECK-LABEL: @test_maskz_vfmadd_sd(
1205 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1206 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1207 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1208 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1209 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1210 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1211 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
1212 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[A]], double [[TMP6]], i64 0
1213 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
1215 %1 = insertelement <2 x double> %b, double 1.000000e+00, i32 1
1216 %2 = insertelement <2 x double> %c, double 2.000000e+00, i32 1
1217 %3 = extractelement <2 x double> %a, i64 0
1218 %4 = extractelement <2 x double> %1, i64 0
1219 %5 = extractelement <2 x double> %2, i64 0
1220 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1221 %7 = bitcast i8 %mask to <8 x i1>
1222 %8 = extractelement <8 x i1> %7, i64 0
1223 %9 = select i1 %8, double %6, double 0.000000e+00
1224 %10 = insertelement <2 x double> %a, double %9, i64 0
1225 ret <2 x double> %10
; Zero-masked scalar-double FMA whose first operand is %a with lane 1
; overwritten by a constant; only lane 0 of the final vector is extracted.
; The expected output returns the selected scalar (FMA result or 0.0)
; directly, reading lane 0 from %a with no insertelement left.
1228 define double @test_maskz_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1230 ; CHECK-LABEL: @test_maskz_vfmadd_sd_0(
1231 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1232 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1233 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1234 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1235 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1236 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1237 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double 0.000000e+00, double [[TMP4]]
1238 ; CHECK-NEXT: ret double [[TMP6]]
1240 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1241 %2 = extractelement <2 x double> %1, i64 0
1242 %3 = extractelement <2 x double> %b, i64 0
1243 %4 = extractelement <2 x double> %c, i64 0
1244 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1245 %6 = bitcast i8 %mask to <8 x i1>
1246 %7 = extractelement <8 x i1> %6, i64 0
1247 %8 = select i1 %7, double %5, double 0.000000e+00
1248 %9 = insertelement <2 x double> %1, double %8, i64 0
1249 %10 = extractelement <2 x double> %9, i32 0
; Same input as the _0 variant, but lane 1 of the final vector is extracted.
; The FMA/select result is only written to lane 0, so the expected output is
; the constant 1.0 that was inserted into lane 1 of %a.
1253 define double @test_maskz_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1255 ; CHECK-LABEL: @test_maskz_vfmadd_sd_1(
1256 ; CHECK-NEXT: ret double 1.000000e+00
1258 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1259 %2 = extractelement <2 x double> %1, i64 0
1260 %3 = extractelement <2 x double> %b, i64 0
1261 %4 = extractelement <2 x double> %c, i64 0
1262 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1263 %6 = bitcast i8 %mask to <8 x i1>
1264 %7 = extractelement <8 x i1> %6, i64 0
1265 %8 = select i1 %7, double %5, double 0.000000e+00
1266 %9 = insertelement <2 x double> %1, double %8, i64 0
1267 %10 = extractelement <2 x double> %9, i32 1
; "mask3" scalar float FMA: the pass-through value is lane 0 of %c and the
; selected result is written back into lane 0 of %c.
; The constants inserted into lanes 1-3 of %a and %b are never read (only lane 0
; of each is extracted), so the expected output uses %a and %b directly.
1271 define <4 x float> @test_mask3_vfmadd_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1273 ; CHECK-LABEL: @test_mask3_vfmadd_ss(
1274 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1275 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1276 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1277 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1278 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1279 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1280 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
1281 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <4 x float> [[C]], float [[TMP6]], i64 0
1282 ; CHECK-NEXT: ret <4 x float> [[TMP7]]
1284 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1285 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1286 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1287 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1288 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1289 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1290 %7 = extractelement <4 x float> %3, i64 0
1291 %8 = extractelement <4 x float> %6, i64 0
1292 %9 = extractelement <4 x float> %c, i64 0
1293 %10 = call float @llvm.fma.f32(float %7, float %8, float %9)
1294 %11 = bitcast i8 %mask to <8 x i1>
1295 %12 = extractelement <8 x i1> %11, i64 0
1296 %13 = select i1 %12, float %10, float %9
1297 %14 = insertelement <4 x float> %c, float %13, i64 0
; "mask3" scalar float FMA where only lane 0 of the result vector is read.
; The constants inserted into lanes 1-3 of %c are not visible through lane 0,
; so the expected output reads lane 0 of %a, %b and %c directly and keeps just
; the scalar fma plus the select on the low bit of %mask.
1301 define float @test_mask3_vfmadd_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1303 ; CHECK-LABEL: @test_mask3_vfmadd_ss_0(
1304 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1305 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1306 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1307 ; CHECK-NEXT: [[TMP4:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP3]])
1308 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1309 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1310 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], float [[TMP3]], float [[TMP4]]
1311 ; CHECK-NEXT: ret float [[TMP6]]
1313 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1314 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1315 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1316 %4 = extractelement <4 x float> %a, i64 0
1317 %5 = extractelement <4 x float> %b, i64 0
1318 %6 = extractelement <4 x float> %3, i64 0
1319 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1320 %8 = bitcast i8 %mask to <8 x i1>
1321 %9 = extractelement <8 x i1> %8, i64 0
1322 %10 = select i1 %9, float %7, float %6
1323 %11 = insertelement <4 x float> %3, float %10, i64 0
1324 %12 = extractelement <4 x float> %11, i32 0
; "mask3" scalar float FMA where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FMA result, so the body is expected to fold to 1.0.
1328 define float @test_mask3_vfmadd_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1330 ; CHECK-LABEL: @test_mask3_vfmadd_ss_1(
1331 ; CHECK-NEXT: ret float 1.000000e+00
1333 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1334 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1335 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1336 %4 = extractelement <4 x float> %a, i64 0
1337 %5 = extractelement <4 x float> %b, i64 0
1338 %6 = extractelement <4 x float> %3, i64 0
1339 %7 = call float @llvm.fma.f32(float %4, float %5, float %6)
1340 %8 = bitcast i8 %mask to <8 x i1>
1341 %9 = extractelement <8 x i1> %8, i64 0
1342 %10 = select i1 %9, float %7, float %6
1343 %11 = insertelement <4 x float> %3, float %10, i64 0
1344 %12 = extractelement <4 x float> %11, i32 1
; "mask3" scalar double FMA: the pass-through value is lane 0 of %c and the
; selected result is written back into lane 0 of %c.
; The constants inserted into lane 1 of %a and %b are never read (only lane 0
; of each is extracted), so the expected output uses %a and %b directly.
1348 define <2 x double> @test_mask3_vfmadd_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1350 ; CHECK-LABEL: @test_mask3_vfmadd_sd(
1351 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1352 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1353 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1354 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1355 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1356 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1357 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
1358 ; CHECK-NEXT: [[TMP7:%.*]] = insertelement <2 x double> [[C]], double [[TMP6]], i64 0
1359 ; CHECK-NEXT: ret <2 x double> [[TMP7]]
1361 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1362 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1363 %3 = extractelement <2 x double> %1, i64 0
1364 %4 = extractelement <2 x double> %2, i64 0
1365 %5 = extractelement <2 x double> %c, i64 0
1366 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1367 %7 = bitcast i8 %mask to <8 x i1>
1368 %8 = extractelement <8 x i1> %7, i64 0
1369 %9 = select i1 %8, double %6, double %5
1370 %10 = insertelement <2 x double> %c, double %9, i64 0
1371 ret <2 x double> %10
; "mask3" scalar double FMA where only lane 0 of the result vector is read.
; The 1.0 inserted into lane 1 of %c is not visible through lane 0, so the
; expected output reads lane 0 of %a, %b and %c directly and keeps just the
; scalar fma plus the select on the low bit of %mask.
1374 define double @test_mask3_vfmadd_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1376 ; CHECK-LABEL: @test_mask3_vfmadd_sd_0(
1377 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1378 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1379 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1380 ; CHECK-NEXT: [[TMP4:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP3]])
1381 ; CHECK-NEXT: [[TMP5:%.*]] = and i8 [[MASK:%.*]], 1
1382 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP5]], 0
1383 ; CHECK-NEXT: [[TMP6:%.*]] = select i1 [[DOTNOT]], double [[TMP3]], double [[TMP4]]
1384 ; CHECK-NEXT: ret double [[TMP6]]
1386 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1387 %2 = extractelement <2 x double> %a, i64 0
1388 %3 = extractelement <2 x double> %b, i64 0
1389 %4 = extractelement <2 x double> %1, i64 0
1390 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1391 %6 = bitcast i8 %mask to <8 x i1>
1392 %7 = extractelement <8 x i1> %6, i64 0
1393 %8 = select i1 %7, double %5, double %4
1394 %9 = insertelement <2 x double> %1, double %8, i64 0
1395 %10 = extractelement <2 x double> %9, i32 0
; "mask3" scalar double FMA where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FMA result, so the body is expected to fold to 1.0.
1399 define double @test_mask3_vfmadd_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1401 ; CHECK-LABEL: @test_mask3_vfmadd_sd_1(
1402 ; CHECK-NEXT: ret double 1.000000e+00
1404 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1405 %2 = extractelement <2 x double> %a, i64 0
1406 %3 = extractelement <2 x double> %b, i64 0
1407 %4 = extractelement <2 x double> %1, i64 0
1408 %5 = call double @llvm.fma.f64(double %2, double %3, double %4)
1409 %6 = bitcast i8 %mask to <8 x i1>
1410 %7 = extractelement <8 x i1> %6, i64 0
1411 %8 = select i1 %7, double %5, double %4
1412 %9 = insertelement <2 x double> %1, double %8, i64 0
1413 %10 = extractelement <2 x double> %9, i32 1
; "mask3" scalar float FMSUB: fma(a0, b0, -c0), with the un-negated lane 0 of
; %c as the pass-through and the selected result written into lane 0 of %c.
; The negation is spelled as a vector `fsub -0.0, %c`; the expected output
; negates only the extracted lane-0 scalar. The constants inserted into lanes
; 1-3 of %a and %b are never read and do not appear in the expected output.
1417 define <4 x float> @test_mask3_vfmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1419 ; CHECK-LABEL: @test_mask3_vfmsub_ss(
1420 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1421 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1422 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1423 ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
1424 ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
1425 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
1426 ; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
1427 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
1428 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
1429 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <4 x float> [[C]], float [[TMP8]], i64 0
1430 ; CHECK-NEXT: ret <4 x float> [[TMP9]]
1432 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1433 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1434 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1435 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1436 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1437 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1438 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
1439 %8 = extractelement <4 x float> %3, i64 0
1440 %9 = extractelement <4 x float> %6, i64 0
1441 %10 = extractelement <4 x float> %7, i64 0
1442 %11 = call float @llvm.fma.f32(float %8, float %9, float %10)
1443 %12 = extractelement <4 x float> %c, i64 0
1444 %13 = bitcast i8 %mask to <8 x i1>
1445 %14 = extractelement <8 x i1> %13, i64 0
1446 %15 = select i1 %14, float %11, float %12
1447 %16 = insertelement <4 x float> %c, float %15, i64 0
; "mask3" scalar float FMSUB where only lane 0 of the result vector is read.
; The constants inserted into lanes 1-3 of %c are not visible through lane 0,
; so the expected output reads lane 0 of %c directly, negates that scalar for
; the fma addend, and uses the un-negated value as the pass-through.
1451 define float @test_mask3_vfmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1453 ; CHECK-LABEL: @test_mask3_vfmsub_ss_0(
1454 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1455 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1456 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1457 ; CHECK-NEXT: [[TMP4:%.*]] = fneg float [[TMP3]]
1458 ; CHECK-NEXT: [[TMP5:%.*]] = call float @llvm.fma.f32(float [[TMP1]], float [[TMP2]], float [[TMP4]])
1459 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <4 x float> [[C]], i64 0
1460 ; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
1461 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
1462 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], float [[TMP6]], float [[TMP5]]
1463 ; CHECK-NEXT: ret float [[TMP8]]
1465 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1466 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1467 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1468 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1469 %5 = extractelement <4 x float> %a, i64 0
1470 %6 = extractelement <4 x float> %b, i64 0
1471 %7 = extractelement <4 x float> %4, i64 0
1472 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1473 %9 = extractelement <4 x float> %3, i64 0
1474 %10 = bitcast i8 %mask to <8 x i1>
1475 %11 = extractelement <8 x i1> %10, i64 0
1476 %12 = select i1 %11, float %8, float %9
1477 %13 = insertelement <4 x float> %3, float %12, i64 0
1478 %14 = extractelement <4 x float> %13, i32 0
; "mask3" scalar float FMSUB where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FMSUB result, so the body is expected to fold to 1.0.
; The negation here uses the `fsub -0.0, x` spelling.
1482 define float @test_mask3_vfmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1484 ; CHECK-LABEL: @test_mask3_vfmsub_ss_1(
1485 ; CHECK-NEXT: ret float 1.000000e+00
1487 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1488 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1489 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1490 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1491 %5 = extractelement <4 x float> %a, i64 0
1492 %6 = extractelement <4 x float> %b, i64 0
1493 %7 = extractelement <4 x float> %4, i64 0
1494 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1495 %9 = extractelement <4 x float> %3, i64 0
1496 %10 = bitcast i8 %mask to <8 x i1>
1497 %11 = extractelement <8 x i1> %10, i64 0
1498 %12 = select i1 %11, float %8, float %9
1499 %13 = insertelement <4 x float> %3, float %12, i64 0
1500 %14 = extractelement <4 x float> %13, i32 1
; Lane-1 read of a "mask3" scalar float FMSUB, with the negation written as a
; unary `fneg` instruction rather than `fsub -0.0, x`.
; Lane 1 holds the constant 1.0 inserted into %c, so the body is expected to
; fold to 1.0.
1504 define float @test_mask3_vfmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1506 ; CHECK-LABEL: @test_mask3_vfmsub_ss_1_unary_fneg(
1507 ; CHECK-NEXT: ret float 1.000000e+00
1509 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1510 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1511 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1512 %4 = fneg <4 x float> %3
1513 %5 = extractelement <4 x float> %a, i64 0
1514 %6 = extractelement <4 x float> %b, i64 0
1515 %7 = extractelement <4 x float> %4, i64 0
1516 %8 = call float @llvm.fma.f32(float %5, float %6, float %7)
1517 %9 = extractelement <4 x float> %3, i64 0
1518 %10 = bitcast i8 %mask to <8 x i1>
1519 %11 = extractelement <8 x i1> %10, i64 0
1520 %12 = select i1 %11, float %8, float %9
1521 %13 = insertelement <4 x float> %3, float %12, i64 0
1522 %14 = extractelement <4 x float> %13, i32 1
; "mask3" scalar double FMSUB: fma(a0, b0, -c0), with the un-negated lane 0 of
; %c as the pass-through and the selected result written into lane 0 of %c.
; The negation is spelled as a vector `fsub -0.0, %c`; the expected output
; negates only the extracted lane-0 scalar. The constants inserted into lane 1
; of %a and %b are never read and do not appear in the expected output.
1526 define <2 x double> @test_mask3_vfmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1528 ; CHECK-LABEL: @test_mask3_vfmsub_sd(
1529 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1530 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1531 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1532 ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
1533 ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
1534 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
1535 ; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
1536 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
1537 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
1538 ; CHECK-NEXT: [[TMP9:%.*]] = insertelement <2 x double> [[C]], double [[TMP8]], i64 0
1539 ; CHECK-NEXT: ret <2 x double> [[TMP9]]
1541 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1542 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1543 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
1544 %4 = extractelement <2 x double> %1, i64 0
1545 %5 = extractelement <2 x double> %2, i64 0
1546 %6 = extractelement <2 x double> %3, i64 0
1547 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1548 %8 = extractelement <2 x double> %c, i64 0
1549 %9 = bitcast i8 %mask to <8 x i1>
1550 %10 = extractelement <8 x i1> %9, i64 0
1551 %11 = select i1 %10, double %7, double %8
1552 %12 = insertelement <2 x double> %c, double %11, i64 0
1553 ret <2 x double> %12
; "mask3" scalar double FMSUB where only lane 0 of the result vector is read.
; The 1.0 inserted into lane 1 of %c is not visible through lane 0, so the
; expected output reads lane 0 of %c directly, negates that scalar for the fma
; addend, and uses the un-negated value as the pass-through.
1556 define double @test_mask3_vfmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1558 ; CHECK-LABEL: @test_mask3_vfmsub_sd_0(
1559 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1560 ; CHECK-NEXT: [[TMP2:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1561 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1562 ; CHECK-NEXT: [[TMP4:%.*]] = fneg double [[TMP3]]
1563 ; CHECK-NEXT: [[TMP5:%.*]] = call double @llvm.fma.f64(double [[TMP1]], double [[TMP2]], double [[TMP4]])
1564 ; CHECK-NEXT: [[TMP6:%.*]] = extractelement <2 x double> [[C]], i64 0
1565 ; CHECK-NEXT: [[TMP7:%.*]] = and i8 [[MASK:%.*]], 1
1566 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP7]], 0
1567 ; CHECK-NEXT: [[TMP8:%.*]] = select i1 [[DOTNOT]], double [[TMP6]], double [[TMP5]]
1568 ; CHECK-NEXT: ret double [[TMP8]]
1570 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1571 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1572 %3 = extractelement <2 x double> %a, i64 0
1573 %4 = extractelement <2 x double> %b, i64 0
1574 %5 = extractelement <2 x double> %2, i64 0
1575 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1576 %7 = extractelement <2 x double> %1, i64 0
1577 %8 = bitcast i8 %mask to <8 x i1>
1578 %9 = extractelement <8 x i1> %8, i64 0
1579 %10 = select i1 %9, double %6, double %7
1580 %11 = insertelement <2 x double> %1, double %10, i64 0
1581 %12 = extractelement <2 x double> %11, i32 0
; "mask3" scalar double FMSUB where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FMSUB result, so the body is expected to fold to 1.0.
; The negation here uses the `fsub -0.0, x` spelling.
1585 define double @test_mask3_vfmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1587 ; CHECK-LABEL: @test_mask3_vfmsub_sd_1(
1588 ; CHECK-NEXT: ret double 1.000000e+00
1590 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1591 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1592 %3 = extractelement <2 x double> %a, i64 0
1593 %4 = extractelement <2 x double> %b, i64 0
1594 %5 = extractelement <2 x double> %2, i64 0
1595 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1596 %7 = extractelement <2 x double> %1, i64 0
1597 %8 = bitcast i8 %mask to <8 x i1>
1598 %9 = extractelement <8 x i1> %8, i64 0
1599 %10 = select i1 %9, double %6, double %7
1600 %11 = insertelement <2 x double> %1, double %10, i64 0
1601 %12 = extractelement <2 x double> %11, i32 1
; Lane-1 read of a "mask3" scalar double FMSUB, with the negation written as a
; unary `fneg` instruction rather than `fsub -0.0, x`.
; Lane 1 holds the constant 1.0 inserted into %c, so the body is expected to
; fold to 1.0.
1605 define double @test_mask3_vfmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1607 ; CHECK-LABEL: @test_mask3_vfmsub_sd_1_unary_fneg(
1608 ; CHECK-NEXT: ret double 1.000000e+00
1610 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1611 %2 = fneg <2 x double> %1
1612 %3 = extractelement <2 x double> %a, i64 0
1613 %4 = extractelement <2 x double> %b, i64 0
1614 %5 = extractelement <2 x double> %2, i64 0
1615 %6 = call double @llvm.fma.f64(double %3, double %4, double %5)
1616 %7 = extractelement <2 x double> %1, i64 0
1617 %8 = bitcast i8 %mask to <8 x i1>
1618 %9 = extractelement <8 x i1> %8, i64 0
1619 %10 = select i1 %9, double %6, double %7
1620 %11 = insertelement <2 x double> %1, double %10, i64 0
1621 %12 = extractelement <2 x double> %11, i32 1
; "mask3" scalar float FNMSUB: fma(-a0, b0, -c0), with the un-negated lane 0 of
; %c as the pass-through and the selected result written into lane 0 of %c.
; Both negations are spelled as vector `fsub -0.0, x`; the expected output
; negates only the extracted lane-0 scalars. The constants inserted into lanes
; 1-3 of %a and %b are never read and do not appear in the expected output.
1625 define <4 x float> @test_mask3_vfnmsub_ss(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1627 ; CHECK-LABEL: @test_mask3_vfnmsub_ss(
1628 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1629 ; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]]
1630 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1631 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1632 ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
1633 ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
1634 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
1635 ; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
1636 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
1637 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
1638 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <4 x float> [[C]], float [[TMP9]], i64 0
1639 ; CHECK-NEXT: ret <4 x float> [[TMP10]]
1641 %1 = insertelement <4 x float> %a, float 1.000000e+00, i32 1
1642 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1643 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1644 %4 = insertelement <4 x float> %b, float 4.000000e+00, i32 1
1645 %5 = insertelement <4 x float> %4, float 5.000000e+00, i32 2
1646 %6 = insertelement <4 x float> %5, float 6.000000e+00, i32 3
1647 %7 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1648 %8 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
1649 %9 = extractelement <4 x float> %7, i64 0
1650 %10 = extractelement <4 x float> %6, i64 0
1651 %11 = extractelement <4 x float> %8, i64 0
1652 %12 = call float @llvm.fma.f32(float %9, float %10, float %11)
1653 %13 = extractelement <4 x float> %c, i64 0
1654 %14 = bitcast i8 %mask to <8 x i1>
1655 %15 = extractelement <8 x i1> %14, i64 0
1656 %16 = select i1 %15, float %12, float %13
1657 %17 = insertelement <4 x float> %c, float %16, i64 0
; "mask3" scalar float FNMSUB where only lane 0 of the result vector is read.
; The constants inserted into lanes 1-3 of %c are not visible through lane 0,
; so the expected output reads lane 0 of %a and %c directly, negates those
; scalars for the fma, and uses the un-negated lane 0 of %c as pass-through.
1661 define float @test_mask3_vfnmsub_ss_0(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1663 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_0(
1664 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x float> [[A:%.*]], i64 0
1665 ; CHECK-NEXT: [[TMP2:%.*]] = fneg float [[TMP1]]
1666 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <4 x float> [[B:%.*]], i64 0
1667 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <4 x float> [[C:%.*]], i64 0
1668 ; CHECK-NEXT: [[TMP5:%.*]] = fneg float [[TMP4]]
1669 ; CHECK-NEXT: [[TMP6:%.*]] = call float @llvm.fma.f32(float [[TMP2]], float [[TMP3]], float [[TMP5]])
1670 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <4 x float> [[C]], i64 0
1671 ; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
1672 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
1673 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], float [[TMP7]], float [[TMP6]]
1674 ; CHECK-NEXT: ret float [[TMP9]]
1676 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1677 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1678 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1679 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1680 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1681 %6 = extractelement <4 x float> %4, i64 0
1682 %7 = extractelement <4 x float> %b, i64 0
1683 %8 = extractelement <4 x float> %5, i64 0
1684 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1685 %10 = extractelement <4 x float> %3, i64 0
1686 %11 = bitcast i8 %mask to <8 x i1>
1687 %12 = extractelement <8 x i1> %11, i64 0
1688 %13 = select i1 %12, float %9, float %10
1689 %14 = insertelement <4 x float> %3, float %13, i64 0
1690 %15 = extractelement <4 x float> %14, i32 0
; "mask3" scalar float FNMSUB where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FNMSUB result, so the body is expected to fold to 1.0.
; The negations here use the `fsub -0.0, x` spelling.
1694 define float @test_mask3_vfnmsub_ss_1(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1696 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_1(
1697 ; CHECK-NEXT: ret float 1.000000e+00
1699 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1700 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1701 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1702 %4 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %a
1703 %5 = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %3
1704 %6 = extractelement <4 x float> %4, i64 0
1705 %7 = extractelement <4 x float> %b, i64 0
1706 %8 = extractelement <4 x float> %5, i64 0
1707 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1708 %10 = extractelement <4 x float> %3, i64 0
1709 %11 = bitcast i8 %mask to <8 x i1>
1710 %12 = extractelement <8 x i1> %11, i64 0
1711 %13 = select i1 %12, float %9, float %10
1712 %14 = insertelement <4 x float> %3, float %13, i64 0
1713 %15 = extractelement <4 x float> %14, i32 1
; Lane-1 read of a "mask3" scalar float FNMSUB, with both negations written as
; unary `fneg` instructions rather than `fsub -0.0, x`.
; Lane 1 holds the constant 1.0 inserted into %c, so the body is expected to
; fold to 1.0.
1717 define float @test_mask3_vfnmsub_ss_1_unary_fneg(<4 x float> %a, <4 x float> %b, <4 x float> %c, i8 %mask) {
1719 ; CHECK-LABEL: @test_mask3_vfnmsub_ss_1_unary_fneg(
1720 ; CHECK-NEXT: ret float 1.000000e+00
1722 %1 = insertelement <4 x float> %c, float 1.000000e+00, i32 1
1723 %2 = insertelement <4 x float> %1, float 2.000000e+00, i32 2
1724 %3 = insertelement <4 x float> %2, float 3.000000e+00, i32 3
1725 %4 = fneg <4 x float> %a
1726 %5 = fneg <4 x float> %3
1727 %6 = extractelement <4 x float> %4, i64 0
1728 %7 = extractelement <4 x float> %b, i64 0
1729 %8 = extractelement <4 x float> %5, i64 0
1730 %9 = call float @llvm.fma.f32(float %6, float %7, float %8)
1731 %10 = extractelement <4 x float> %3, i64 0
1732 %11 = bitcast i8 %mask to <8 x i1>
1733 %12 = extractelement <8 x i1> %11, i64 0
1734 %13 = select i1 %12, float %9, float %10
1735 %14 = insertelement <4 x float> %3, float %13, i64 0
1736 %15 = extractelement <4 x float> %14, i32 1
; "mask3" scalar double FNMSUB: fma(-a0, b0, -c0), with the un-negated lane 0
; of %c as the pass-through and the selected result written into lane 0 of %c.
; Both negations are spelled as vector `fsub -0.0, x`; the expected output
; negates only the extracted lane-0 scalars. The constants inserted into lane 1
; of %a and %b are never read and do not appear in the expected output.
1740 define <2 x double> @test_mask3_vfnmsub_sd(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1742 ; CHECK-LABEL: @test_mask3_vfnmsub_sd(
1743 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1744 ; CHECK-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]]
1745 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1746 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1747 ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
1748 ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
1749 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
1750 ; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
1751 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
1752 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
1753 ; CHECK-NEXT: [[TMP10:%.*]] = insertelement <2 x double> [[C]], double [[TMP9]], i64 0
1754 ; CHECK-NEXT: ret <2 x double> [[TMP10]]
1756 %1 = insertelement <2 x double> %a, double 1.000000e+00, i32 1
1757 %2 = insertelement <2 x double> %b, double 2.000000e+00, i32 1
1758 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1759 %4 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %c
1760 %5 = extractelement <2 x double> %3, i64 0
1761 %6 = extractelement <2 x double> %2, i64 0
1762 %7 = extractelement <2 x double> %4, i64 0
1763 %8 = call double @llvm.fma.f64(double %5, double %6, double %7)
1764 %9 = extractelement <2 x double> %c, i64 0
1765 %10 = bitcast i8 %mask to <8 x i1>
1766 %11 = extractelement <8 x i1> %10, i64 0
1767 %12 = select i1 %11, double %8, double %9
1768 %13 = insertelement <2 x double> %c, double %12, i64 0
1769 ret <2 x double> %13
; "mask3" scalar double FNMSUB where only lane 0 of the result vector is read.
; The 1.0 inserted into lane 1 of %c is not visible through lane 0, so the
; expected output reads lane 0 of %a and %c directly, negates those scalars
; for the fma, and uses the un-negated lane 0 of %c as the pass-through.
1772 define double @test_mask3_vfnmsub_sd_0(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1774 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_0(
1775 ; CHECK-NEXT: [[TMP1:%.*]] = extractelement <2 x double> [[A:%.*]], i64 0
1776 ; CHECK-NEXT: [[TMP2:%.*]] = fneg double [[TMP1]]
1777 ; CHECK-NEXT: [[TMP3:%.*]] = extractelement <2 x double> [[B:%.*]], i64 0
1778 ; CHECK-NEXT: [[TMP4:%.*]] = extractelement <2 x double> [[C:%.*]], i64 0
1779 ; CHECK-NEXT: [[TMP5:%.*]] = fneg double [[TMP4]]
1780 ; CHECK-NEXT: [[TMP6:%.*]] = call double @llvm.fma.f64(double [[TMP2]], double [[TMP3]], double [[TMP5]])
1781 ; CHECK-NEXT: [[TMP7:%.*]] = extractelement <2 x double> [[C]], i64 0
1782 ; CHECK-NEXT: [[TMP8:%.*]] = and i8 [[MASK:%.*]], 1
1783 ; CHECK-NEXT: [[DOTNOT:%.*]] = icmp eq i8 [[TMP8]], 0
1784 ; CHECK-NEXT: [[TMP9:%.*]] = select i1 [[DOTNOT]], double [[TMP7]], double [[TMP6]]
1785 ; CHECK-NEXT: ret double [[TMP9]]
1787 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1788 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
1789 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1790 %4 = extractelement <2 x double> %2, i64 0
1791 %5 = extractelement <2 x double> %b, i64 0
1792 %6 = extractelement <2 x double> %3, i64 0
1793 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1794 %8 = extractelement <2 x double> %1, i64 0
1795 %9 = bitcast i8 %mask to <8 x i1>
1796 %10 = extractelement <8 x i1> %9, i64 0
1797 %11 = select i1 %10, double %7, double %8
1798 %12 = insertelement <2 x double> %1, double %11, i64 0
1799 %13 = extractelement <2 x double> %12, i32 0
; "mask3" scalar double FNMSUB where only lane 1 of the result vector is read.
; Lane 1 holds the constant 1.0 inserted into %c and is not overwritten by the
; lane-0 insert of the FNMSUB result, so the body is expected to fold to 1.0.
; The negations here use the `fsub -0.0, x` spelling.
1803 define double @test_mask3_vfnmsub_sd_1(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1805 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_1(
1806 ; CHECK-NEXT: ret double 1.000000e+00
1808 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1809 %2 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %a
1810 %3 = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %1
1811 %4 = extractelement <2 x double> %2, i64 0
1812 %5 = extractelement <2 x double> %b, i64 0
1813 %6 = extractelement <2 x double> %3, i64 0
1814 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1815 %8 = extractelement <2 x double> %1, i64 0
1816 %9 = bitcast i8 %mask to <8 x i1>
1817 %10 = extractelement <8 x i1> %9, i64 0
1818 %11 = select i1 %10, double %7, double %8
1819 %12 = insertelement <2 x double> %1, double %11, i64 0
1820 %13 = extractelement <2 x double> %12, i32 1
; Lane-1 read of a "mask3" scalar double FNMSUB, with both negations written as
; unary `fneg` instructions rather than `fsub -0.0, x`.
; Lane 1 holds the constant 1.0 inserted into %c, so the body is expected to
; fold to 1.0.
1824 define double @test_mask3_vfnmsub_sd_1_unary_fneg(<2 x double> %a, <2 x double> %b, <2 x double> %c, i8 %mask) {
1826 ; CHECK-LABEL: @test_mask3_vfnmsub_sd_1_unary_fneg(
1827 ; CHECK-NEXT: ret double 1.000000e+00
1829 %1 = insertelement <2 x double> %c, double 1.000000e+00, i32 1
1830 %2 = fneg <2 x double> %a
1831 %3 = fneg <2 x double> %1
1832 %4 = extractelement <2 x double> %2, i64 0
1833 %5 = extractelement <2 x double> %b, i64 0
1834 %6 = extractelement <2 x double> %3, i64 0
1835 %7 = call double @llvm.fma.f64(double %4, double %5, double %6)
1836 %8 = extractelement <2 x double> %1, i64 0
1837 %9 = bitcast i8 %mask to <8 x i1>
1838 %10 = extractelement <8 x i1> %9, i64 0
1839 %11 = select i1 %10, double %7, double %8
1840 %12 = insertelement <2 x double> %1, double %11, i64 0
1841 %13 = extractelement <2 x double> %12, i32 1
1845 declare <8 x i32> @llvm.x86.avx2.permd(<8 x i32>, <8 x i32>)
; permd with the constant index vector <0, 1, ..., 7>: every lane selects
; itself, so the call is expected to fold away and %a0 is returned unchanged.
1847 define <8 x i32> @identity_test_permvar_si_256(<8 x i32> %a0) {
1849 ; CHECK-LABEL: @identity_test_permvar_si_256(
1850 ; CHECK-NEXT: ret <8 x i32> [[A0:%.*]]
1852 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
; Identity permd followed by a per-lane select against %passthru under %mask.
; The permute is expected to fold to %a0, leaving only the bitcast of %mask to
; <8 x i1> and the vector select.
1856 define <8 x i32> @identity_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1858 ; CHECK-LABEL: @identity_test_permvar_si_256_mask(
1859 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1860 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i32> [[A0:%.*]], <8 x i32> [[PASSTHRU:%.*]]
1861 ; CHECK-NEXT: ret <8 x i32> [[TMP2]]
1863 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
1864 %2 = bitcast i8 %mask to <8 x i1>
1865 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; permd with an all-zero index vector: every result lane selects lane 0 of
; %a0. The call is expected to become a generic shufflevector with a
; zeroinitializer shuffle mask.
1869 define <8 x i32> @zero_test_permvar_si_256(<8 x i32> %a0) {
1871 ; CHECK-LABEL: @zero_test_permvar_si_256(
1872 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer
1873 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1875 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
; All-zero-index permd followed by a per-lane select against %passthru under
; %mask. The permute is expected to become a shufflevector with a
; zeroinitializer mask; the bitcast and select stay as written.
1879 define <8 x i32> @zero_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1881 ; CHECK-LABEL: @zero_test_permvar_si_256_mask(
1882 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> zeroinitializer
1883 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1884 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1885 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1887 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> zeroinitializer)
1888 %2 = bitcast i8 %mask to <8 x i1>
1889 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; permd with the constant index vector <7, 6, ..., 0> (lane reversal). The call
; is expected to become a generic shufflevector carrying the same indices as
; its shuffle mask.
1893 define <8 x i32> @shuffle_test_permvar_si_256(<8 x i32> %a0) {
1895 ; CHECK-LABEL: @shuffle_test_permvar_si_256(
1896 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1897 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1899 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Lane-reversing permd followed by a per-lane select against %passthru under
; %mask. The permute is expected to become a shufflevector with the same
; indices; the bitcast and select stay as written.
1903 define <8 x i32> @shuffle_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1905 ; CHECK-LABEL: @shuffle_test_permvar_si_256_mask(
1906 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1907 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1908 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1909 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1911 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1912 %2 = bitcast i8 %mask to <8 x i1>
1913 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
1917 define <8 x i32> @undef_test_permvar_si_256(<8 x i32> %a0) {
1919 ; CHECK-LABEL: @undef_test_permvar_si_256(
1920 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1921 ; CHECK-NEXT: ret <8 x i32> [[TMP1]]
1923 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the undef-lane case: the permd call is expected to become a shufflevector whose mask
; keeps the undef lane, followed by the unchanged bitcast of %mask and select against %passthru.
1927 define <8 x i32> @undef_test_permvar_si_256_mask(<8 x i32> %a0, <8 x i32> %passthru, i8 %mask) {
1929 ; CHECK-LABEL: @undef_test_permvar_si_256_mask(
1930 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i32> [[A0:%.*]], <8 x i32> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1931 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1932 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i32> [[TMP1]], <8 x i32> [[PASSTHRU:%.*]]
1933 ; CHECK-NEXT: ret <8 x i32> [[TMP3]]
1935 %1 = call <8 x i32> @llvm.x86.avx2.permd(<8 x i32> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
1936 %2 = bitcast i8 %mask to <8 x i1>
1937 %3 = select <8 x i1> %2, <8 x i32> %1, <8 x i32> %passthru
; Intrinsic used by the *_permvar_sf_256 tests: the tests pass the <8 x float> data vector first
; and a constant <8 x i32> index vector second.
1941 declare <8 x float> @llvm.x86.avx2.permps(<8 x float>, <8 x i32>)
; An identity index vector (0 through 7) passed to permps is expected to fold the call away entirely,
; so the function returns %a0 directly.
1943 define <8 x float> @identity_test_permvar_sf_256(<8 x float> %a0) {
1945 ; CHECK-LABEL: @identity_test_permvar_sf_256(
1946 ; CHECK-NEXT: ret <8 x float> [[A0:%.*]]
1948 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
; Masked form of the identity case: the permps call is expected to disappear, leaving only the bitcast
; of %mask and a select that chooses between %a0 and %passthru.
1952 define <8 x float> @identity_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1954 ; CHECK-LABEL: @identity_test_permvar_sf_256_mask(
1955 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1956 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x float> [[A0:%.*]], <8 x float> [[PASSTHRU:%.*]]
1957 ; CHECK-NEXT: ret <8 x float> [[TMP2]]
1959 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>)
1960 %2 = bitcast i8 %mask to <8 x i1>
1961 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; An all-zero constant index vector passed to permps is expected to fold to a shufflevector that splats element 0 of %a0.
1965 define <8 x float> @zero_test_permvar_sf_256(<8 x float> %a0) {
1967 ; CHECK-LABEL: @zero_test_permvar_sf_256(
1968 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer
1969 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
1971 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
; Masked form of the all-zero index case: the permps call is expected to become a splat shufflevector,
; while the bitcast of %mask and the select against %passthru are kept as written.
1975 define <8 x float> @zero_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
1977 ; CHECK-LABEL: @zero_test_permvar_sf_256_mask(
1978 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> zeroinitializer
1979 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
1980 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
1981 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
1983 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> zeroinitializer)
1984 %2 = bitcast i8 %mask to <8 x i1>
1985 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; A constant lane-reversing index vector (7 down to 0) passed to permps is expected to become
; a shufflevector of %a0 with the same lane order as its mask.
1989 define <8 x float> @shuffle_test_permvar_sf_256(<8 x float> %a0) {
1991 ; CHECK-LABEL: @shuffle_test_permvar_sf_256(
1992 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
1993 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
1995 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the lane-reversal case: the permps call is expected to become a reversing shufflevector,
; followed by the unchanged bitcast of %mask and select against %passthru.
1999 define <8 x float> @shuffle_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
2001 ; CHECK-LABEL: @shuffle_test_permvar_sf_256_mask(
2002 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2003 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2004 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
2005 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
2007 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2008 %2 = bitcast i8 %mask to <8 x i1>
2009 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2013 define <8 x float> @undef_test_permvar_sf_256(<8 x float> %a0) {
2015 ; CHECK-LABEL: @undef_test_permvar_sf_256(
2016 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2017 ; CHECK-NEXT: ret <8 x float> [[TMP1]]
2019 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the undef-lane case: the permps call is expected to become a shufflevector whose mask
; keeps the undef lane, followed by the unchanged bitcast of %mask and select against %passthru.
2023 define <8 x float> @undef_test_permvar_sf_256_mask(<8 x float> %a0, <8 x float> %passthru, i8 %mask) {
2025 ; CHECK-LABEL: @undef_test_permvar_sf_256_mask(
2026 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x float> [[A0:%.*]], <8 x float> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2027 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2028 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x float> [[TMP1]], <8 x float> [[PASSTHRU:%.*]]
2029 ; CHECK-NEXT: ret <8 x float> [[TMP3]]
2031 %1 = call <8 x float> @llvm.x86.avx2.permps(<8 x float> %a0, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2032 %2 = bitcast i8 %mask to <8 x i1>
2033 %3 = select <8 x i1> %2, <8 x float> %1, <8 x float> %passthru
; Intrinsic used by the *_permvar_di_256 tests: the tests pass the <4 x i64> data vector first
; and a constant <4 x i64> index vector second.
2037 declare <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64>, <4 x i64>)
; An identity index vector (0 through 3) is expected to fold the permvar call away entirely,
; so the function returns %a0 directly.
2039 define <4 x i64> @identity_test_permvar_di_256(<4 x i64> %a0) {
2041 ; CHECK-LABEL: @identity_test_permvar_di_256(
2042 ; CHECK-NEXT: ret <4 x i64> [[A0:%.*]]
2044 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; Masked form of the identity case. Only four lanes exist, so the input takes lanes 0-3 of the
; <8 x i1> bitcast of %mask before the select. The permvar call is expected to disappear, and the
; extracting shufflevector is expected to have its unused second operand replaced with poison.
2048 define <4 x i64> @identity_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
2050 ; CHECK-LABEL: @identity_test_permvar_di_256_mask(
2051 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2052 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2053 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[A0:%.*]], <4 x i64> [[PASSTHRU:%.*]]
2054 ; CHECK-NEXT: ret <4 x i64> [[TMP2]]
2056 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
2057 %2 = bitcast i8 %mask to <8 x i1>
2058 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2059 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; An all-zero constant index vector is expected to fold the permvar call to a shufflevector that splats element 0 of %a0.
2063 define <4 x i64> @zero_test_permvar_di_256(<4 x i64> %a0) {
2065 ; CHECK-LABEL: @zero_test_permvar_di_256(
2066 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer
2067 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2069 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
; Masked form of the all-zero index case. The permvar call is expected to become a splat shufflevector;
; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2073 define <4 x i64> @zero_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
2075 ; CHECK-LABEL: @zero_test_permvar_di_256_mask(
2076 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> zeroinitializer
2077 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2078 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2079 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
2080 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2082 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> zeroinitializer)
2083 %2 = bitcast i8 %mask to <8 x i1>
2084 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2085 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; A constant lane-reversing index vector (3 down to 0) is expected to turn the permvar call into
; a shufflevector of %a0 with the same lane order as its mask.
2089 define <4 x i64> @shuffle_test_permvar_di_256(<4 x i64> %a0) {
2091 ; CHECK-LABEL: @shuffle_test_permvar_di_256(
2092 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2093 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2095 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
; Masked form of the lane-reversal case. The permvar call is expected to become a reversing shufflevector;
; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2099 define <4 x i64> @shuffle_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
2101 ; CHECK-LABEL: @shuffle_test_permvar_di_256_mask(
2102 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2103 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2104 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2105 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
2106 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2108 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
2109 %2 = bitcast i8 %mask to <8 x i1>
2110 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2111 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2115 define <4 x i64> @undef_test_permvar_di_256(<4 x i64> %a0) {
2117 ; CHECK-LABEL: @undef_test_permvar_di_256(
2118 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2119 ; CHECK-NEXT: ret <4 x i64> [[TMP1]]
2121 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
; Masked form of the undef-lane case. The permvar call is expected to become a shufflevector whose mask
; keeps the undef lane; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2125 define <4 x i64> @undef_test_permvar_di_256_mask(<4 x i64> %a0, <4 x i64> %passthru, i8 %mask) {
2127 ; CHECK-LABEL: @undef_test_permvar_di_256_mask(
2128 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x i64> [[A0:%.*]], <4 x i64> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2129 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2130 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2131 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x i64> [[TMP1]], <4 x i64> [[PASSTHRU:%.*]]
2132 ; CHECK-NEXT: ret <4 x i64> [[TMP3]]
2134 %1 = call <4 x i64> @llvm.x86.avx512.permvar.di.256(<4 x i64> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
2135 %2 = bitcast i8 %mask to <8 x i1>
2136 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2137 %3 = select <4 x i1> %extract, <4 x i64> %1, <4 x i64> %passthru
; Intrinsic used by the *_permvar_df_256 tests: the tests pass the <4 x double> data vector first
; and a constant <4 x i64> index vector second.
2141 declare <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double>, <4 x i64>)
; An identity index vector (0 through 3) is expected to fold the permvar call away entirely,
; so the function returns %a0 directly.
2143 define <4 x double> @identity_test_permvar_df_256(<4 x double> %a0) {
2145 ; CHECK-LABEL: @identity_test_permvar_df_256(
2146 ; CHECK-NEXT: ret <4 x double> [[A0:%.*]]
2148 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
; Masked form of the identity case. Only four lanes exist, so the input takes lanes 0-3 of the
; <8 x i1> bitcast of %mask before the select. The permvar call is expected to disappear, and the
; extracting shufflevector is expected to have its unused second operand replaced with poison.
2152 define <4 x double> @identity_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2154 ; CHECK-LABEL: @identity_test_permvar_df_256_mask(
2155 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2156 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP1]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2157 ; CHECK-NEXT: [[TMP2:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[A0:%.*]], <4 x double> [[PASSTHRU:%.*]]
2158 ; CHECK-NEXT: ret <4 x double> [[TMP2]]
2160 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 0, i64 1, i64 2, i64 3>)
2161 %2 = bitcast i8 %mask to <8 x i1>
2162 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2163 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; An all-zero constant index vector is expected to fold the permvar call to a shufflevector that splats element 0 of %a0.
2167 define <4 x double> @zero_test_permvar_df_256(<4 x double> %a0) {
2169 ; CHECK-LABEL: @zero_test_permvar_df_256(
2170 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer
2171 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2173 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
; Masked form of the all-zero index case. The permvar call is expected to become a splat shufflevector;
; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2177 define <4 x double> @zero_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2179 ; CHECK-LABEL: @zero_test_permvar_df_256_mask(
2180 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> zeroinitializer
2181 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2182 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2183 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2184 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2186 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> zeroinitializer)
2187 %2 = bitcast i8 %mask to <8 x i1>
2188 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2189 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; A constant lane-reversing index vector (3 down to 0) is expected to turn the permvar call into
; a shufflevector of %a0 with the same lane order as its mask.
2193 define <4 x double> @shuffle_test_permvar_df_256(<4 x double> %a0) {
2195 ; CHECK-LABEL: @shuffle_test_permvar_df_256(
2196 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2197 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2199 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
; Masked form of the lane-reversal case. The permvar call is expected to become a reversing shufflevector;
; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2203 define <4 x double> @shuffle_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2205 ; CHECK-LABEL: @shuffle_test_permvar_df_256_mask(
2206 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
2207 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2208 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2209 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2210 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2212 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 3, i64 2, i64 1, i64 0>)
2213 %2 = bitcast i8 %mask to <8 x i1>
2214 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2215 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2219 define <4 x double> @undef_test_permvar_df_256(<4 x double> %a0) {
2221 ; CHECK-LABEL: @undef_test_permvar_df_256(
2222 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2223 ; CHECK-NEXT: ret <4 x double> [[TMP1]]
2225 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
; Masked form of the undef-lane case. The permvar call is expected to become a shufflevector whose mask
; keeps the undef lane; lanes 0-3 of the <8 x i1> bitcast of %mask then drive the select against %passthru.
2229 define <4 x double> @undef_test_permvar_df_256_mask(<4 x double> %a0, <4 x double> %passthru, i8 %mask) {
2231 ; CHECK-LABEL: @undef_test_permvar_df_256_mask(
2232 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <4 x double> [[A0:%.*]], <4 x double> poison, <4 x i32> <i32 undef, i32 2, i32 1, i32 0>
2233 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2234 ; CHECK-NEXT: [[EXTRACT:%.*]] = shufflevector <8 x i1> [[TMP2]], <8 x i1> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2235 ; CHECK-NEXT: [[TMP3:%.*]] = select <4 x i1> [[EXTRACT]], <4 x double> [[TMP1]], <4 x double> [[PASSTHRU:%.*]]
2236 ; CHECK-NEXT: ret <4 x double> [[TMP3]]
2238 %1 = call <4 x double> @llvm.x86.avx512.permvar.df.256(<4 x double> %a0, <4 x i64> <i64 undef, i64 2, i64 1, i64 0>)
2239 %2 = bitcast i8 %mask to <8 x i1>
2240 %extract = shufflevector <8 x i1> %2, <8 x i1> %2, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
2241 %3 = select <4 x i1> %extract, <4 x double> %1, <4 x double> %passthru
; Intrinsic used by the *_permvar_si_512 tests: the tests pass the <16 x i32> data vector first
; and a constant <16 x i32> index vector second.
2245 declare <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32>, <16 x i32>)
; An identity index vector (0 through 15) is expected to fold the permvar call away entirely,
; so the function returns %a0 directly.
2247 define <16 x i32> @identity_test_permvar_si_512(<16 x i32> %a0) {
2249 ; CHECK-LABEL: @identity_test_permvar_si_512(
2250 ; CHECK-NEXT: ret <16 x i32> [[A0:%.*]]
2252 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
; Masked form of the identity case: the permvar call is expected to disappear, leaving only the bitcast
; of the i16 %mask and a select that chooses between %a0 and %passthru.
2256 define <16 x i32> @identity_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2258 ; CHECK-LABEL: @identity_test_permvar_si_512_mask(
2259 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2260 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i32> [[A0:%.*]], <16 x i32> [[PASSTHRU:%.*]]
2261 ; CHECK-NEXT: ret <16 x i32> [[TMP2]]
2263 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
2264 %2 = bitcast i16 %mask to <16 x i1>
2265 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; An all-zero constant index vector is expected to fold the permvar call to a shufflevector that splats element 0 of %a0.
2269 define <16 x i32> @zero_test_permvar_si_512(<16 x i32> %a0) {
2271 ; CHECK-LABEL: @zero_test_permvar_si_512(
2272 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer
2273 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2275 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
; Masked form of the all-zero index case: the permvar call is expected to become a splat shufflevector,
; while the bitcast of the i16 %mask and the select against %passthru are kept as written.
2279 define <16 x i32> @zero_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2281 ; CHECK-LABEL: @zero_test_permvar_si_512_mask(
2282 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> zeroinitializer
2283 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2284 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2285 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2287 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> zeroinitializer)
2288 %2 = bitcast i16 %mask to <16 x i1>
2289 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; A constant lane-reversing index vector (15 down to 0) is expected to turn the permvar call into
; a shufflevector of %a0 with the same lane order as its mask.
2293 define <16 x i32> @shuffle_test_permvar_si_512(<16 x i32> %a0) {
2295 ; CHECK-LABEL: @shuffle_test_permvar_si_512(
2296 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2297 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2299 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the lane-reversal case: the permvar call is expected to become a reversing shufflevector,
; followed by the unchanged bitcast of the i16 %mask and select against %passthru.
2303 define <16 x i32> @shuffle_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2305 ; CHECK-LABEL: @shuffle_test_permvar_si_512_mask(
2306 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2307 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2308 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2309 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2311 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2312 %2 = bitcast i16 %mask to <16 x i1>
2313 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2317 define <16 x i32> @undef_test_permvar_si_512(<16 x i32> %a0) {
2319 ; CHECK-LABEL: @undef_test_permvar_si_512(
2320 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2321 ; CHECK-NEXT: ret <16 x i32> [[TMP1]]
2323 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the undef-lane case: the permvar call is expected to become a shufflevector whose mask
; keeps the undef lane, followed by the unchanged bitcast of the i16 %mask and select against %passthru.
2327 define <16 x i32> @undef_test_permvar_si_512_mask(<16 x i32> %a0, <16 x i32> %passthru, i16 %mask) {
2329 ; CHECK-LABEL: @undef_test_permvar_si_512_mask(
2330 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i32> [[A0:%.*]], <16 x i32> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2331 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2332 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i32> [[TMP1]], <16 x i32> [[PASSTHRU:%.*]]
2333 ; CHECK-NEXT: ret <16 x i32> [[TMP3]]
2335 %1 = call <16 x i32> @llvm.x86.avx512.permvar.si.512(<16 x i32> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2336 %2 = bitcast i16 %mask to <16 x i1>
2337 %3 = select <16 x i1> %2, <16 x i32> %1, <16 x i32> %passthru
; Intrinsic used by the *_permvar_sf_512 tests: the tests pass the <16 x float> data vector first
; and a constant <16 x i32> index vector second.
2341 declare <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float>, <16 x i32>)
; An identity index vector (0 through 15) is expected to fold the permvar call away entirely,
; so the function returns %a0 directly.
2343 define <16 x float> @identity_test_permvar_sf_512(<16 x float> %a0) {
2345 ; CHECK-LABEL: @identity_test_permvar_sf_512(
2346 ; CHECK-NEXT: ret <16 x float> [[A0:%.*]]
2348 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
; Masked form of the identity case: the permvar call is expected to disappear, leaving only the bitcast
; of the i16 %mask and a select that chooses between %a0 and %passthru.
2352 define <16 x float> @identity_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2354 ; CHECK-LABEL: @identity_test_permvar_sf_512_mask(
2355 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2356 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x float> [[A0:%.*]], <16 x float> [[PASSTHRU:%.*]]
2357 ; CHECK-NEXT: ret <16 x float> [[TMP2]]
2359 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>)
2360 %2 = bitcast i16 %mask to <16 x i1>
2361 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; An all-zero constant index vector is expected to fold the permvar call to a shufflevector that splats element 0 of %a0.
2365 define <16 x float> @zero_test_permvar_sf_512(<16 x float> %a0) {
2367 ; CHECK-LABEL: @zero_test_permvar_sf_512(
2368 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer
2369 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2371 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
; Masked form of the all-zero index case: the permvar call is expected to become a splat shufflevector,
; while the bitcast of the i16 %mask and the select against %passthru are kept as written.
2375 define <16 x float> @zero_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2377 ; CHECK-LABEL: @zero_test_permvar_sf_512_mask(
2378 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> zeroinitializer
2379 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2380 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2381 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2383 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> zeroinitializer)
2384 %2 = bitcast i16 %mask to <16 x i1>
2385 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; A constant lane-reversing index vector (15 down to 0) is expected to turn the permvar call into
; a shufflevector of %a0 with the same lane order as its mask.
2389 define <16 x float> @shuffle_test_permvar_sf_512(<16 x float> %a0) {
2391 ; CHECK-LABEL: @shuffle_test_permvar_sf_512(
2392 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2393 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2395 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the lane-reversal case: the permvar call is expected to become a reversing shufflevector,
; followed by the unchanged bitcast of the i16 %mask and select against %passthru.
2399 define <16 x float> @shuffle_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2401 ; CHECK-LABEL: @shuffle_test_permvar_sf_512_mask(
2402 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2403 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2404 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2405 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2407 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2408 %2 = bitcast i16 %mask to <16 x i1>
2409 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2413 define <16 x float> @undef_test_permvar_sf_512(<16 x float> %a0) {
2415 ; CHECK-LABEL: @undef_test_permvar_sf_512(
2416 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2417 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
2419 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
; Masked form of the undef-lane case: the permvar call is expected to become a shufflevector whose mask
; keeps the undef lane, followed by the unchanged bitcast of the i16 %mask and select against %passthru.
2423 define <16 x float> @undef_test_permvar_sf_512_mask(<16 x float> %a0, <16 x float> %passthru, i16 %mask) {
2425 ; CHECK-LABEL: @undef_test_permvar_sf_512_mask(
2426 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x float> [[A0:%.*]], <16 x float> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2427 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2428 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[PASSTHRU:%.*]]
2429 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
2431 %1 = call <16 x float> @llvm.x86.avx512.permvar.sf.512(<16 x float> %a0, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>)
2432 %2 = bitcast i16 %mask to <16 x i1>
2433 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %passthru
; Intrinsic used by the *_permvar_di_512 tests: the tests pass the <8 x i64> data vector first
; and a constant <8 x i64> index vector second.
2437 declare <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64>, <8 x i64>)
; An identity index vector (0 through 7) is expected to fold the permvar call away entirely,
; so the function returns %a0 directly.
2439 define <8 x i64> @identity_test_permvar_di_512(<8 x i64> %a0) {
2441 ; CHECK-LABEL: @identity_test_permvar_di_512(
2442 ; CHECK-NEXT: ret <8 x i64> [[A0:%.*]]
2444 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
; Masked form of the identity case: the permvar call is expected to disappear, leaving only the bitcast
; of %mask and a select that chooses between %a0 and %passthru.
2448 define <8 x i64> @identity_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2450 ; CHECK-LABEL: @identity_test_permvar_di_512_mask(
2451 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2452 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i64> [[A0:%.*]], <8 x i64> [[PASSTHRU:%.*]]
2453 ; CHECK-NEXT: ret <8 x i64> [[TMP2]]
2455 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
2456 %2 = bitcast i8 %mask to <8 x i1>
2457 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; An all-zero constant index vector is expected to fold the permvar call to a shufflevector that splats element 0 of %a0.
2461 define <8 x i64> @zero_test_permvar_di_512(<8 x i64> %a0) {
2463 ; CHECK-LABEL: @zero_test_permvar_di_512(
2464 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer
2465 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2467 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
; Masked form of the all-zero index case: the permvar call is expected to become a splat shufflevector,
; while the bitcast of %mask and the select against %passthru are kept as written.
2471 define <8 x i64> @zero_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2473 ; CHECK-LABEL: @zero_test_permvar_di_512_mask(
2474 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> zeroinitializer
2475 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2476 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2477 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2479 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> zeroinitializer)
2480 %2 = bitcast i8 %mask to <8 x i1>
2481 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; A constant lane-reversing index vector (7 down to 0) is expected to turn the permvar call into
; a shufflevector of %a0 with the same lane order as its mask.
2485 define <8 x i64> @shuffle_test_permvar_di_512(<8 x i64> %a0) {
2487 ; CHECK-LABEL: @shuffle_test_permvar_di_512(
2488 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2489 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2491 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Masked form of the lane-reversal case: the permvar call is expected to become a reversing shufflevector,
; followed by the unchanged bitcast of %mask and select against %passthru.
2495 define <8 x i64> @shuffle_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2497 ; CHECK-LABEL: @shuffle_test_permvar_di_512_mask(
2498 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2499 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2500 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2501 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2503 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2504 %2 = bitcast i8 %mask to <8 x i1>
2505 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; An undef lane in the constant index vector is expected to carry through as an undef lane
; in the mask of the resulting shufflevector; the remaining lanes keep their constant indices.
2509 define <8 x i64> @undef_test_permvar_di_512(<8 x i64> %a0) {
2511 ; CHECK-LABEL: @undef_test_permvar_di_512(
2512 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2513 ; CHECK-NEXT: ret <8 x i64> [[TMP1]]
2515 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Masked form of the undef-lane case: the permvar call is expected to become a shufflevector whose mask
; keeps the undef lane, followed by the unchanged bitcast of %mask and select against %passthru.
2519 define <8 x i64> @undef_test_permvar_di_512_mask(<8 x i64> %a0, <8 x i64> %passthru, i8 %mask) {
2521 ; CHECK-LABEL: @undef_test_permvar_di_512_mask(
2522 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i64> [[A0:%.*]], <8 x i64> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2523 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2524 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i64> [[TMP1]], <8 x i64> [[PASSTHRU:%.*]]
2525 ; CHECK-NEXT: ret <8 x i64> [[TMP3]]
2527 %1 = call <8 x i64> @llvm.x86.avx512.permvar.di.512(<8 x i64> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2528 %2 = bitcast i8 %mask to <8 x i1>
2529 %3 = select <8 x i1> %2, <8 x i64> %1, <8 x i64> %passthru
; AVX-512 variable-permute intrinsic taking <8 x double> data and <8 x i64>
; indices; exercised by the *_permvar_df_512 tests that follow.
2533 declare <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double>, <8 x i64>)
; Identity index vector <0, 1, ..., 7>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
2535 define <8 x double> @identity_test_permvar_df_512(<8 x double> %a0) {
2537 ; CHECK-LABEL: @identity_test_permvar_df_512(
2538 ; CHECK-NEXT: ret <8 x double> [[A0:%.*]]
2540 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
2544 define <8 x double> @identity_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2546 ; CHECK-LABEL: @identity_test_permvar_df_512_mask(
2547 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2548 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x double> [[A0:%.*]], <8 x double> [[PASSTHRU:%.*]]
2549 ; CHECK-NEXT: ret <8 x double> [[TMP2]]
2551 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 0, i64 1, i64 2, i64 3, i64 4, i64 5, i64 6, i64 7>)
2552 %2 = bitcast i8 %mask to <8 x i1>
2553 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
2557 define <8 x double> @zero_test_permvar_df_512(<8 x double> %a0) {
2559 ; CHECK-LABEL: @zero_test_permvar_df_512(
2560 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer
2561 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2563 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
2567 define <8 x double> @zero_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2569 ; CHECK-LABEL: @zero_test_permvar_df_512_mask(
2570 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> zeroinitializer
2571 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2572 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2573 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2575 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> zeroinitializer)
2576 %2 = bitcast i8 %mask to <8 x i1>
2577 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; Constant descending indices <7, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
2581 define <8 x double> @shuffle_test_permvar_df_512(<8 x double> %a0) {
2583 ; CHECK-LABEL: @shuffle_test_permvar_df_512(
2584 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2585 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2587 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Constant descending indices <7, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
2591 define <8 x double> @shuffle_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2593 ; CHECK-LABEL: @shuffle_test_permvar_df_512_mask(
2594 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2595 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2596 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2597 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2599 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 7, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2600 %2 = bitcast i8 %mask to <8 x i1>
2601 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
2605 define <8 x double> @undef_test_permvar_df_512(<8 x double> %a0) {
2607 ; CHECK-LABEL: @undef_test_permvar_df_512(
2608 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2609 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
2611 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
2615 define <8 x double> @undef_test_permvar_df_512_mask(<8 x double> %a0, <8 x double> %passthru, i8 %mask) {
2617 ; CHECK-LABEL: @undef_test_permvar_df_512_mask(
2618 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x double> [[A0:%.*]], <8 x double> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2619 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2620 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[PASSTHRU:%.*]]
2621 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
2623 %1 = call <8 x double> @llvm.x86.avx512.permvar.df.512(<8 x double> %a0, <8 x i64> <i64 undef, i64 6, i64 5, i64 4, i64 3, i64 2, i64 1, i64 0>)
2624 %2 = bitcast i8 %mask to <8 x i1>
2625 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %passthru
; AVX-512 variable-permute intrinsic taking <8 x i16> data and <8 x i16>
; indices; exercised by the *_permvar_hi_128 tests that follow.
2629 declare <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16>, <8 x i16>)
; Identity index vector <0, 1, ..., 7>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
2631 define <8 x i16> @identity_test_permvar_hi_128(<8 x i16> %a0) {
2633 ; CHECK-LABEL: @identity_test_permvar_hi_128(
2634 ; CHECK-NEXT: ret <8 x i16> [[A0:%.*]]
2636 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
2640 define <8 x i16> @identity_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2642 ; CHECK-LABEL: @identity_test_permvar_hi_128_mask(
2643 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2644 ; CHECK-NEXT: [[TMP2:%.*]] = select <8 x i1> [[TMP1]], <8 x i16> [[A0:%.*]], <8 x i16> [[PASSTHRU:%.*]]
2645 ; CHECK-NEXT: ret <8 x i16> [[TMP2]]
2647 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7>)
2648 %2 = bitcast i8 %mask to <8 x i1>
2649 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
2653 define <8 x i16> @zero_test_permvar_hi_128(<8 x i16> %a0) {
2655 ; CHECK-LABEL: @zero_test_permvar_hi_128(
2656 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer
2657 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2659 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
2663 define <8 x i16> @zero_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2665 ; CHECK-LABEL: @zero_test_permvar_hi_128_mask(
2666 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> zeroinitializer
2667 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2668 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2669 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2671 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> zeroinitializer)
2672 %2 = bitcast i8 %mask to <8 x i1>
2673 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; Constant descending indices <7, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
2677 define <8 x i16> @shuffle_test_permvar_hi_128(<8 x i16> %a0) {
2679 ; CHECK-LABEL: @shuffle_test_permvar_hi_128(
2680 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2681 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2683 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Constant descending indices <7, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
2687 define <8 x i16> @shuffle_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2689 ; CHECK-LABEL: @shuffle_test_permvar_hi_128_mask(
2690 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> <i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2691 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2692 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2693 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2695 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2696 %2 = bitcast i8 %mask to <8 x i1>
2697 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
2701 define <8 x i16> @undef_test_permvar_hi_128(<8 x i16> %a0) {
2703 ; CHECK-LABEL: @undef_test_permvar_hi_128(
2704 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2705 ; CHECK-NEXT: ret <8 x i16> [[TMP1]]
2707 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
2711 define <8 x i16> @undef_test_permvar_hi_128_mask(<8 x i16> %a0, <8 x i16> %passthru, i8 %mask) {
2713 ; CHECK-LABEL: @undef_test_permvar_hi_128_mask(
2714 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <8 x i16> [[A0:%.*]], <8 x i16> poison, <8 x i32> <i32 undef, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2715 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
2716 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x i16> [[TMP1]], <8 x i16> [[PASSTHRU:%.*]]
2717 ; CHECK-NEXT: ret <8 x i16> [[TMP3]]
2719 %1 = call <8 x i16> @llvm.x86.avx512.permvar.hi.128(<8 x i16> %a0, <8 x i16> <i16 undef, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2720 %2 = bitcast i8 %mask to <8 x i1>
2721 %3 = select <8 x i1> %2, <8 x i16> %1, <8 x i16> %passthru
; AVX-512 variable-permute intrinsic taking <16 x i16> data and <16 x i16>
; indices; exercised by the *_permvar_hi_256 tests that follow.
2725 declare <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16>, <16 x i16>)
; Identity index vector <0, 1, ..., 15>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
2727 define <16 x i16> @identity_test_permvar_hi_256(<16 x i16> %a0) {
2729 ; CHECK-LABEL: @identity_test_permvar_hi_256(
2730 ; CHECK-NEXT: ret <16 x i16> [[A0:%.*]]
2732 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
2736 define <16 x i16> @identity_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2738 ; CHECK-LABEL: @identity_test_permvar_hi_256_mask(
2739 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2740 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i16> [[A0:%.*]], <16 x i16> [[PASSTHRU:%.*]]
2741 ; CHECK-NEXT: ret <16 x i16> [[TMP2]]
2743 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15>)
2744 %2 = bitcast i16 %mask to <16 x i1>
2745 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
2749 define <16 x i16> @zero_test_permvar_hi_256(<16 x i16> %a0) {
2751 ; CHECK-LABEL: @zero_test_permvar_hi_256(
2752 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer
2753 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2755 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
2759 define <16 x i16> @zero_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2761 ; CHECK-LABEL: @zero_test_permvar_hi_256_mask(
2762 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> zeroinitializer
2763 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2764 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2765 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2767 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> zeroinitializer)
2768 %2 = bitcast i16 %mask to <16 x i1>
2769 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; Constant descending indices <15, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
2773 define <16 x i16> @shuffle_test_permvar_hi_256(<16 x i16> %a0) {
2775 ; CHECK-LABEL: @shuffle_test_permvar_hi_256(
2776 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2777 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2779 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Constant descending indices <15, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
2783 define <16 x i16> @shuffle_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2785 ; CHECK-LABEL: @shuffle_test_permvar_hi_256_mask(
2786 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2787 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2788 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2789 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2791 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2792 %2 = bitcast i16 %mask to <16 x i1>
2793 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
2797 define <16 x i16> @undef_test_permvar_hi_256(<16 x i16> %a0) {
2799 ; CHECK-LABEL: @undef_test_permvar_hi_256(
2800 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2801 ; CHECK-NEXT: ret <16 x i16> [[TMP1]]
2803 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
2807 define <16 x i16> @undef_test_permvar_hi_256_mask(<16 x i16> %a0, <16 x i16> %passthru, i16 %mask) {
2809 ; CHECK-LABEL: @undef_test_permvar_hi_256_mask(
2810 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i16> [[A0:%.*]], <16 x i16> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2811 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2812 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i16> [[TMP1]], <16 x i16> [[PASSTHRU:%.*]]
2813 ; CHECK-NEXT: ret <16 x i16> [[TMP3]]
2815 %1 = call <16 x i16> @llvm.x86.avx512.permvar.hi.256(<16 x i16> %a0, <16 x i16> <i16 undef, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2816 %2 = bitcast i16 %mask to <16 x i1>
2817 %3 = select <16 x i1> %2, <16 x i16> %1, <16 x i16> %passthru
; AVX-512 variable-permute intrinsic taking <32 x i16> data and <32 x i16>
; indices; exercised by the *_permvar_hi_512 tests that follow.
2821 declare <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16>, <32 x i16>)
; Identity index vector <0, 1, ..., 31>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
2823 define <32 x i16> @identity_test_permvar_hi_512(<32 x i16> %a0) {
2825 ; CHECK-LABEL: @identity_test_permvar_hi_512(
2826 ; CHECK-NEXT: ret <32 x i16> [[A0:%.*]]
2828 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
2832 define <32 x i16> @identity_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2834 ; CHECK-LABEL: @identity_test_permvar_hi_512_mask(
2835 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2836 ; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i16> [[A0:%.*]], <32 x i16> [[PASSTHRU:%.*]]
2837 ; CHECK-NEXT: ret <32 x i16> [[TMP2]]
2839 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 0, i16 1, i16 2, i16 3, i16 4, i16 5, i16 6, i16 7, i16 8, i16 9, i16 10, i16 11, i16 12, i16 13, i16 14, i16 15, i16 16, i16 17, i16 18, i16 19, i16 20, i16 21, i16 22, i16 23, i16 24, i16 25, i16 26, i16 27, i16 28, i16 29, i16 30, i16 31>)
2840 %2 = bitcast i32 %mask to <32 x i1>
2841 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
2845 define <32 x i16> @zero_test_permvar_hi_512(<32 x i16> %a0) {
2847 ; CHECK-LABEL: @zero_test_permvar_hi_512(
2848 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer
2849 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2851 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
2855 define <32 x i16> @zero_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2857 ; CHECK-LABEL: @zero_test_permvar_hi_512_mask(
2858 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> zeroinitializer
2859 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2860 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2861 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2863 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> zeroinitializer)
2864 %2 = bitcast i32 %mask to <32 x i1>
2865 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; Constant descending indices <31, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
2869 define <32 x i16> @shuffle_test_permvar_hi_512(<32 x i16> %a0) {
2871 ; CHECK-LABEL: @shuffle_test_permvar_hi_512(
2872 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2873 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2875 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Constant descending indices <31, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
2879 define <32 x i16> @shuffle_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2881 ; CHECK-LABEL: @shuffle_test_permvar_hi_512_mask(
2882 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2883 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2884 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2885 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2887 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 31, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2888 %2 = bitcast i32 %mask to <32 x i1>
2889 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
2893 define <32 x i16> @undef_test_permvar_hi_512(<32 x i16> %a0) {
2895 ; CHECK-LABEL: @undef_test_permvar_hi_512(
2896 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2897 ; CHECK-NEXT: ret <32 x i16> [[TMP1]]
2899 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
2903 define <32 x i16> @undef_test_permvar_hi_512_mask(<32 x i16> %a0, <32 x i16> %passthru, i32 %mask) {
2905 ; CHECK-LABEL: @undef_test_permvar_hi_512_mask(
2906 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i16> [[A0:%.*]], <32 x i16> poison, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2907 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
2908 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i16> [[TMP1]], <32 x i16> [[PASSTHRU:%.*]]
2909 ; CHECK-NEXT: ret <32 x i16> [[TMP3]]
2911 %1 = call <32 x i16> @llvm.x86.avx512.permvar.hi.512(<32 x i16> %a0, <32 x i16> <i16 undef, i16 30, i16 29, i16 28, i16 27, i16 26, i16 25, i16 24, i16 23, i16 22, i16 21, i16 20, i16 19, i16 18, i16 17, i16 16, i16 15, i16 14, i16 13, i16 12, i16 11, i16 10, i16 9, i16 8, i16 7, i16 6, i16 5, i16 4, i16 3, i16 2, i16 1, i16 0>)
2912 %2 = bitcast i32 %mask to <32 x i1>
2913 %3 = select <32 x i1> %2, <32 x i16> %1, <32 x i16> %passthru
; AVX-512 variable-permute intrinsic taking <16 x i8> data and <16 x i8>
; indices; exercised by the *_permvar_qi_128 tests that follow.
2917 declare <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8>, <16 x i8>)
; Identity index vector <0, 1, ..., 15>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
2919 define <16 x i8> @identity_test_permvar_qi_128(<16 x i8> %a0) {
2921 ; CHECK-LABEL: @identity_test_permvar_qi_128(
2922 ; CHECK-NEXT: ret <16 x i8> [[A0:%.*]]
2924 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
2928 define <16 x i8> @identity_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2930 ; CHECK-LABEL: @identity_test_permvar_qi_128_mask(
2931 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2932 ; CHECK-NEXT: [[TMP2:%.*]] = select <16 x i1> [[TMP1]], <16 x i8> [[A0:%.*]], <16 x i8> [[PASSTHRU:%.*]]
2933 ; CHECK-NEXT: ret <16 x i8> [[TMP2]]
2935 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15>)
2936 %2 = bitcast i16 %mask to <16 x i1>
2937 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
2941 define <16 x i8> @zero_test_permvar_qi_128(<16 x i8> %a0) {
2943 ; CHECK-LABEL: @zero_test_permvar_qi_128(
2944 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
2945 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2947 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
2951 define <16 x i8> @zero_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2953 ; CHECK-LABEL: @zero_test_permvar_qi_128_mask(
2954 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> zeroinitializer
2955 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2956 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
2957 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
2959 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> zeroinitializer)
2960 %2 = bitcast i16 %mask to <16 x i1>
2961 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; Constant descending indices <15, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
2965 define <16 x i8> @shuffle_test_permvar_qi_128(<16 x i8> %a0) {
2967 ; CHECK-LABEL: @shuffle_test_permvar_qi_128(
2968 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2969 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2971 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Constant descending indices <15, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
2975 define <16 x i8> @shuffle_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
2977 ; CHECK-LABEL: @shuffle_test_permvar_qi_128_mask(
2978 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> <i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2979 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
2980 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
2981 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
2983 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
2984 %2 = bitcast i16 %mask to <16 x i1>
2985 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
2989 define <16 x i8> @undef_test_permvar_qi_128(<16 x i8> %a0) {
2991 ; CHECK-LABEL: @undef_test_permvar_qi_128(
2992 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
2993 ; CHECK-NEXT: ret <16 x i8> [[TMP1]]
2995 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
2999 define <16 x i8> @undef_test_permvar_qi_128_mask(<16 x i8> %a0, <16 x i8> %passthru, i16 %mask) {
3001 ; CHECK-LABEL: @undef_test_permvar_qi_128_mask(
3002 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <16 x i8> [[A0:%.*]], <16 x i8> poison, <16 x i32> <i32 undef, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3003 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3004 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x i8> [[TMP1]], <16 x i8> [[PASSTHRU:%.*]]
3005 ; CHECK-NEXT: ret <16 x i8> [[TMP3]]
3007 %1 = call <16 x i8> @llvm.x86.avx512.permvar.qi.128(<16 x i8> %a0, <16 x i8> <i8 undef, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3008 %2 = bitcast i16 %mask to <16 x i1>
3009 %3 = select <16 x i1> %2, <16 x i8> %1, <16 x i8> %passthru
; AVX-512 variable-permute intrinsic taking <32 x i8> data and <32 x i8>
; indices; exercised by the *_permvar_qi_256 tests that follow.
3013 declare <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8>, <32 x i8>)
; Identity index vector <0, 1, ..., 31>: the permute call is expected to fold
; away entirely, so the function simply returns %a0.
3015 define <32 x i8> @identity_test_permvar_qi_256(<32 x i8> %a0) {
3017 ; CHECK-LABEL: @identity_test_permvar_qi_256(
3018 ; CHECK-NEXT: ret <32 x i8> [[A0:%.*]]
3020 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
; Identity permute followed by a mask blend: the call is expected to fold to
; %a0, leaving only the bitcast of %mask and the select against %passthru.
3024 define <32 x i8> @identity_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
3026 ; CHECK-LABEL: @identity_test_permvar_qi_256_mask(
3027 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
3028 ; CHECK-NEXT: [[TMP2:%.*]] = select <32 x i1> [[TMP1]], <32 x i8> [[A0:%.*]], <32 x i8> [[PASSTHRU:%.*]]
3029 ; CHECK-NEXT: ret <32 x i8> [[TMP2]]
3031 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31>)
3032 %2 = bitcast i32 %mask to <32 x i1>
3033 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; All-zero index vector: the permute call is expected to become a shufflevector
; with a zeroinitializer mask, i.e. every result lane reads lane 0 of %a0.
3037 define <32 x i8> @zero_test_permvar_qi_256(<32 x i8> %a0) {
3039 ; CHECK-LABEL: @zero_test_permvar_qi_256(
3040 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
3041 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
3043 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
; All-zero index vector followed by a mask blend: the call is expected to become
; a zeroinitializer-mask shufflevector, while the bitcast of %mask and the
; select against %passthru are kept.
3047 define <32 x i8> @zero_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
3049 ; CHECK-LABEL: @zero_test_permvar_qi_256_mask(
3050 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> zeroinitializer
3051 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
3052 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
3053 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
3055 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> zeroinitializer)
3056 %2 = bitcast i32 %mask to <32 x i1>
3057 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; Constant descending indices <31, ..., 0>: the permute call is expected to
; become a shufflevector that reverses the lanes of %a0.
3061 define <32 x i8> @shuffle_test_permvar_qi_256(<32 x i8> %a0) {
3063 ; CHECK-LABEL: @shuffle_test_permvar_qi_256(
3064 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3065 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
3067 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Constant descending indices <31, ..., 0> followed by a mask blend: the call is
; expected to become a lane-reversing shufflevector, while the bitcast of %mask
; and the select against %passthru are kept.
3071 define <32 x i8> @shuffle_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
3073 ; CHECK-LABEL: @shuffle_test_permvar_qi_256_mask(
3074 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> <i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3075 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
3076 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
3077 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
3079 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3080 %2 = bitcast i32 %mask to <32 x i1>
3081 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; Descending indices whose first element is undef: the permute call is expected
; to become a shufflevector whose first mask element is undef as well.
3085 define <32 x i8> @undef_test_permvar_qi_256(<32 x i8> %a0) {
3087 ; CHECK-LABEL: @undef_test_permvar_qi_256(
3088 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3089 ; CHECK-NEXT: ret <32 x i8> [[TMP1]]
3091 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Descending indices with an undef first element, followed by a mask blend: the
; call is expected to become a shufflevector with an undef first mask element,
; while the bitcast of %mask and the select against %passthru are kept.
3095 define <32 x i8> @undef_test_permvar_qi_256_mask(<32 x i8> %a0, <32 x i8> %passthru, i32 %mask) {
3097 ; CHECK-LABEL: @undef_test_permvar_qi_256_mask(
3098 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <32 x i8> [[A0:%.*]], <32 x i8> poison, <32 x i32> <i32 undef, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3099 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i32 [[MASK:%.*]] to <32 x i1>
3100 ; CHECK-NEXT: [[TMP3:%.*]] = select <32 x i1> [[TMP2]], <32 x i8> [[TMP1]], <32 x i8> [[PASSTHRU:%.*]]
3101 ; CHECK-NEXT: ret <32 x i8> [[TMP3]]
3103 %1 = call <32 x i8> @llvm.x86.avx512.permvar.qi.256(<32 x i8> %a0, <32 x i8> <i8 undef, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3104 %2 = bitcast i32 %mask to <32 x i1>
3105 %3 = select <32 x i1> %2, <32 x i8> %1, <32 x i8> %passthru
; Declaration of the permvar.qi.512 intrinsic (two <64 x i8> operands,
; <64 x i8> result) called by the 512-bit permvar tests.
3109 declare <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8>, <64 x i8>)
; permvar.qi.512 with the in-order index vector 0..63.
; Expects the call to fold away so that %a0 is returned directly.
3111 define <64 x i8> @identity_test_permvar_qi_512(<64 x i8> %a0) {
3113 ; CHECK-LABEL: @identity_test_permvar_qi_512(
3114 ; CHECK-NEXT: ret <64 x i8> [[A0:%.*]]
3116 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
; Masked form of the identity permute: indices 0..63, result blended with
; %passthru by a select on %mask bitcast to <64 x i1>.
; Expects the call to disappear so the select picks between %a0 and %passthru.
3120 define <64 x i8> @identity_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
3122 ; CHECK-LABEL: @identity_test_permvar_qi_512_mask(
3123 ; CHECK-NEXT: [[TMP1:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
3124 ; CHECK-NEXT: [[TMP2:%.*]] = select <64 x i1> [[TMP1]], <64 x i8> [[A0:%.*]], <64 x i8> [[PASSTHRU:%.*]]
3125 ; CHECK-NEXT: ret <64 x i8> [[TMP2]]
3127 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 0, i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8, i8 9, i8 10, i8 11, i8 12, i8 13, i8 14, i8 15, i8 16, i8 17, i8 18, i8 19, i8 20, i8 21, i8 22, i8 23, i8 24, i8 25, i8 26, i8 27, i8 28, i8 29, i8 30, i8 31, i8 32, i8 33, i8 34, i8 35, i8 36, i8 37, i8 38, i8 39, i8 40, i8 41, i8 42, i8 43, i8 44, i8 45, i8 46, i8 47, i8 48, i8 49, i8 50, i8 51, i8 52, i8 53, i8 54, i8 55, i8 56, i8 57, i8 58, i8 59, i8 60, i8 61, i8 62, i8 63>)
3128 %2 = bitcast i64 %mask to <64 x i1>
3129 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; permvar.qi.512 with an all-zero index vector.
; Expects a shufflevector of %a0 with a zeroinitializer mask (every result lane
; reads lane 0).
3133 define <64 x i8> @zero_test_permvar_qi_512(<64 x i8> %a0) {
3135 ; CHECK-LABEL: @zero_test_permvar_qi_512(
3136 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer
3137 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
3139 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
; Masked form of the all-zero-index permute: the result is blended with
; %passthru by a select on %mask bitcast to <64 x i1>.
; Expects a zeroinitializer-mask shufflevector feeding the unchanged
; bitcast/select pair.
3143 define <64 x i8> @zero_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
3145 ; CHECK-LABEL: @zero_test_permvar_qi_512_mask(
3146 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> zeroinitializer
3147 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
3148 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
3149 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
3151 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> zeroinitializer)
3152 %2 = bitcast i64 %mask to <64 x i1>
3153 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; permvar.qi.512 with the constant index vector 63..0 (lane order reversed).
; Expects a shufflevector of %a0 using the same 63..0 sequence as its mask.
3157 define <64 x i8> @shuffle_test_permvar_qi_512(<64 x i8> %a0) {
3159 ; CHECK-LABEL: @shuffle_test_permvar_qi_512(
3160 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3161 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
3163 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked form of the reversing permute (indices 63..0): the result is blended
; with %passthru by a select on %mask bitcast to <64 x i1>.
; Expects the call to become a shufflevector; the bitcast and select stay.
3167 define <64 x i8> @shuffle_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
3169 ; CHECK-LABEL: @shuffle_test_permvar_qi_512_mask(
3170 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> <i32 63, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3171 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
3172 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
3173 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
3175 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 63, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3176 %2 = bitcast i64 %mask to <64 x i1>
3177 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; permvar.qi.512 with a reversing index vector whose lane-0 index is undef
; (undef, 62, 61, ..., 0). Expects a shufflevector of %a0 whose mask keeps the
; undef entry in lane 0.
3181 define <64 x i8> @undef_test_permvar_qi_512(<64 x i8> %a0) {
3183 ; CHECK-LABEL: @undef_test_permvar_qi_512(
3184 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3185 ; CHECK-NEXT: ret <64 x i8> [[TMP1]]
3187 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
; Masked form of the undef-index permute (undef, 62, ..., 0): the result is
; blended with %passthru by a select on %mask bitcast to <64 x i1>.
; Expects a shufflevector with undef in mask lane 0 feeding the unchanged
; bitcast/select pair.
3191 define <64 x i8> @undef_test_permvar_qi_512_mask(<64 x i8> %a0, <64 x i8> %passthru, i64 %mask) {
3193 ; CHECK-LABEL: @undef_test_permvar_qi_512_mask(
3194 ; CHECK-NEXT: [[TMP1:%.*]] = shufflevector <64 x i8> [[A0:%.*]], <64 x i8> poison, <64 x i32> <i32 undef, i32 62, i32 61, i32 60, i32 59, i32 58, i32 57, i32 56, i32 55, i32 54, i32 53, i32 52, i32 51, i32 50, i32 49, i32 48, i32 47, i32 46, i32 45, i32 44, i32 43, i32 42, i32 41, i32 40, i32 39, i32 38, i32 37, i32 36, i32 35, i32 34, i32 33, i32 32, i32 31, i32 30, i32 29, i32 28, i32 27, i32 26, i32 25, i32 24, i32 23, i32 22, i32 21, i32 20, i32 19, i32 18, i32 17, i32 16, i32 15, i32 14, i32 13, i32 12, i32 11, i32 10, i32 9, i32 8, i32 7, i32 6, i32 5, i32 4, i32 3, i32 2, i32 1, i32 0>
3195 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i64 [[MASK:%.*]] to <64 x i1>
3196 ; CHECK-NEXT: [[TMP3:%.*]] = select <64 x i1> [[TMP2]], <64 x i8> [[TMP1]], <64 x i8> [[PASSTHRU:%.*]]
3197 ; CHECK-NEXT: ret <64 x i8> [[TMP3]]
3199 %1 = call <64 x i8> @llvm.x86.avx512.permvar.qi.512(<64 x i8> %a0, <64 x i8> <i8 undef, i8 62, i8 61, i8 60, i8 59, i8 58, i8 57, i8 56, i8 55, i8 54, i8 53, i8 52, i8 51, i8 50, i8 49, i8 48, i8 47, i8 46, i8 45, i8 44, i8 43, i8 42, i8 41, i8 40, i8 39, i8 38, i8 37, i8 36, i8 35, i8 34, i8 33, i8 32, i8 31, i8 30, i8 29, i8 28, i8 27, i8 26, i8 25, i8 24, i8 23, i8 22, i8 21, i8 20, i8 19, i8 18, i8 17, i8 16, i8 15, i8 14, i8 13, i8 12, i8 11, i8 10, i8 9, i8 8, i8 7, i8 6, i8 5, i8 4, i8 3, i8 2, i8 1, i8 0>)
3200 %2 = bitcast i64 %mask to <64 x i1>
3201 %3 = select <64 x i1> %2, <64 x i8> %1, <64 x i8> %passthru
; Declaration of the add.ps.512 intrinsic: two <16 x float> operands plus an
; i32 argument, returning <16 x float>.
3205 declare <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float>, <16 x float>, i32)
; add.ps.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fadd of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3207 define <16 x float> @test_add_ps(<16 x float> %a, <16 x float> %b) {
3209 ; CHECK-LABEL: @test_add_ps(
3210 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
3211 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3213 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; add.ps.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fadd).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3217 define <16 x float> @test_add_ps_round(<16 x float> %a, <16 x float> %b) {
3219 ; CHECK-LABEL: @test_add_ps_round(
3220 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3221 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3223 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; add.ps.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Expects a plain fadd feeding the unchanged bitcast/select pair.
3227 define <16 x float> @test_add_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3229 ; CHECK-LABEL: @test_add_ps_mask(
3230 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <16 x float> [[A:%.*]], [[B:%.*]]
3231 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3232 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3233 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3235 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3236 %2 = bitcast i16 %mask to <16 x i1>
3237 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; add.ps.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3241 define <16 x float> @test_add_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3243 ; CHECK-LABEL: @test_add_ps_mask_round(
3244 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3245 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3246 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3247 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3249 %1 = call <16 x float> @llvm.x86.avx512.add.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3250 %2 = bitcast i16 %mask to <16 x i1>
3251 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Declaration of the add.pd.512 intrinsic: two <8 x double> operands plus an
; i32 argument, returning <8 x double>.
3255 declare <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double>, <8 x double>, i32)
; add.pd.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fadd of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3257 define <8 x double> @test_add_pd(<8 x double> %a, <8 x double> %b) {
3259 ; CHECK-LABEL: @test_add_pd(
3260 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
3261 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3263 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; add.pd.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fadd).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3267 define <8 x double> @test_add_pd_round(<8 x double> %a, <8 x double> %b) {
3269 ; CHECK-LABEL: @test_add_pd_round(
3270 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3271 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3273 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; add.pd.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Expects a plain fadd feeding the unchanged bitcast/select pair.
3277 define <8 x double> @test_add_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3279 ; CHECK-LABEL: @test_add_pd_mask(
3280 ; CHECK-NEXT: [[TMP1:%.*]] = fadd <8 x double> [[A:%.*]], [[B:%.*]]
3281 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3282 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3283 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3285 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3286 %2 = bitcast i8 %mask to <8 x i1>
3287 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; add.pd.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3291 define <8 x double> @test_add_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3293 ; CHECK-LABEL: @test_add_pd_mask_round(
3294 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3295 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3296 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3297 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3299 %1 = call <8 x double> @llvm.x86.avx512.add.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3300 %2 = bitcast i8 %mask to <8 x i1>
3301 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Declaration of the sub.ps.512 intrinsic: two <16 x float> operands plus an
; i32 argument, returning <16 x float>.
3305 declare <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float>, <16 x float>, i32)
; sub.ps.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fsub of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3307 define <16 x float> @test_sub_ps(<16 x float> %a, <16 x float> %b) {
3309 ; CHECK-LABEL: @test_sub_ps(
3310 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
3311 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3313 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; sub.ps.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fsub).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3317 define <16 x float> @test_sub_ps_round(<16 x float> %a, <16 x float> %b) {
3319 ; CHECK-LABEL: @test_sub_ps_round(
3320 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3321 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3323 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; sub.ps.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Expects a plain fsub feeding the unchanged bitcast/select pair.
3327 define <16 x float> @test_sub_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3329 ; CHECK-LABEL: @test_sub_ps_mask(
3330 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <16 x float> [[A:%.*]], [[B:%.*]]
3331 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3332 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3333 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3335 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3336 %2 = bitcast i16 %mask to <16 x i1>
3337 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; sub.ps.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3341 define <16 x float> @test_sub_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3343 ; CHECK-LABEL: @test_sub_ps_mask_round(
3344 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3345 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3346 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3347 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3349 %1 = call <16 x float> @llvm.x86.avx512.sub.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3350 %2 = bitcast i16 %mask to <16 x i1>
3351 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Declaration of the sub.pd.512 intrinsic: two <8 x double> operands plus an
; i32 argument, returning <8 x double>.
3355 declare <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double>, <8 x double>, i32)
; sub.pd.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fsub of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3357 define <8 x double> @test_sub_pd(<8 x double> %a, <8 x double> %b) {
3359 ; CHECK-LABEL: @test_sub_pd(
3360 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
3361 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3363 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; sub.pd.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fsub).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3367 define <8 x double> @test_sub_pd_round(<8 x double> %a, <8 x double> %b) {
3369 ; CHECK-LABEL: @test_sub_pd_round(
3370 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3371 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3373 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; sub.pd.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Expects a plain fsub feeding the unchanged bitcast/select pair.
3377 define <8 x double> @test_sub_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3379 ; CHECK-LABEL: @test_sub_pd_mask(
3380 ; CHECK-NEXT: [[TMP1:%.*]] = fsub <8 x double> [[A:%.*]], [[B:%.*]]
3381 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3382 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3383 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3385 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3386 %2 = bitcast i8 %mask to <8 x i1>
3387 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; sub.pd.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3391 define <8 x double> @test_sub_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3393 ; CHECK-LABEL: @test_sub_pd_mask_round(
3394 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3395 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3396 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3397 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3399 %1 = call <8 x double> @llvm.x86.avx512.sub.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3400 %2 = bitcast i8 %mask to <8 x i1>
3401 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Declaration of the mul.ps.512 intrinsic: two <16 x float> operands plus an
; i32 argument, returning <16 x float>.
3405 declare <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float>, <16 x float>, i32)
; mul.ps.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fmul of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3407 define <16 x float> @test_mul_ps(<16 x float> %a, <16 x float> %b) {
3409 ; CHECK-LABEL: @test_mul_ps(
3410 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
3411 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3413 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; mul.ps.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fmul).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3417 define <16 x float> @test_mul_ps_round(<16 x float> %a, <16 x float> %b) {
3419 ; CHECK-LABEL: @test_mul_ps_round(
3420 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3421 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3423 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; mul.ps.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Expects a plain fmul feeding the unchanged bitcast/select pair.
3427 define <16 x float> @test_mul_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3429 ; CHECK-LABEL: @test_mul_ps_mask(
3430 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <16 x float> [[A:%.*]], [[B:%.*]]
3431 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3432 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3433 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3435 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3436 %2 = bitcast i16 %mask to <16 x i1>
3437 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; mul.ps.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3441 define <16 x float> @test_mul_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3443 ; CHECK-LABEL: @test_mul_ps_mask_round(
3444 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3445 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3446 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3447 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3449 %1 = call <16 x float> @llvm.x86.avx512.mul.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3450 %2 = bitcast i16 %mask to <16 x i1>
3451 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Declaration of the mul.pd.512 intrinsic: two <8 x double> operands plus an
; i32 argument, returning <8 x double>.
3455 declare <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double>, <8 x double>, i32)
; mul.pd.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fmul of %a and %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3457 define <8 x double> @test_mul_pd(<8 x double> %a, <8 x double> %b) {
3459 ; CHECK-LABEL: @test_mul_pd(
3460 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
3461 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3463 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; mul.pd.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fmul).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3467 define <8 x double> @test_mul_pd_round(<8 x double> %a, <8 x double> %b) {
3469 ; CHECK-LABEL: @test_mul_pd_round(
3470 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3471 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3473 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; mul.pd.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Expects a plain fmul feeding the unchanged bitcast/select pair.
3477 define <8 x double> @test_mul_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3479 ; CHECK-LABEL: @test_mul_pd_mask(
3480 ; CHECK-NEXT: [[TMP1:%.*]] = fmul <8 x double> [[A:%.*]], [[B:%.*]]
3481 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3482 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3483 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3485 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3486 %2 = bitcast i8 %mask to <8 x i1>
3487 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; mul.pd.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3491 define <8 x double> @test_mul_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3493 ; CHECK-LABEL: @test_mul_pd_mask_round(
3494 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3495 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3496 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3497 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3499 %1 = call <8 x double> @llvm.x86.avx512.mul.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3500 %2 = bitcast i8 %mask to <8 x i1>
3501 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Declaration of the div.ps.512 intrinsic: two <16 x float> operands plus an
; i32 argument, returning <16 x float>.
3505 declare <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float>, <16 x float>, i32)
; div.ps.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fdiv of %a by %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3507 define <16 x float> @test_div_ps(<16 x float> %a, <16 x float> %b) {
3509 ; CHECK-LABEL: @test_div_ps(
3510 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
3511 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3513 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
; div.ps.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fdiv).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3517 define <16 x float> @test_div_ps_round(<16 x float> %a, <16 x float> %b) {
3519 ; CHECK-LABEL: @test_div_ps_round(
3520 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3521 ; CHECK-NEXT: ret <16 x float> [[TMP1]]
3523 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
; div.ps.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Expects a plain fdiv feeding the unchanged bitcast/select pair.
3527 define <16 x float> @test_div_ps_mask(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3529 ; CHECK-LABEL: @test_div_ps_mask(
3530 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <16 x float> [[A:%.*]], [[B:%.*]]
3531 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3532 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3533 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3535 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 4)
3536 %2 = bitcast i16 %mask to <16 x i1>
3537 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; div.ps.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <16 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3541 define <16 x float> @test_div_ps_mask_round(<16 x float> %a, <16 x float> %b, <16 x float> %c, i16 %mask) {
3543 ; CHECK-LABEL: @test_div_ps_mask_round(
3544 ; CHECK-NEXT: [[TMP1:%.*]] = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> [[A:%.*]], <16 x float> [[B:%.*]], i32 8)
3545 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i16 [[MASK:%.*]] to <16 x i1>
3546 ; CHECK-NEXT: [[TMP3:%.*]] = select <16 x i1> [[TMP2]], <16 x float> [[TMP1]], <16 x float> [[C:%.*]]
3547 ; CHECK-NEXT: ret <16 x float> [[TMP3]]
3549 %1 = call <16 x float> @llvm.x86.avx512.div.ps.512(<16 x float> %a, <16 x float> %b, i32 8)
3550 %2 = bitcast i16 %mask to <16 x i1>
3551 %3 = select <16 x i1> %2, <16 x float> %1, <16 x float> %c
; Declaration of the div.pd.512 intrinsic: two <8 x double> operands plus an
; i32 argument, returning <8 x double>.
3555 declare <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double>, <8 x double>, i32)
; div.pd.512 called with i32 4 as its last argument.
; Expects the call to be replaced by a plain vector fdiv of %a by %b.
; NOTE(review): 4 is presumably the "current direction" rounding constant - confirm.
3557 define <8 x double> @test_div_pd(<8 x double> %a, <8 x double> %b) {
3559 ; CHECK-LABEL: @test_div_pd(
3560 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
3561 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3563 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
; div.pd.512 called with i32 8 as its last argument.
; Negative test: expects the intrinsic call to be left untouched (no fdiv).
; NOTE(review): 8 presumably selects an explicit rounding mode - confirm.
3567 define <8 x double> @test_div_pd_round(<8 x double> %a, <8 x double> %b) {
3569 ; CHECK-LABEL: @test_div_pd_round(
3570 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3571 ; CHECK-NEXT: ret <8 x double> [[TMP1]]
3573 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
; div.pd.512 (last argument i32 4) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Expects a plain fdiv feeding the unchanged bitcast/select pair.
3577 define <8 x double> @test_div_pd_mask(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3579 ; CHECK-LABEL: @test_div_pd_mask(
3580 ; CHECK-NEXT: [[TMP1:%.*]] = fdiv <8 x double> [[A:%.*]], [[B:%.*]]
3581 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3582 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3583 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3585 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 4)
3586 %2 = bitcast i8 %mask to <8 x i1>
3587 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; div.pd.512 (last argument i32 8) whose result is blended with %c by a select
; on %mask bitcast to <8 x i1>.
; Negative test: expects the intrinsic call, bitcast and select all to remain.
3591 define <8 x double> @test_div_pd_mask_round(<8 x double> %a, <8 x double> %b, <8 x double> %c, i8 %mask) {
3593 ; CHECK-LABEL: @test_div_pd_mask_round(
3594 ; CHECK-NEXT: [[TMP1:%.*]] = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> [[A:%.*]], <8 x double> [[B:%.*]], i32 8)
3595 ; CHECK-NEXT: [[TMP2:%.*]] = bitcast i8 [[MASK:%.*]] to <8 x i1>
3596 ; CHECK-NEXT: [[TMP3:%.*]] = select <8 x i1> [[TMP2]], <8 x double> [[TMP1]], <8 x double> [[C:%.*]]
3597 ; CHECK-NEXT: ret <8 x double> [[TMP3]]
3599 %1 = call <8 x double> @llvm.x86.avx512.div.pd.512(<8 x double> %a, <8 x double> %b, i32 8)
3600 %2 = bitcast i8 %mask to <8 x i1>
3601 %3 = select <8 x i1> %2, <8 x double> %1, <8 x double> %c
; Declaration of the vcomi.ss intrinsic: two <4 x float> operands and two i32
; arguments, returning i32.
3605 declare i32 @llvm.x86.avx512.vcomi.ss(<4 x float>, <4 x float>, i32, i32)
; Builds both vcomi.ss operands with %a / %b in lane 0 and float constants in
; lanes 1-3. Expects the constant inserts to be removed, so each operand is only
; the lane-0 insert into poison, while the intrinsic call itself is kept.
3607 define i32 @test_comi_ss_0(float %a, float %b) {
3609 ; CHECK-LABEL: @test_comi_ss_0(
3610 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <4 x float> poison, float [[A:%.*]], i64 0
3611 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <4 x float> poison, float [[B:%.*]], i64 0
3612 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> [[TMP1]], <4 x float> [[TMP2]], i32 0, i32 4)
3613 ; CHECK-NEXT: ret i32 [[TMP3]]
3615 %1 = insertelement <4 x float> undef, float %a, i32 0
3616 %2 = insertelement <4 x float> %1, float 1.000000e+00, i32 1
3617 %3 = insertelement <4 x float> %2, float 2.000000e+00, i32 2
3618 %4 = insertelement <4 x float> %3, float 3.000000e+00, i32 3
3619 %5 = insertelement <4 x float> undef, float %b, i32 0
3620 %6 = insertelement <4 x float> %5, float 4.000000e+00, i32 1
3621 %7 = insertelement <4 x float> %6, float 5.000000e+00, i32 2
3622 %8 = insertelement <4 x float> %7, float 6.000000e+00, i32 3
3623 %9 = tail call i32 @llvm.x86.avx512.vcomi.ss(<4 x float> %4, <4 x float> %8, i32 0, i32 4)
; Declaration of the vcomi.sd intrinsic: two <2 x double> operands and two i32
; arguments, returning i32.
3627 declare i32 @llvm.x86.avx512.vcomi.sd(<2 x double>, <2 x double>, i32, i32)
; Builds both vcomi.sd operands with %a / %b in lane 0 and a double constant in
; lane 1. Expects the constant inserts to be removed, so each operand is only
; the lane-0 insert into poison, while the intrinsic call itself is kept.
3629 define i32 @test_comi_sd_0(double %a, double %b) {
3631 ; CHECK-LABEL: @test_comi_sd_0(
3632 ; CHECK-NEXT: [[TMP1:%.*]] = insertelement <2 x double> poison, double [[A:%.*]], i64 0
3633 ; CHECK-NEXT: [[TMP2:%.*]] = insertelement <2 x double> poison, double [[B:%.*]], i64 0
3634 ; CHECK-NEXT: [[TMP3:%.*]] = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> [[TMP1]], <2 x double> [[TMP2]], i32 0, i32 4)
3635 ; CHECK-NEXT: ret i32 [[TMP3]]
3637 %1 = insertelement <2 x double> undef, double %a, i32 0
3638 %2 = insertelement <2 x double> %1, double 1.000000e+00, i32 1
3639 %3 = insertelement <2 x double> undef, double %b, i32 0
3640 %4 = insertelement <2 x double> %3, double 2.000000e+00, i32 1
3641 %5 = tail call i32 @llvm.x86.avx512.vcomi.sd(<2 x double> %2, <2 x double> %4, i32 0, i32 4)