1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
; Unmasked splat: %s is inserted into lane 0 and an all-zero shuffle mask replicates it to 4 doubles.
; The CHECK line expects a register-source vbroadcastsd from %xmm0 into %ymm0.
4 define <4 x double> @test_double_to_4(double %s) {
5 ; CHECK-LABEL: test_double_to_4:
7 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
9 %vec = insertelement <2 x double> undef, double %s, i32 0
10 %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %ymm1, then vmovapd to %ymm0.
13 define <4 x double> @test_masked_double_to_4_mask0(double %s, <4 x double> %default, <4 x double> %mask) {
14 ; CHECK-LABEL: test_masked_double_to_4_mask0:
16 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
17 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
18 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
19 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
21 %vec = insertelement <2 x double> undef, double %s, i32 0
22 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
23 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
24 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %ymm0.
28 define <4 x double> @test_masked_z_double_to_4_mask0(double %s, <4 x double> %mask) {
29 ; CHECK-LABEL: test_masked_z_double_to_4_mask0:
31 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
32 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
33 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
35 %vec = insertelement <2 x double> undef, double %s, i32 0
36 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
37 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
38 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %ymm1, then vmovapd to %ymm0.
41 define <4 x double> @test_masked_double_to_4_mask1(double %s, <4 x double> %default, <4 x double> %mask) {
42 ; CHECK-LABEL: test_masked_double_to_4_mask1:
44 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
45 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
46 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
47 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
49 %vec = insertelement <2 x double> undef, double %s, i32 0
50 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
51 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
52 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %ymm0.
56 define <4 x double> @test_masked_z_double_to_4_mask1(double %s, <4 x double> %mask) {
57 ; CHECK-LABEL: test_masked_z_double_to_4_mask1:
59 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
60 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
61 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
63 %vec = insertelement <2 x double> undef, double %s, i32 0
64 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
65 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
66 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %ymm1, then vmovapd to %ymm0.
69 define <4 x double> @test_masked_double_to_4_mask2(double %s, <4 x double> %default, <4 x double> %mask) {
70 ; CHECK-LABEL: test_masked_double_to_4_mask2:
72 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
73 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
74 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
75 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
77 %vec = insertelement <2 x double> undef, double %s, i32 0
78 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
79 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
80 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %ymm0.
84 define <4 x double> @test_masked_z_double_to_4_mask2(double %s, <4 x double> %mask) {
85 ; CHECK-LABEL: test_masked_z_double_to_4_mask2:
87 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
88 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
89 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
91 %vec = insertelement <2 x double> undef, double %s, i32 0
92 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
93 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
94 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %ymm1, then vmovapd to %ymm0.
97 define <4 x double> @test_masked_double_to_4_mask3(double %s, <4 x double> %default, <4 x double> %mask) {
98 ; CHECK-LABEL: test_masked_double_to_4_mask3:
100 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
101 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
102 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm1 {%k1}
103 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
105 %vec = insertelement <2 x double> undef, double %s, i32 0
106 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
107 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
108 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
109 ret <4 x double> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %ymm0.
112 define <4 x double> @test_masked_z_double_to_4_mask3(double %s, <4 x double> %mask) {
113 ; CHECK-LABEL: test_masked_z_double_to_4_mask3:
115 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
116 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
117 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0 {%k1} {z}
119 %vec = insertelement <2 x double> undef, double %s, i32 0
120 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
121 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
122 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
123 ret <4 x double> %res
; Unmasked splat: %s is inserted into lane 0 and an all-zero shuffle mask replicates it to 8 doubles.
; The CHECK line expects a register-source vbroadcastsd from %xmm0 into %zmm0.
125 define <8 x double> @test_double_to_8(double %s) {
126 ; CHECK-LABEL: test_double_to_8:
128 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
130 %vec = insertelement <2 x double> undef, double %s, i32 0
131 %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
132 ret <8 x double> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %zmm1, then vmovapd to %zmm0.
134 define <8 x double> @test_masked_double_to_8_mask0(double %s, <8 x double> %default, <8 x double> %mask) {
135 ; CHECK-LABEL: test_masked_double_to_8_mask0:
137 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
138 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
139 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
140 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
142 %vec = insertelement <2 x double> undef, double %s, i32 0
143 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
144 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
145 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
146 ret <8 x double> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %zmm0.
149 define <8 x double> @test_masked_z_double_to_8_mask0(double %s, <8 x double> %mask) {
150 ; CHECK-LABEL: test_masked_z_double_to_8_mask0:
152 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
153 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
154 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
156 %vec = insertelement <2 x double> undef, double %s, i32 0
157 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
158 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
159 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
160 ret <8 x double> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %zmm1, then vmovapd to %zmm0.
162 define <8 x double> @test_masked_double_to_8_mask1(double %s, <8 x double> %default, <8 x double> %mask) {
163 ; CHECK-LABEL: test_masked_double_to_8_mask1:
165 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
166 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
167 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
168 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
170 %vec = insertelement <2 x double> undef, double %s, i32 0
171 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
172 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
173 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
174 ret <8 x double> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %zmm0.
177 define <8 x double> @test_masked_z_double_to_8_mask1(double %s, <8 x double> %mask) {
178 ; CHECK-LABEL: test_masked_z_double_to_8_mask1:
180 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
181 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
182 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
184 %vec = insertelement <2 x double> undef, double %s, i32 0
185 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
186 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
187 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
188 ret <8 x double> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %zmm1, then vmovapd to %zmm0.
190 define <8 x double> @test_masked_double_to_8_mask2(double %s, <8 x double> %default, <8 x double> %mask) {
191 ; CHECK-LABEL: test_masked_double_to_8_mask2:
193 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
194 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
195 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
196 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
198 %vec = insertelement <2 x double> undef, double %s, i32 0
199 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
200 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
201 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
202 ret <8 x double> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %zmm0.
205 define <8 x double> @test_masked_z_double_to_8_mask2(double %s, <8 x double> %mask) {
206 ; CHECK-LABEL: test_masked_z_double_to_8_mask2:
208 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
209 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
210 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
212 %vec = insertelement <2 x double> undef, double %s, i32 0
213 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
214 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
215 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
216 ret <8 x double> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastsd into %zmm1, then vmovapd to %zmm0.
218 define <8 x double> @test_masked_double_to_8_mask3(double %s, <8 x double> %default, <8 x double> %mask) {
219 ; CHECK-LABEL: test_masked_double_to_8_mask3:
221 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
222 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
223 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm1 {%k1}
224 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
226 %vec = insertelement <2 x double> undef, double %s, i32 0
227 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
228 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
229 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
230 ret <8 x double> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd with {%k1} {z} into %zmm0.
233 define <8 x double> @test_masked_z_double_to_8_mask3(double %s, <8 x double> %mask) {
234 ; CHECK-LABEL: test_masked_z_double_to_8_mask3:
236 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
237 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
238 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0 {%k1} {z}
240 %vec = insertelement <2 x double> undef, double %s, i32 0
241 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
242 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
243 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
244 ret <8 x double> %res
; Unmasked splat: %s is inserted into lane 0 and an all-zero shuffle mask replicates it to 4 floats.
; The CHECK line expects a register-source vbroadcastss from %xmm0 into %xmm0.
246 define <4 x float> @test_float_to_4(float %s) {
247 ; CHECK-LABEL: test_float_to_4:
249 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0
251 %vec = insertelement <2 x float> undef, float %s, i32 0
252 %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %xmm1, then vmovaps to %xmm0.
255 define <4 x float> @test_masked_float_to_4_mask0(float %s, <4 x float> %default, <4 x float> %mask) {
256 ; CHECK-LABEL: test_masked_float_to_4_mask0:
258 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
259 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
260 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
261 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
263 %vec = insertelement <2 x float> undef, float %s, i32 0
264 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
265 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
266 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %xmm0.
270 define <4 x float> @test_masked_z_float_to_4_mask0(float %s, <4 x float> %mask) {
271 ; CHECK-LABEL: test_masked_z_float_to_4_mask0:
273 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
274 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
275 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
277 %vec = insertelement <2 x float> undef, float %s, i32 0
278 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
279 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
280 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %xmm1, then vmovaps to %xmm0.
283 define <4 x float> @test_masked_float_to_4_mask1(float %s, <4 x float> %default, <4 x float> %mask) {
284 ; CHECK-LABEL: test_masked_float_to_4_mask1:
286 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
287 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
288 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
289 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
291 %vec = insertelement <2 x float> undef, float %s, i32 0
292 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
293 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
294 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %xmm0.
298 define <4 x float> @test_masked_z_float_to_4_mask1(float %s, <4 x float> %mask) {
299 ; CHECK-LABEL: test_masked_z_float_to_4_mask1:
301 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
302 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
303 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
305 %vec = insertelement <2 x float> undef, float %s, i32 0
306 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
307 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
308 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %xmm1, then vmovaps to %xmm0.
311 define <4 x float> @test_masked_float_to_4_mask2(float %s, <4 x float> %default, <4 x float> %mask) {
312 ; CHECK-LABEL: test_masked_float_to_4_mask2:
314 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
315 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
316 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
317 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
319 %vec = insertelement <2 x float> undef, float %s, i32 0
320 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
321 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
322 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %xmm0.
326 define <4 x float> @test_masked_z_float_to_4_mask2(float %s, <4 x float> %mask) {
327 ; CHECK-LABEL: test_masked_z_float_to_4_mask2:
329 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
330 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
331 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
333 %vec = insertelement <2 x float> undef, float %s, i32 0
334 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
335 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
336 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %xmm1, then vmovaps to %xmm0.
339 define <4 x float> @test_masked_float_to_4_mask3(float %s, <4 x float> %default, <4 x float> %mask) {
340 ; CHECK-LABEL: test_masked_float_to_4_mask3:
342 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
343 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
344 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm1 {%k1}
345 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
347 %vec = insertelement <2 x float> undef, float %s, i32 0
348 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
349 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
350 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %xmm0.
354 define <4 x float> @test_masked_z_float_to_4_mask3(float %s, <4 x float> %mask) {
355 ; CHECK-LABEL: test_masked_z_float_to_4_mask3:
357 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
358 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
359 ; CHECK-NEXT: vbroadcastss %xmm0, %xmm0 {%k1} {z}
361 %vec = insertelement <2 x float> undef, float %s, i32 0
362 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
363 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
364 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked splat: %s is inserted into lane 0 and an all-zero shuffle mask replicates it to 8 floats.
; The CHECK line expects a register-source vbroadcastss from %xmm0 into %ymm0.
367 define <8 x float> @test_float_to_8(float %s) {
368 ; CHECK-LABEL: test_float_to_8:
370 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0
372 %vec = insertelement <2 x float> undef, float %s, i32 0
373 %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %ymm1, then vmovaps to %ymm0.
376 define <8 x float> @test_masked_float_to_8_mask0(float %s, <8 x float> %default, <8 x float> %mask) {
377 ; CHECK-LABEL: test_masked_float_to_8_mask0:
379 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
380 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
381 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
382 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
384 %vec = insertelement <2 x float> undef, float %s, i32 0
385 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
386 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
387 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %ymm0.
391 define <8 x float> @test_masked_z_float_to_8_mask0(float %s, <8 x float> %mask) {
392 ; CHECK-LABEL: test_masked_z_float_to_8_mask0:
394 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
395 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
396 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
398 %vec = insertelement <2 x float> undef, float %s, i32 0
399 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
400 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
401 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %ymm1, then vmovaps to %ymm0.
404 define <8 x float> @test_masked_float_to_8_mask1(float %s, <8 x float> %default, <8 x float> %mask) {
405 ; CHECK-LABEL: test_masked_float_to_8_mask1:
407 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
408 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
409 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
410 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
412 %vec = insertelement <2 x float> undef, float %s, i32 0
413 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
414 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
415 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %ymm0.
419 define <8 x float> @test_masked_z_float_to_8_mask1(float %s, <8 x float> %mask) {
420 ; CHECK-LABEL: test_masked_z_float_to_8_mask1:
422 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
423 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
424 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
426 %vec = insertelement <2 x float> undef, float %s, i32 0
427 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
428 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
429 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %ymm1, then vmovaps to %ymm0.
432 define <8 x float> @test_masked_float_to_8_mask2(float %s, <8 x float> %default, <8 x float> %mask) {
433 ; CHECK-LABEL: test_masked_float_to_8_mask2:
435 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
436 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
437 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
438 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
440 %vec = insertelement <2 x float> undef, float %s, i32 0
441 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
442 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
443 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %ymm0.
447 define <8 x float> @test_masked_z_float_to_8_mask2(float %s, <8 x float> %mask) {
448 ; CHECK-LABEL: test_masked_z_float_to_8_mask2:
450 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
451 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
452 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
454 %vec = insertelement <2 x float> undef, float %s, i32 0
455 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
456 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
457 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %ymm1, then vmovaps to %ymm0.
460 define <8 x float> @test_masked_float_to_8_mask3(float %s, <8 x float> %default, <8 x float> %mask) {
461 ; CHECK-LABEL: test_masked_float_to_8_mask3:
463 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
464 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
465 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm1 {%k1}
466 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
468 %vec = insertelement <2 x float> undef, float %s, i32 0
469 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
470 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
471 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %ymm0.
475 define <8 x float> @test_masked_z_float_to_8_mask3(float %s, <8 x float> %mask) {
476 ; CHECK-LABEL: test_masked_z_float_to_8_mask3:
478 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
479 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
480 ; CHECK-NEXT: vbroadcastss %xmm0, %ymm0 {%k1} {z}
482 %vec = insertelement <2 x float> undef, float %s, i32 0
483 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
484 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
485 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked splat: %s is inserted into lane 0 and an all-zero shuffle mask replicates it to 16 floats.
; The CHECK line expects a register-source vbroadcastss from %xmm0 into %zmm0.
488 define <16 x float> @test_float_to_16(float %s) {
489 ; CHECK-LABEL: test_float_to_16:
491 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0
493 %vec = insertelement <2 x float> undef, float %s, i32 0
494 %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
495 ret <16 x float> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %zmm1, then vmovaps to %zmm0.
497 define <16 x float> @test_masked_float_to_16_mask0(float %s, <16 x float> %default, <16 x float> %mask) {
498 ; CHECK-LABEL: test_masked_float_to_16_mask0:
500 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
501 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
502 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
503 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
505 %vec = insertelement <2 x float> undef, float %s, i32 0
506 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
507 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
508 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
509 ret <16 x float> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %zmm0.
512 define <16 x float> @test_masked_z_float_to_16_mask0(float %s, <16 x float> %mask) {
513 ; CHECK-LABEL: test_masked_z_float_to_16_mask0:
515 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
516 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
517 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
519 %vec = insertelement <2 x float> undef, float %s, i32 0
520 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
521 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
522 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
523 ret <16 x float> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %zmm1, then vmovaps to %zmm0.
525 define <16 x float> @test_masked_float_to_16_mask1(float %s, <16 x float> %default, <16 x float> %mask) {
526 ; CHECK-LABEL: test_masked_float_to_16_mask1:
528 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
529 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
530 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
531 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
533 %vec = insertelement <2 x float> undef, float %s, i32 0
534 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
535 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
536 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
537 ret <16 x float> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %zmm0.
540 define <16 x float> @test_masked_z_float_to_16_mask1(float %s, <16 x float> %mask) {
541 ; CHECK-LABEL: test_masked_z_float_to_16_mask1:
543 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
544 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
545 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
547 %vec = insertelement <2 x float> undef, float %s, i32 0
548 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
549 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
550 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
551 ret <16 x float> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %zmm1, then vmovaps to %zmm0.
553 define <16 x float> @test_masked_float_to_16_mask2(float %s, <16 x float> %default, <16 x float> %mask) {
554 ; CHECK-LABEL: test_masked_float_to_16_mask2:
556 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
557 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
558 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
559 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
561 %vec = insertelement <2 x float> undef, float %s, i32 0
562 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
563 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
564 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
565 ret <16 x float> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %zmm0.
568 define <16 x float> @test_masked_z_float_to_16_mask2(float %s, <16 x float> %mask) {
569 ; CHECK-LABEL: test_masked_z_float_to_16_mask2:
571 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
572 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
573 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
575 %vec = insertelement <2 x float> undef, float %s, i32 0
576 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
577 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
578 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
579 ret <16 x float> %res
; Merge-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest take %default.
; The CHECK lines expect the compare to write %k1, a {%k1}-masked vbroadcastss into %zmm1, then vmovaps to %zmm0.
581 define <16 x float> @test_masked_float_to_16_mask3(float %s, <16 x float> %default, <16 x float> %mask) {
582 ; CHECK-LABEL: test_masked_float_to_16_mask3:
584 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
585 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
586 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm1 {%k1}
587 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
589 %vec = insertelement <2 x float> undef, float %s, i32 0
590 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
591 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
592 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
593 ret <16 x float> %res
; Zero-masked splat: lanes where %mask compares ordered-equal to zero take the broadcast of %s, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastss with {%k1} {z} into %zmm0.
596 define <16 x float> @test_masked_z_float_to_16_mask3(float %s, <16 x float> %mask) {
597 ; CHECK-LABEL: test_masked_z_float_to_16_mask3:
599 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
600 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
601 ; CHECK-NEXT: vbroadcastss %xmm0, %zmm0 {%k1} {z}
603 %vec = insertelement <2 x float> undef, float %s, i32 0
604 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
605 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
606 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
607 ret <16 x float> %res
; Unmasked splat from memory: the scalar is loaded from %p, inserted into lane 0 and replicated to 4 doubles.
; The CHECK line expects the memory-operand form, vbroadcastsd (%rdi), writing %ymm0.
609 define <4 x double> @test_double_to_4_mem(ptr %p) {
610 ; CHECK-LABEL: test_double_to_4_mem:
612 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
614 %s = load double, ptr %p
615 %vec = insertelement <2 x double> undef, double %s, i32 0
616 %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
617 ret <4 x double> %res
; Merge-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest take %default.
; The CHECK lines expect the compare to write %k1, then a {%k1}-masked vbroadcastsd (%rdi) directly into %ymm0.
619 define <4 x double> @test_masked_double_to_4_mem_mask0(ptr %p, <4 x double> %default, <4 x double> %mask) {
620 ; CHECK-LABEL: test_masked_double_to_4_mem_mask0:
622 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
623 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
624 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
626 %s = load double, ptr %p
627 %vec = insertelement <2 x double> undef, double %s, i32 0
628 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
629 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
630 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
631 ret <4 x double> %res
; Zero-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd (%rdi) with {%k1} {z} into %ymm0.
634 define <4 x double> @test_masked_z_double_to_4_mem_mask0(ptr %p, <4 x double> %mask) {
635 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask0:
637 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
638 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
639 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
641 %s = load double, ptr %p
642 %vec = insertelement <2 x double> undef, double %s, i32 0
643 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
644 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
645 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
646 ret <4 x double> %res
; Merge-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest take %default.
; The CHECK lines expect the compare to write %k1, then a {%k1}-masked vbroadcastsd (%rdi) directly into %ymm0.
648 define <4 x double> @test_masked_double_to_4_mem_mask1(ptr %p, <4 x double> %default, <4 x double> %mask) {
649 ; CHECK-LABEL: test_masked_double_to_4_mem_mask1:
651 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
652 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
653 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
655 %s = load double, ptr %p
656 %vec = insertelement <2 x double> undef, double %s, i32 0
657 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
658 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
659 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
660 ret <4 x double> %res
; Zero-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd (%rdi) with {%k1} {z} into %ymm0.
663 define <4 x double> @test_masked_z_double_to_4_mem_mask1(ptr %p, <4 x double> %mask) {
664 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask1:
666 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
667 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
668 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
670 %s = load double, ptr %p
671 %vec = insertelement <2 x double> undef, double %s, i32 0
672 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
673 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
674 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
675 ret <4 x double> %res
; Merge-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest take %default.
; The CHECK lines expect the compare to write %k1, then a {%k1}-masked vbroadcastsd (%rdi) directly into %ymm0.
677 define <4 x double> @test_masked_double_to_4_mem_mask2(ptr %p, <4 x double> %default, <4 x double> %mask) {
678 ; CHECK-LABEL: test_masked_double_to_4_mem_mask2:
680 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
681 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
682 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
684 %s = load double, ptr %p
685 %vec = insertelement <2 x double> undef, double %s, i32 0
686 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
687 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
688 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
689 ret <4 x double> %res
; Zero-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest are zero.
; The CHECK lines expect the compare to write %k1, then vbroadcastsd (%rdi) with {%k1} {z} into %ymm0.
692 define <4 x double> @test_masked_z_double_to_4_mem_mask2(ptr %p, <4 x double> %mask) {
693 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask2:
695 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
696 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
697 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
699 %s = load double, ptr %p
700 %vec = insertelement <2 x double> undef, double %s, i32 0
701 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
702 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
703 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
704 ret <4 x double> %res
; Merge-masked splat from memory: lanes where %mask compares ordered-equal to zero take the scalar loaded from %p, the rest take %default.
; The CHECK lines expect the compare to write %k1, then a {%k1}-masked vbroadcastsd (%rdi) directly into %ymm0.
706 define <4 x double> @test_masked_double_to_4_mem_mask3(ptr %p, <4 x double> %default, <4 x double> %mask) {
707 ; CHECK-LABEL: test_masked_double_to_4_mem_mask3:
709 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
710 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
711 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1}
713 %s = load double, ptr %p
714 %vec = insertelement <2 x double> undef, double %s, i32 0
715 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
716 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
717 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
718 ret <4 x double> %res
; Zero-masked splat of a double loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastsd with a
; memory source using {%k1} {z}.
721 define <4 x double> @test_masked_z_double_to_4_mem_mask3(ptr %p, <4 x double> %mask) {
722 ; CHECK-LABEL: test_masked_z_double_to_4_mem_mask3:
724 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
725 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
726 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0 {%k1} {z}
728 %s = load double, ptr %p
729 %vec = insertelement <2 x double> undef, double %s, i32 0
730 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
731 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
732 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
733 ret <4 x double> %res
; Unmasked splat of a double loaded from %p: the value is inserted into lane 0
; of an undef <2 x double> and an all-zero shuffle mask replicates that lane to
; 8 lanes. The assertions expect a single vbroadcastsd with a memory source
; writing %zmm0.
735 define <8 x double> @test_double_to_8_mem(ptr %p) {
736 ; CHECK-LABEL: test_double_to_8_mem:
738 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
740 %s = load double, ptr %p
741 %vec = insertelement <2 x double> undef, double %s, i32 0
742 %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
743 ret <8 x double> %res
; Merge-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastsd with a memory source masked by {%k1} (no {z}).
745 define <8 x double> @test_masked_double_to_8_mem_mask0(ptr %p, <8 x double> %default, <8 x double> %mask) {
746 ; CHECK-LABEL: test_masked_double_to_8_mem_mask0:
748 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
749 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
750 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
752 %s = load double, ptr %p
753 %vec = insertelement <2 x double> undef, double %s, i32 0
754 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
755 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
756 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
757 ret <8 x double> %res
; Zero-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastsd with a
; memory source using {%k1} {z}.
760 define <8 x double> @test_masked_z_double_to_8_mem_mask0(ptr %p, <8 x double> %mask) {
761 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask0:
763 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
764 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
765 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
767 %s = load double, ptr %p
768 %vec = insertelement <2 x double> undef, double %s, i32 0
769 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
770 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
771 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
772 ret <8 x double> %res
; Merge-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastsd with a memory source masked by {%k1} (no {z}).
774 define <8 x double> @test_masked_double_to_8_mem_mask1(ptr %p, <8 x double> %default, <8 x double> %mask) {
775 ; CHECK-LABEL: test_masked_double_to_8_mem_mask1:
777 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
778 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
779 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
781 %s = load double, ptr %p
782 %vec = insertelement <2 x double> undef, double %s, i32 0
783 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
784 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
785 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
786 ret <8 x double> %res
; Zero-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastsd with a
; memory source using {%k1} {z}.
789 define <8 x double> @test_masked_z_double_to_8_mem_mask1(ptr %p, <8 x double> %mask) {
790 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask1:
792 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
793 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
794 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
796 %s = load double, ptr %p
797 %vec = insertelement <2 x double> undef, double %s, i32 0
798 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
799 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
800 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
801 ret <8 x double> %res
; Merge-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastsd with a memory source masked by {%k1} (no {z}).
803 define <8 x double> @test_masked_double_to_8_mem_mask2(ptr %p, <8 x double> %default, <8 x double> %mask) {
804 ; CHECK-LABEL: test_masked_double_to_8_mem_mask2:
806 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
807 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
808 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
810 %s = load double, ptr %p
811 %vec = insertelement <2 x double> undef, double %s, i32 0
812 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
813 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
814 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
815 ret <8 x double> %res
; Zero-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastsd with a
; memory source using {%k1} {z}.
818 define <8 x double> @test_masked_z_double_to_8_mem_mask2(ptr %p, <8 x double> %mask) {
819 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask2:
821 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
822 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
823 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
825 %s = load double, ptr %p
826 %vec = insertelement <2 x double> undef, double %s, i32 0
827 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
828 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
829 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
830 ret <8 x double> %res
; Merge-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastsd with a memory source masked by {%k1} (no {z}).
832 define <8 x double> @test_masked_double_to_8_mem_mask3(ptr %p, <8 x double> %default, <8 x double> %mask) {
833 ; CHECK-LABEL: test_masked_double_to_8_mem_mask3:
835 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
836 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
837 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1}
839 %s = load double, ptr %p
840 %vec = insertelement <2 x double> undef, double %s, i32 0
841 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
842 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
843 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
844 ret <8 x double> %res
; Zero-masked splat of a double loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastsd with a
; memory source using {%k1} {z}.
847 define <8 x double> @test_masked_z_double_to_8_mem_mask3(ptr %p, <8 x double> %mask) {
848 ; CHECK-LABEL: test_masked_z_double_to_8_mem_mask3:
850 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
851 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
852 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0 {%k1} {z}
854 %s = load double, ptr %p
855 %vec = insertelement <2 x double> undef, double %s, i32 0
856 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
857 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
858 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
859 ret <8 x double> %res
; Unmasked splat of a float loaded from %p: the value is inserted into lane 0
; of an undef <2 x float> and an all-zero shuffle mask replicates that lane to
; 4 lanes. The assertions expect a single vbroadcastss with a memory source
; writing %xmm0.
861 define <4 x float> @test_float_to_4_mem(ptr %p) {
862 ; CHECK-LABEL: test_float_to_4_mem:
864 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0
866 %s = load float, ptr %p
867 %vec = insertelement <2 x float> undef, float %s, i32 0
868 %res = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
; Merge-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
871 define <4 x float> @test_masked_float_to_4_mem_mask0(ptr %p, <4 x float> %default, <4 x float> %mask) {
872 ; CHECK-LABEL: test_masked_float_to_4_mem_mask0:
874 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
875 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
876 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
878 %s = load float, ptr %p
879 %vec = insertelement <2 x float> undef, float %s, i32 0
880 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
881 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
882 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
886 define <4 x float> @test_masked_z_float_to_4_mem_mask0(ptr %p, <4 x float> %mask) {
887 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask0:
889 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
890 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
891 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
893 %s = load float, ptr %p
894 %vec = insertelement <2 x float> undef, float %s, i32 0
895 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
896 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
897 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
900 define <4 x float> @test_masked_float_to_4_mem_mask1(ptr %p, <4 x float> %default, <4 x float> %mask) {
901 ; CHECK-LABEL: test_masked_float_to_4_mem_mask1:
903 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
904 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
905 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
907 %s = load float, ptr %p
908 %vec = insertelement <2 x float> undef, float %s, i32 0
909 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
910 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
911 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
915 define <4 x float> @test_masked_z_float_to_4_mem_mask1(ptr %p, <4 x float> %mask) {
916 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask1:
918 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
919 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
920 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
922 %s = load float, ptr %p
923 %vec = insertelement <2 x float> undef, float %s, i32 0
924 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
925 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
926 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
929 define <4 x float> @test_masked_float_to_4_mem_mask2(ptr %p, <4 x float> %default, <4 x float> %mask) {
930 ; CHECK-LABEL: test_masked_float_to_4_mem_mask2:
932 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
933 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
934 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
936 %s = load float, ptr %p
937 %vec = insertelement <2 x float> undef, float %s, i32 0
938 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
939 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
940 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
944 define <4 x float> @test_masked_z_float_to_4_mem_mask2(ptr %p, <4 x float> %mask) {
945 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask2:
947 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
948 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
949 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
951 %s = load float, ptr %p
952 %vec = insertelement <2 x float> undef, float %s, i32 0
953 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
954 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
955 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
958 define <4 x float> @test_masked_float_to_4_mem_mask3(ptr %p, <4 x float> %default, <4 x float> %mask) {
959 ; CHECK-LABEL: test_masked_float_to_4_mem_mask3:
961 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
962 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
963 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1}
965 %s = load float, ptr %p
966 %vec = insertelement <2 x float> undef, float %s, i32 0
967 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
968 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
969 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %default
; Zero-masked splat of a float loaded from %p into 4 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
973 define <4 x float> @test_masked_z_float_to_4_mem_mask3(ptr %p, <4 x float> %mask) {
974 ; CHECK-LABEL: test_masked_z_float_to_4_mem_mask3:
976 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
977 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
978 ; CHECK-NEXT: vbroadcastss (%rdi), %xmm0 {%k1} {z}
980 %s = load float, ptr %p
981 %vec = insertelement <2 x float> undef, float %s, i32 0
982 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <4 x i32> <i32 0, i32 0, i32 0, i32 0>
983 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
984 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked splat of a float loaded from %p: the value is inserted into lane 0
; of an undef <2 x float> and an all-zero shuffle mask replicates that lane to
; 8 lanes. The assertions expect a single vbroadcastss with a memory source
; writing %ymm0.
987 define <8 x float> @test_float_to_8_mem(ptr %p) {
988 ; CHECK-LABEL: test_float_to_8_mem:
990 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0
992 %s = load float, ptr %p
993 %vec = insertelement <2 x float> undef, float %s, i32 0
994 %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
; Merge-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
997 define <8 x float> @test_masked_float_to_8_mem_mask0(ptr %p, <8 x float> %default, <8 x float> %mask) {
998 ; CHECK-LABEL: test_masked_float_to_8_mem_mask0:
1000 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1001 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1002 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
1004 %s = load float, ptr %p
1005 %vec = insertelement <2 x float> undef, float %s, i32 0
1006 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1007 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1008 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1009 ret <8 x float> %res
; Zero-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1012 define <8 x float> @test_masked_z_float_to_8_mem_mask0(ptr %p, <8 x float> %mask) {
1013 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask0:
1015 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1016 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1017 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
1019 %s = load float, ptr %p
1020 %vec = insertelement <2 x float> undef, float %s, i32 0
1021 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1022 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1023 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1024 ret <8 x float> %res
; Merge-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1026 define <8 x float> @test_masked_float_to_8_mem_mask1(ptr %p, <8 x float> %default, <8 x float> %mask) {
1027 ; CHECK-LABEL: test_masked_float_to_8_mem_mask1:
1029 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1030 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1031 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
1033 %s = load float, ptr %p
1034 %vec = insertelement <2 x float> undef, float %s, i32 0
1035 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1036 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1037 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1038 ret <8 x float> %res
; Zero-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1041 define <8 x float> @test_masked_z_float_to_8_mem_mask1(ptr %p, <8 x float> %mask) {
1042 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask1:
1044 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1045 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1046 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
1048 %s = load float, ptr %p
1049 %vec = insertelement <2 x float> undef, float %s, i32 0
1050 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1051 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1052 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1053 ret <8 x float> %res
; Merge-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1055 define <8 x float> @test_masked_float_to_8_mem_mask2(ptr %p, <8 x float> %default, <8 x float> %mask) {
1056 ; CHECK-LABEL: test_masked_float_to_8_mem_mask2:
1058 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1059 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1060 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
1062 %s = load float, ptr %p
1063 %vec = insertelement <2 x float> undef, float %s, i32 0
1064 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1065 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1066 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1067 ret <8 x float> %res
; Zero-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1070 define <8 x float> @test_masked_z_float_to_8_mem_mask2(ptr %p, <8 x float> %mask) {
1071 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask2:
1073 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1074 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1075 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
1077 %s = load float, ptr %p
1078 %vec = insertelement <2 x float> undef, float %s, i32 0
1079 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1080 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1081 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1082 ret <8 x float> %res
; Merge-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1084 define <8 x float> @test_masked_float_to_8_mem_mask3(ptr %p, <8 x float> %default, <8 x float> %mask) {
1085 ; CHECK-LABEL: test_masked_float_to_8_mem_mask3:
1087 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1088 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1089 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1}
1091 %s = load float, ptr %p
1092 %vec = insertelement <2 x float> undef, float %s, i32 0
1093 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1094 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1095 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
1096 ret <8 x float> %res
; Zero-masked splat of a float loaded from %p into 8 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1099 define <8 x float> @test_masked_z_float_to_8_mem_mask3(ptr %p, <8 x float> %mask) {
1100 ; CHECK-LABEL: test_masked_z_float_to_8_mem_mask3:
1102 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1103 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1104 ; CHECK-NEXT: vbroadcastss (%rdi), %ymm0 {%k1} {z}
1106 %s = load float, ptr %p
1107 %vec = insertelement <2 x float> undef, float %s, i32 0
1108 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1109 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1110 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1111 ret <8 x float> %res
; Unmasked splat of a float loaded from %p: the value is inserted into lane 0
; of an undef <2 x float> and an all-zero shuffle mask replicates that lane to
; 16 lanes. The assertions expect a single vbroadcastss with a memory source
; writing %zmm0.
1113 define <16 x float> @test_float_to_16_mem(ptr %p) {
1114 ; CHECK-LABEL: test_float_to_16_mem:
1116 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0
1118 %s = load float, ptr %p
1119 %vec = insertelement <2 x float> undef, float %s, i32 0
1120 %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1121 ret <16 x float> %res
; Merge-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1123 define <16 x float> @test_masked_float_to_16_mem_mask0(ptr %p, <16 x float> %default, <16 x float> %mask) {
1124 ; CHECK-LABEL: test_masked_float_to_16_mem_mask0:
1126 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1127 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1128 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
1130 %s = load float, ptr %p
1131 %vec = insertelement <2 x float> undef, float %s, i32 0
1132 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1133 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1134 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1135 ret <16 x float> %res
; Zero-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1138 define <16 x float> @test_masked_z_float_to_16_mem_mask0(ptr %p, <16 x float> %mask) {
1139 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask0:
1141 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1142 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1143 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
1145 %s = load float, ptr %p
1146 %vec = insertelement <2 x float> undef, float %s, i32 0
1147 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1148 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1149 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1150 ret <16 x float> %res
; Merge-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1152 define <16 x float> @test_masked_float_to_16_mem_mask1(ptr %p, <16 x float> %default, <16 x float> %mask) {
1153 ; CHECK-LABEL: test_masked_float_to_16_mem_mask1:
1155 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1156 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1157 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
1159 %s = load float, ptr %p
1160 %vec = insertelement <2 x float> undef, float %s, i32 0
1161 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1162 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1163 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1164 ret <16 x float> %res
; Zero-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1167 define <16 x float> @test_masked_z_float_to_16_mem_mask1(ptr %p, <16 x float> %mask) {
1168 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask1:
1170 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1171 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1172 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
1174 %s = load float, ptr %p
1175 %vec = insertelement <2 x float> undef, float %s, i32 0
1176 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1177 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1178 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1179 ret <16 x float> %res
; Merge-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1181 define <16 x float> @test_masked_float_to_16_mem_mask2(ptr %p, <16 x float> %default, <16 x float> %mask) {
1182 ; CHECK-LABEL: test_masked_float_to_16_mem_mask2:
1184 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1185 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1186 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
1188 %s = load float, ptr %p
1189 %vec = insertelement <2 x float> undef, float %s, i32 0
1190 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1191 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1192 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1193 ret <16 x float> %res
; Zero-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1196 define <16 x float> @test_masked_z_float_to_16_mem_mask2(ptr %p, <16 x float> %mask) {
1197 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask2:
1199 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1200 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1201 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
1203 %s = load float, ptr %p
1204 %vec = insertelement <2 x float> undef, float %s, i32 0
1205 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1206 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1207 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1208 ret <16 x float> %res
; Merge-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes keep the
; corresponding lane of %default. The assertions expect the compare result in
; %k1 and a vbroadcastss with a memory source masked by {%k1} (no {z}).
1210 define <16 x float> @test_masked_float_to_16_mem_mask3(ptr %p, <16 x float> %default, <16 x float> %mask) {
1211 ; CHECK-LABEL: test_masked_float_to_16_mem_mask3:
1213 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1214 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1215 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1}
1217 %s = load float, ptr %p
1218 %vec = insertelement <2 x float> undef, float %s, i32 0
1219 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1220 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1221 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1222 ret <16 x float> %res
; Zero-masked splat of a float loaded from %p into 16 lanes: lanes where %mask
; compares ordered-equal to 0.0 take the loaded value, all other lanes are zero.
; The assertions expect the compare result in %k1 and a vbroadcastss with a
; memory source using {%k1} {z}.
1225 define <16 x float> @test_masked_z_float_to_16_mem_mask3(ptr %p, <16 x float> %mask) {
1226 ; CHECK-LABEL: test_masked_z_float_to_16_mem_mask3:
1228 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1229 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1230 ; CHECK-NEXT: vbroadcastss (%rdi), %zmm0 {%k1} {z}
1232 %s = load float, ptr %p
1233 %vec = insertelement <2 x float> undef, float %s, i32 0
1234 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
1235 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1236 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1237 ret <16 x float> %res