1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq %s -o - | FileCheck %s
; Unmasked register broadcast: the shuffle repeats elements 0 and 1 of %vec
; into every pair of the <8 x float> result. The expected assembly below is a
; single vbroadcastsd from xmm0 into ymm0.
4 define <8 x float> @test_2xfloat_to_8xfloat(<8 x float> %vec) {
5 ; CHECK-LABEL: test_2xfloat_to_8xfloat:
7 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
9 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked register broadcast: elements 0 and 1 of %vec are repeated across
; the <8 x float> shuffle. Lanes where %mask compares ordered-equal to zero take
; the shuffle result; all other lanes keep %default.
12 define <8 x float> @test_masked_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
13 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask0:
15 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
16 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
17 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
18 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
20 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
21 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
22 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked register broadcast: elements 0 and 1 of %vec are repeated across
; the <8 x float> shuffle. Lanes where %mask compares ordered-equal to zero take
; the shuffle result; all other lanes are set to zero.
26 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask0(<8 x float> %vec, <8 x float> %mask) {
27 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask0:
29 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
30 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
31 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
33 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
34 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
35 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
38 define <8 x float> @test_masked_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
39 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask1:
41 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
42 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
43 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
44 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
46 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
47 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
48 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
52 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask1(<8 x float> %vec, <8 x float> %mask) {
53 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask1:
55 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
56 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
57 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
59 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
60 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
61 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
64 define <8 x float> @test_masked_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
65 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask2:
67 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
68 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
69 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
70 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
72 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
73 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
74 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
78 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask2(<8 x float> %vec, <8 x float> %mask) {
79 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask2:
81 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
82 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
83 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
85 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
86 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
87 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
90 define <8 x float> @test_masked_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %default, <8 x float> %mask) {
91 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mask3:
93 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
94 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
95 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
96 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
98 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
99 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
100 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked register broadcast of elements 0 and 1 of %vec to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
104 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mask3(<8 x float> %vec, <8 x float> %mask) {
105 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mask3:
107 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
108 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
109 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
111 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
112 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
113 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked register broadcast: the shuffle repeats elements 0 and 1 of %vec
; into every pair of the <16 x float> result. The expected assembly below is a
; single vbroadcastsd from xmm0 into zmm0.
116 define <16 x float> @test_2xfloat_to_16xfloat(<16 x float> %vec) {
117 ; CHECK-LABEL: test_2xfloat_to_16xfloat:
119 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
121 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
122 ret <16 x float> %res
; Merge-masked register broadcast: elements 0 and 1 of %vec are repeated across
; the <16 x float> shuffle. Lanes where %mask compares ordered-equal to zero
; take the shuffle result; all other lanes keep %default.
124 define <16 x float> @test_masked_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
125 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask0:
127 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
128 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
129 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
130 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
132 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
133 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
134 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
135 ret <16 x float> %res
; Zero-masked register broadcast: elements 0 and 1 of %vec are repeated across
; the <16 x float> shuffle. Lanes where %mask compares ordered-equal to zero
; take the shuffle result; all other lanes are set to zero.
138 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask0(<16 x float> %vec, <16 x float> %mask) {
139 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask0:
141 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
142 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
143 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
145 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
146 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
147 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
148 ret <16 x float> %res
; Merge-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
150 define <16 x float> @test_masked_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
151 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask1:
153 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
154 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
155 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
156 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
158 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
159 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
160 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
161 ret <16 x float> %res
; Zero-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
164 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask1(<16 x float> %vec, <16 x float> %mask) {
165 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask1:
167 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
168 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
169 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
171 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
172 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
173 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
174 ret <16 x float> %res
; Merge-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
176 define <16 x float> @test_masked_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
177 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask2:
179 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
180 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
181 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
182 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
184 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
185 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
186 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
187 ret <16 x float> %res
; Zero-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
190 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask2(<16 x float> %vec, <16 x float> %mask) {
191 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask2:
193 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
194 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
195 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
197 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
198 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
199 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
200 ret <16 x float> %res
; Merge-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
202 define <16 x float> @test_masked_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %default, <16 x float> %mask) {
203 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mask3:
205 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
206 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
207 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
208 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
210 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
211 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
212 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
213 ret <16 x float> %res
; Zero-masked register broadcast of elements 0 and 1 of %vec to <16 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
216 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mask3(<16 x float> %vec, <16 x float> %mask) {
217 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mask3:
219 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
220 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
221 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
223 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
224 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
225 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
226 ret <16 x float> %res
; Unmasked memory broadcast: loads a <2 x double> from %vp and repeats it twice
; to form the <4 x double> result. The expected assembly below is a single
; vbroadcastf128 with a memory operand.
228 define <4 x double> @test_2xdouble_to_4xdouble_mem(<2 x double>* %vp) {
229 ; CHECK-LABEL: test_2xdouble_to_4xdouble_mem:
231 ; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
233 %vec = load <2 x double>, <2 x double>* %vp
234 %res = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
235 ret <4 x double> %res
; Merge-masked memory broadcast: a <2 x double> loaded from %vp is repeated to
; <4 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes keep %default.
237 define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
238 ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask0:
240 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
241 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
242 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
244 %vec = load <2 x double>, <2 x double>* %vp
245 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
246 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
247 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
248 ret <4 x double> %res
; Zero-masked memory broadcast: a <2 x double> loaded from %vp is repeated to
; <4 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes are set to zero.
251 define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask0(<2 x double>* %vp, <4 x double> %mask) {
252 ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask0:
254 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
255 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
256 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
258 %vec = load <2 x double>, <2 x double>* %vp
259 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
260 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
261 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
262 ret <4 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
264 define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
265 ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask1:
267 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
268 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
269 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
271 %vec = load <2 x double>, <2 x double>* %vp
272 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
273 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
274 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
275 ret <4 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
278 define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask1(<2 x double>* %vp, <4 x double> %mask) {
279 ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask1:
281 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
282 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
283 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
285 %vec = load <2 x double>, <2 x double>* %vp
286 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
287 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
288 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
289 ret <4 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
291 define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
292 ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask2:
294 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
295 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
296 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
298 %vec = load <2 x double>, <2 x double>* %vp
299 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
300 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
301 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
302 ret <4 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
305 define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask2(<2 x double>* %vp, <4 x double> %mask) {
306 ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask2:
308 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
309 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
310 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
312 %vec = load <2 x double>, <2 x double>* %vp
313 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
314 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
315 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
316 ret <4 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
318 define <4 x double> @test_masked_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %default, <4 x double> %mask) {
319 ; CHECK-LABEL: test_masked_2xdouble_to_4xdouble_mem_mask3:
321 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
322 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
323 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
325 %vec = load <2 x double>, <2 x double>* %vp
326 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
327 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
328 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %default
329 ret <4 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <4 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
332 define <4 x double> @test_masked_z_2xdouble_to_4xdouble_mem_mask3(<2 x double>* %vp, <4 x double> %mask) {
333 ; CHECK-LABEL: test_masked_z_2xdouble_to_4xdouble_mem_mask3:
335 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
336 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
337 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
339 %vec = load <2 x double>, <2 x double>* %vp
340 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
341 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
342 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
343 ret <4 x double> %res
; Unmasked memory broadcast: loads a <2 x double> from %vp and repeats it four
; times to form the <8 x double> result. Note that the expected instruction
; below is vbroadcastf32x4, whose shuffle comment lists sixteen 32-bit elements.
345 define <8 x double> @test_2xdouble_to_8xdouble_mem(<2 x double>* %vp) {
346 ; CHECK-LABEL: test_2xdouble_to_8xdouble_mem:
348 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
350 %vec = load <2 x double>, <2 x double>* %vp
351 %res = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
352 ret <8 x double> %res
; Merge-masked memory broadcast: a <2 x double> loaded from %vp is repeated to
; <8 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes keep %default.
354 define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
355 ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask0:
357 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
358 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
359 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
361 %vec = load <2 x double>, <2 x double>* %vp
362 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
363 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
364 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
365 ret <8 x double> %res
; Zero-masked memory broadcast: a <2 x double> loaded from %vp is repeated to
; <8 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes are set to zero.
368 define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask0(<2 x double>* %vp, <8 x double> %mask) {
369 ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask0:
371 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
372 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
373 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
375 %vec = load <2 x double>, <2 x double>* %vp
376 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
377 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
378 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
379 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
381 define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
382 ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask1:
384 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
385 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
386 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
388 %vec = load <2 x double>, <2 x double>* %vp
389 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
390 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
391 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
392 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
395 define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask1(<2 x double>* %vp, <8 x double> %mask) {
396 ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask1:
398 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
399 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
400 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
402 %vec = load <2 x double>, <2 x double>* %vp
403 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
404 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
405 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
406 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
408 define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
409 ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask2:
411 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
412 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
413 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
415 %vec = load <2 x double>, <2 x double>* %vp
416 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
417 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
418 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
419 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
422 define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask2(<2 x double>* %vp, <8 x double> %mask) {
423 ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask2:
425 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
426 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
427 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
429 %vec = load <2 x double>, <2 x double>* %vp
430 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
431 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
432 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
433 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
435 define <8 x double> @test_masked_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
436 ; CHECK-LABEL: test_masked_2xdouble_to_8xdouble_mem_mask3:
438 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
439 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
440 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
442 %vec = load <2 x double>, <2 x double>* %vp
443 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
444 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
445 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
446 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <2 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
449 define <8 x double> @test_masked_z_2xdouble_to_8xdouble_mem_mask3(<2 x double>* %vp, <8 x double> %mask) {
450 ; CHECK-LABEL: test_masked_z_2xdouble_to_8xdouble_mem_mask3:
452 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
453 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
454 ; CHECK-NEXT: vbroadcastf64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
456 %vec = load <2 x double>, <2 x double>* %vp
457 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
458 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
459 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
460 ret <8 x double> %res
; Unmasked memory broadcast: loads a <4 x double> from %vp and repeats it twice
; to form the <8 x double> result. The expected assembly below is a single
; vbroadcastf64x4 with a memory operand.
462 define <8 x double> @test_4xdouble_to_8xdouble_mem(<4 x double>* %vp) {
463 ; CHECK-LABEL: test_4xdouble_to_8xdouble_mem:
465 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
467 %vec = load <4 x double>, <4 x double>* %vp
468 %res = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
469 ret <8 x double> %res
; Merge-masked memory broadcast: a <4 x double> loaded from %vp is repeated to
; <8 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes keep %default.
471 define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
472 ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask0:
474 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
475 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
476 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
478 %vec = load <4 x double>, <4 x double>* %vp
479 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
480 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
481 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
482 ret <8 x double> %res
; Zero-masked memory broadcast: a <4 x double> loaded from %vp is repeated to
; <8 x double>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes are set to zero.
485 define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask0(<4 x double>* %vp, <8 x double> %mask) {
486 ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask0:
488 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
489 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
490 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
492 %vec = load <4 x double>, <4 x double>* %vp
493 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
494 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
495 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
496 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
498 define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
499 ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask1:
501 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
502 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
503 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
505 %vec = load <4 x double>, <4 x double>* %vp
506 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
507 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
508 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
509 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
512 define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask1(<4 x double>* %vp, <8 x double> %mask) {
513 ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask1:
515 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
516 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
517 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
519 %vec = load <4 x double>, <4 x double>* %vp
520 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
521 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
522 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
523 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
525 define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
526 ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask2:
528 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
529 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
530 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
532 %vec = load <4 x double>, <4 x double>* %vp
533 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
534 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
535 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
536 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
539 define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask2(<4 x double>* %vp, <8 x double> %mask) {
540 ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask2:
542 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
543 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
544 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
546 %vec = load <4 x double>, <4 x double>* %vp
547 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
548 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
549 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
550 ret <8 x double> %res
; Merge-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
552 define <8 x double> @test_masked_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %default, <8 x double> %mask) {
553 ; CHECK-LABEL: test_masked_4xdouble_to_8xdouble_mem_mask3:
555 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
556 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
557 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
559 %vec = load <4 x double>, <4 x double>* %vp
560 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
561 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %default
563 ret <8 x double> %res
; Zero-masked memory broadcast of a loaded <4 x double> to <8 x double>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
566 define <8 x double> @test_masked_z_4xdouble_to_8xdouble_mem_mask3(<4 x double>* %vp, <8 x double> %mask) {
567 ; CHECK-LABEL: test_masked_z_4xdouble_to_8xdouble_mem_mask3:
569 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
570 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
571 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
573 %vec = load <4 x double>, <4 x double>* %vp
574 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
575 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
576 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
577 ret <8 x double> %res
; Unmasked memory broadcast: loads a <2 x float> from %vp and repeats it four
; times to form the <8 x float> result. The expected assembly below is a single
; vbroadcastsd reading from (%rdi).
579 define <8 x float> @test_2xfloat_to_8xfloat_mem(<2 x float>* %vp) {
580 ; CHECK-LABEL: test_2xfloat_to_8xfloat_mem:
582 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
584 %vec = load <2 x float>, <2 x float>* %vp
585 %res = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked memory broadcast: a <2 x float> loaded from %vp is repeated to
; <8 x float>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes keep %default.
588 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
589 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask0:
591 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
592 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
593 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
595 %vec = load <2 x float>, <2 x float>* %vp
596 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
597 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
598 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked memory broadcast: a <2 x float> loaded from %vp is repeated to
; <8 x float>. Lanes where %mask compares ordered-equal to zero take the
; broadcast value; all other lanes are set to zero.
602 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask0(<2 x float>* %vp, <8 x float> %mask) {
603 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask0:
605 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
606 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
607 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
609 %vec = load <2 x float>, <2 x float>* %vp
610 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
611 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
612 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked memory broadcast of a loaded <2 x float> to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero keep %default.
; The IR body is the same as in the _mask0 variant of this test.
615 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
616 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask1:
618 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
619 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
620 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
622 %vec = load <2 x float>, <2 x float>* %vp
623 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
624 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
625 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masked memory broadcast of a loaded <2 x float> to <8 x float>;
; lanes whose %mask element is not ordered-equal to zero become zero.
; The IR body is the same as in the _mask0 variant of this test.
629 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask1(<2 x float>* %vp, <8 x float> %mask) {
630 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask1:
632 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
633 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
634 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
636 %vec = load <2 x float>, <2 x float>* %vp
637 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
638 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
639 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masking form: the <2 x float> loaded from %vp is repeated across an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask1 merge-masking variant except for the name.
642 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
643 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask2:
645 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
646 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
647 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
649 %vec = load <2 x float>, <2 x float>* %vp
650 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
651 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
652 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masking form: the <2 x float> loaded from %vp is repeated across an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
656 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask2(<2 x float>* %vp, <8 x float> %mask) {
657 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask2:
659 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
660 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
661 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
663 %vec = load <2 x float>, <2 x float>* %vp
664 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
665 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
666 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masking form: the <2 x float> loaded from %vp is repeated across an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask1 merge-masking variant except for the name.
669 define <8 x float> @test_masked_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
670 ; CHECK-LABEL: test_masked_2xfloat_to_8xfloat_mem_mask3:
672 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
673 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
674 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
676 %vec = load <2 x float>, <2 x float>* %vp
677 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
678 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
679 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masking form: the <2 x float> loaded from %vp is repeated across an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
683 define <8 x float> @test_masked_z_2xfloat_to_8xfloat_mem_mask3(<2 x float>* %vp, <8 x float> %mask) {
684 ; CHECK-LABEL: test_masked_z_2xfloat_to_8xfloat_mem_mask3:
686 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
687 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
688 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
690 %vec = load <2 x float>, <2 x float>* %vp
691 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
692 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
693 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked case: loads a <2 x float> from %vp and repeats the pair eight times
; to fill a <16 x float>. Expected: a single vbroadcastsd from (%rdi) into
; %zmm0, i.e. the two floats are broadcast together as one 64-bit element.
696 define <16 x float> @test_2xfloat_to_16xfloat_mem(<2 x float>* %vp) {
697 ; CHECK-LABEL: test_2xfloat_to_16xfloat_mem:
699 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
701 %vec = load <2 x float>, <2 x float>* %vp
702 %res = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
703 ret <16 x float> %res
; Loads a <2 x float> from %vp and repeats the pair across a <16 x float>.
; Lanes where %mask is ordered-equal to 0.0 take the broadcast; the others keep
; %default. Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
705 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
706 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask0:
708 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
709 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
710 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
712 %vec = load <2 x float>, <2 x float>* %vp
713 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
714 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
715 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
716 ret <16 x float> %res
; Loads a <2 x float> from %vp and repeats the pair across a <16 x float>.
; Lanes where %mask is ordered-equal to 0.0 take the broadcast; the others are
; zero. Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
719 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask0(<2 x float>* %vp, <16 x float> %mask) {
720 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask0:
722 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
723 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
724 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
726 %vec = load <2 x float>, <2 x float>* %vp
727 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
728 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
729 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
730 ret <16 x float> %res
; Merge-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
732 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
733 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask1:
735 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
736 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
737 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
739 %vec = load <2 x float>, <2 x float>* %vp
740 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
741 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
742 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
743 ret <16 x float> %res
; Zero-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
746 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask1(<2 x float>* %vp, <16 x float> %mask) {
747 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask1:
749 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
750 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
751 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
753 %vec = load <2 x float>, <2 x float>* %vp
754 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
755 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
756 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
757 ret <16 x float> %res
; Merge-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
759 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
760 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask2:
762 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
763 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
764 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
766 %vec = load <2 x float>, <2 x float>* %vp
767 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
768 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
769 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
770 ret <16 x float> %res
; Zero-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
773 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask2(<2 x float>* %vp, <16 x float> %mask) {
774 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask2:
776 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
777 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
778 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
780 %vec = load <2 x float>, <2 x float>* %vp
781 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
782 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
783 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
784 ret <16 x float> %res
; Merge-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x2 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
786 define <16 x float> @test_masked_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
787 ; CHECK-LABEL: test_masked_2xfloat_to_16xfloat_mem_mask3:
789 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
790 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
791 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
793 %vec = load <2 x float>, <2 x float>* %vp
794 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
795 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
796 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
797 ret <16 x float> %res
; Zero-masking form: the <2 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x2 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
800 define <16 x float> @test_masked_z_2xfloat_to_16xfloat_mem_mask3(<2 x float>* %vp, <16 x float> %mask) {
801 ; CHECK-LABEL: test_masked_z_2xfloat_to_16xfloat_mem_mask3:
803 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
804 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
805 ; CHECK-NEXT: vbroadcastf32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
807 %vec = load <2 x float>, <2 x float>* %vp
808 %shuf = shufflevector <2 x float> %vec, <2 x float> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
809 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
810 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
811 ret <16 x float> %res
; Unmasked case: loads a <4 x float> from %vp and duplicates it into both
; halves of an <8 x float>. Expected: a single vbroadcastf128 from memory into
; ymm0 (the expected shuffle annotation lists four indices, 0,1,0,1).
813 define <8 x float> @test_4xfloat_to_8xfloat_mem(<4 x float>* %vp) {
814 ; CHECK-LABEL: test_4xfloat_to_8xfloat_mem:
816 ; CHECK-NEXT: vbroadcastf128 {{.*#+}} ymm0 = mem[0,1,0,1]
818 %vec = load <4 x float>, <4 x float>* %vp
819 %res = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Loads a <4 x float> from %vp and duplicates it into both halves of an
; <8 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others keep %default. Expected: one vbroadcastf32x4 from memory,
; write-masked by %k1.
822 define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
823 ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask0:
825 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
826 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
827 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
829 %vec = load <4 x float>, <4 x float>* %vp
830 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
831 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
832 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Loads a <4 x float> from %vp and duplicates it into both halves of an
; <8 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others are zero. Expected: one vbroadcastf32x4 from memory with
; {%k1} {z}.
836 define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask0(<4 x float>* %vp, <8 x float> %mask) {
837 ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask0:
839 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
840 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
841 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
843 %vec = load <4 x float>, <4 x float>* %vp
844 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
845 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
846 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
849 define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
850 ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask1:
852 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
853 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
854 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
856 %vec = load <4 x float>, <4 x float>* %vp
857 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
858 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
859 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
863 define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask1(<4 x float>* %vp, <8 x float> %mask) {
864 ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask1:
866 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
867 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
868 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
870 %vec = load <4 x float>, <4 x float>* %vp
871 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
872 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
873 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
876 define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
877 ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask2:
879 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
880 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
881 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
883 %vec = load <4 x float>, <4 x float>* %vp
884 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
885 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
886 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
890 define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask2(<4 x float>* %vp, <8 x float> %mask) {
891 ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask2:
893 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
894 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
895 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
897 %vec = load <4 x float>, <4 x float>* %vp
898 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
899 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
900 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
903 define <8 x float> @test_masked_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %default, <8 x float> %mask) {
904 ; CHECK-LABEL: test_masked_4xfloat_to_8xfloat_mem_mask3:
906 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
907 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
908 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
910 %vec = load <4 x float>, <4 x float>* %vp
911 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
912 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
913 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %default
; Zero-masking form: the <4 x float> loaded from %vp fills both halves of an
; <8 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
917 define <8 x float> @test_masked_z_4xfloat_to_8xfloat_mem_mask3(<4 x float>* %vp, <8 x float> %mask) {
918 ; CHECK-LABEL: test_masked_z_4xfloat_to_8xfloat_mem_mask3:
920 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
921 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
922 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
924 %vec = load <4 x float>, <4 x float>* %vp
925 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
926 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
927 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked case: loads a <4 x float> from %vp and repeats it four times to
; fill a <16 x float>. Expected: a single unmasked vbroadcastf32x4 from memory
; into zmm0.
930 define <16 x float> @test_4xfloat_to_16xfloat_mem(<4 x float>* %vp) {
931 ; CHECK-LABEL: test_4xfloat_to_16xfloat_mem:
933 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
935 %vec = load <4 x float>, <4 x float>* %vp
936 %res = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
937 ret <16 x float> %res
; Loads a <4 x float> from %vp and repeats it four times across a
; <16 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others keep %default. Expected: one vbroadcastf32x4 from memory,
; write-masked by %k1.
939 define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
940 ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask0:
942 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
943 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
944 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
946 %vec = load <4 x float>, <4 x float>* %vp
947 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
948 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
949 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
950 ret <16 x float> %res
; Loads a <4 x float> from %vp and repeats it four times across a
; <16 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others are zero. Expected: one vbroadcastf32x4 from memory with
; {%k1} {z}.
953 define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask0(<4 x float>* %vp, <16 x float> %mask) {
954 ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask0:
956 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
957 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
958 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
960 %vec = load <4 x float>, <4 x float>* %vp
961 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
962 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
963 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
964 ret <16 x float> %res
; Merge-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
966 define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
967 ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask1:
969 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
970 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
971 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
973 %vec = load <4 x float>, <4 x float>* %vp
974 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
975 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
976 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
977 ret <16 x float> %res
; Zero-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
980 define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask1(<4 x float>* %vp, <16 x float> %mask) {
981 ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask1:
983 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
984 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
985 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
987 %vec = load <4 x float>, <4 x float>* %vp
988 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
989 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
990 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
991 ret <16 x float> %res
; Merge-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
993 define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
994 ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask2:
996 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
997 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
998 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1000 %vec = load <4 x float>, <4 x float>* %vp
1001 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1002 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1003 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1004 ret <16 x float> %res
; Zero-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
1007 define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask2(<4 x float>* %vp, <16 x float> %mask) {
1008 ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask2:
1010 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1011 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1012 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1014 %vec = load <4 x float>, <4 x float>* %vp
1015 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1016 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1017 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1018 ret <16 x float> %res
; Merge-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x4 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
1020 define <16 x float> @test_masked_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1021 ; CHECK-LABEL: test_masked_4xfloat_to_16xfloat_mem_mask3:
1023 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1024 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1025 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1027 %vec = load <4 x float>, <4 x float>* %vp
1028 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1029 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1030 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1031 ret <16 x float> %res
; Zero-masking form: the <4 x float> loaded from %vp is repeated across a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x4 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
1034 define <16 x float> @test_masked_z_4xfloat_to_16xfloat_mem_mask3(<4 x float>* %vp, <16 x float> %mask) {
1035 ; CHECK-LABEL: test_masked_z_4xfloat_to_16xfloat_mem_mask3:
1037 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1038 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1039 ; CHECK-NEXT: vbroadcastf32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
1041 %vec = load <4 x float>, <4 x float>* %vp
1042 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1043 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1044 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1045 ret <16 x float> %res
; Unmasked case: loads an <8 x float> from %vp and duplicates it into both
; halves of a <16 x float>. Expected: a single unmasked vbroadcastf64x4 from
; memory into zmm0 (the masked variants in this file expect vbroadcastf32x8).
1047 define <16 x float> @test_8xfloat_to_16xfloat_mem(<8 x float>* %vp) {
1048 ; CHECK-LABEL: test_8xfloat_to_16xfloat_mem:
1050 ; CHECK-NEXT: vbroadcastf64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
1052 %vec = load <8 x float>, <8 x float>* %vp
1053 %res = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1054 ret <16 x float> %res
; Loads an <8 x float> from %vp and duplicates it into both halves of a
; <16 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others keep %default. Expected: one vbroadcastf32x8 from memory,
; write-masked by %k1.
1056 define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1057 ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask0:
1059 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1060 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1061 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1063 %vec = load <8 x float>, <8 x float>* %vp
1064 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1065 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1066 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1067 ret <16 x float> %res
; Loads an <8 x float> from %vp and duplicates it into both halves of a
; <16 x float>. Lanes where %mask is ordered-equal to 0.0 take the broadcast;
; the others are zero. Expected: one vbroadcastf32x8 from memory with
; {%k1} {z}.
1070 define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask0(<8 x float>* %vp, <16 x float> %mask) {
1071 ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask0:
1073 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1074 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1075 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1077 %vec = load <8 x float>, <8 x float>* %vp
1078 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1079 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1080 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1081 ret <16 x float> %res
; Merge-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x8 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
1083 define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1084 ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask1:
1086 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1087 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1088 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1090 %vec = load <8 x float>, <8 x float>* %vp
1091 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1092 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1093 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1094 ret <16 x float> %res
; Zero-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x8 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
1097 define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask1(<8 x float>* %vp, <16 x float> %mask) {
1098 ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask1:
1100 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1101 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1102 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1104 %vec = load <8 x float>, <8 x float>* %vp
1105 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1106 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1107 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1108 ret <16 x float> %res
; Merge-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x8 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
1110 define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1111 ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask2:
1113 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1114 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1115 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1117 %vec = load <8 x float>, <8 x float>* %vp
1118 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1119 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1120 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1121 ret <16 x float> %res
; Zero-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x8 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
1124 define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask2(<8 x float>* %vp, <16 x float> %mask) {
1125 ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask2:
1127 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1128 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1129 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1131 %vec = load <8 x float>, <8 x float>* %vp
1132 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1133 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1134 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1135 ret <16 x float> %res
; Merge-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 keep %default.
; Expected: one vbroadcastf32x8 from memory, write-masked by %k1.
; NOTE(review): IR matches the _mem_mask0 merge-masking variant except for the name.
1137 define <16 x float> @test_masked_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %default, <16 x float> %mask) {
1138 ; CHECK-LABEL: test_masked_8xfloat_to_16xfloat_mem_mask3:
1140 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1141 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1142 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1144 %vec = load <8 x float>, <8 x float>* %vp
1145 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1146 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1147 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %default
1148 ret <16 x float> %res
; Zero-masking form: the <8 x float> loaded from %vp fills both halves of a
; <16 x float>; lanes where %mask is not ordered-equal to 0.0 become zero.
; Expected: one vbroadcastf32x8 from memory with {%k1} {z}.
; NOTE(review): IR matches the _mem_mask0 zero-masking variant except for the name.
1151 define <16 x float> @test_masked_z_8xfloat_to_16xfloat_mem_mask3(<8 x float>* %vp, <16 x float> %mask) {
1152 ; CHECK-LABEL: test_masked_z_8xfloat_to_16xfloat_mem_mask3:
1154 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1155 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1156 ; CHECK-NEXT: vbroadcastf32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1158 %vec = load <8 x float>, <8 x float>* %vp
1159 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1160 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1161 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1162 ret <16 x float> %res