1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
; Unmasked, register source: duplicates element 0 of %vec into both lanes (expects a plain vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
4 define <2 x double> @test_2xdouble_dup_low(<2 x double> %vec) {
5 ; CHECK-LABEL: test_2xdouble_dup_low:
7 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
9 %res = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
; Merge-masked, register source: lanes where %mask is oeq to zero take the duplicated
; element 0 of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
12 define <2 x double> @test_masked_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
13 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask0:
15 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
16 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
17 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
18 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
20 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
21 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
22 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
; Zero-masked, register source: lanes where %mask is oeq to zero take the duplicated
; element 0 of %vec, the remaining lanes are zeroed (expects a {%k1} {z} vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
26 define <2 x double> @test_masked_z_2xdouble_dup_low_mask0(<2 x double> %vec, <2 x double> %mask) {
27 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask0:
29 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
30 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
31 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
33 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
34 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
35 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
; Merge-masked, register source: lanes where %mask is oeq to zero take the duplicated
; element 0 of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
38 define <2 x double> @test_masked_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
39 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mask1:
41 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
42 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
43 ; CHECK-NEXT: vmovddup {{.*#+}} xmm1 {%k1} = xmm0[0,0]
44 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
46 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
47 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
48 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
; Zero-masked, register source: lanes where %mask is oeq to zero take the duplicated
; element 0 of %vec, the remaining lanes are zeroed (expects a {%k1} {z} vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
52 define <2 x double> @test_masked_z_2xdouble_dup_low_mask1(<2 x double> %vec, <2 x double> %mask) {
53 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mask1:
55 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
56 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
57 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0]
59 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
60 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
61 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
; Unmasked, memory source: %vec is loaded from %vp and element 0 is duplicated into both
; lanes (expects the load folded into vmovddup's memory operand).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
64 define <2 x double> @test_2xdouble_dup_low_mem(ptr %vp) {
65 ; CHECK-LABEL: test_2xdouble_dup_low_mem:
67 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
69 %vec = load <2 x double>, ptr %vp
70 %res = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take the duplicated element 0, the remaining lanes keep %vec2 (expects a {%k1} vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
73 define <2 x double> @test_masked_2xdouble_dup_low_mem_mask0(ptr %vp, <2 x double> %vec2, <2 x double> %mask) {
74 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask0:
76 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
77 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
78 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
80 %vec = load <2 x double>, ptr %vp
81 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
82 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
83 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take the duplicated element 0, the remaining lanes are zeroed (expects a {%k1} {z} vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
87 define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask0(ptr %vp, <2 x double> %mask) {
88 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask0:
90 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
91 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
92 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
94 %vec = load <2 x double>, ptr %vp
95 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
96 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
97 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take the duplicated element 0, the remaining lanes keep %vec2 (expects a {%k1} vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
100 define <2 x double> @test_masked_2xdouble_dup_low_mem_mask1(ptr %vp, <2 x double> %vec2, <2 x double> %mask) {
101 ; CHECK-LABEL: test_masked_2xdouble_dup_low_mem_mask1:
103 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
104 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
105 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} = mem[0,0]
107 %vec = load <2 x double>, ptr %vp
108 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
109 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
110 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
111 ret <2 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take the duplicated element 0, the remaining lanes are zeroed (expects a {%k1} {z} vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
114 define <2 x double> @test_masked_z_2xdouble_dup_low_mem_mask1(ptr %vp, <2 x double> %mask) {
115 ; CHECK-LABEL: test_masked_z_2xdouble_dup_low_mem_mask1:
117 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
118 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
119 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 {%k1} {z} = mem[0,0]
121 %vec = load <2 x double>, ptr %vp
122 %shuf = shufflevector <2 x double> %vec, <2 x double> poison, <2 x i32> <i32 0, i32 0>
123 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
124 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
125 ret <2 x double> %res
; Unmasked, register source: duplicates elements 0 and 2 of %vec into lane pairs
; (shuffle mask 0,0,2,2; expects a plain ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
127 define <4 x double> @test_4xdouble_dup_low(<4 x double> %vec) {
128 ; CHECK-LABEL: test_4xdouble_dup_low:
130 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 = ymm0[0,0,2,2]
132 %res = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
133 ret <4 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
135 define <4 x double> @test_masked_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
136 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask0:
138 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
139 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
140 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
141 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
143 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
144 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
145 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
146 ret <4 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
149 define <4 x double> @test_masked_z_4xdouble_dup_low_mask0(<4 x double> %vec, <4 x double> %mask) {
150 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask0:
152 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
153 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
154 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
156 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
157 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
158 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
159 ret <4 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
161 define <4 x double> @test_masked_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
162 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask1:
164 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
165 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
166 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
167 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
169 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
170 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
171 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
172 ret <4 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
175 define <4 x double> @test_masked_z_4xdouble_dup_low_mask1(<4 x double> %vec, <4 x double> %mask) {
176 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask1:
178 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
179 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
180 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
182 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
183 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
184 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
185 ret <4 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
187 define <4 x double> @test_masked_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
188 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask2:
190 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
191 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
192 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
193 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
195 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
196 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
197 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
198 ret <4 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
201 define <4 x double> @test_masked_z_4xdouble_dup_low_mask2(<4 x double> %vec, <4 x double> %mask) {
202 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask2:
204 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
205 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
206 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
208 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
209 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
210 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
211 ret <4 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
213 define <4 x double> @test_masked_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
214 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask3:
216 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
217 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
218 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
219 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
221 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
222 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
223 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
224 ret <4 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
227 define <4 x double> @test_masked_z_4xdouble_dup_low_mask3(<4 x double> %vec, <4 x double> %mask) {
228 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask3:
230 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
231 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
232 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
234 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
235 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
236 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
237 ret <4 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
239 define <4 x double> @test_masked_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
240 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mask4:
242 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
243 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
244 ; CHECK-NEXT: vmovddup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2]
245 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
247 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
248 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
249 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
250 ret <4 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
253 define <4 x double> @test_masked_z_4xdouble_dup_low_mask4(<4 x double> %vec, <4 x double> %mask) {
254 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mask4:
256 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
257 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
258 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2]
260 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
261 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
262 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
263 ret <4 x double> %res
; Unmasked, memory source: %vec is loaded from %vp and shuffled with mask 0,0,2,2
; (expects the load folded into a ymm vmovddup memory operand).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
265 define <4 x double> @test_4xdouble_dup_low_mem(ptr %vp) {
266 ; CHECK-LABEL: test_4xdouble_dup_low_mem:
268 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 = mem[0,0,2,2]
270 %vec = load <4 x double>, ptr %vp
271 %res = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
272 ret <4 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes keep %vec2 (expects a {%k1} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
274 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask0(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
275 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask0:
277 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
278 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
279 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
281 %vec = load <4 x double>, ptr %vp
282 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
283 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
284 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
285 ret <4 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
288 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask0(ptr %vp, <4 x double> %mask) {
289 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask0:
291 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
292 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
293 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
295 %vec = load <4 x double>, ptr %vp
296 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
297 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
298 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
299 ret <4 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes keep %vec2 (expects a {%k1} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
301 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask1(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
302 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask1:
304 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
305 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
306 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
308 %vec = load <4 x double>, ptr %vp
309 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
310 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
311 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
312 ret <4 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
315 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask1(ptr %vp, <4 x double> %mask) {
316 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask1:
318 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
319 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
320 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
322 %vec = load <4 x double>, ptr %vp
323 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
324 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
325 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
326 ret <4 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes keep %vec2 (expects a {%k1} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
328 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask2(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
329 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask2:
331 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
332 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
333 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
335 %vec = load <4 x double>, ptr %vp
336 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
337 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
338 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
339 ret <4 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
342 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask2(ptr %vp, <4 x double> %mask) {
343 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask2:
345 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
346 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
347 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
349 %vec = load <4 x double>, ptr %vp
350 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
351 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
352 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
353 ret <4 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes keep %vec2 (expects a {%k1} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
355 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask3(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
356 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask3:
358 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
359 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
360 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
362 %vec = load <4 x double>, ptr %vp
363 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
364 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
365 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
366 ret <4 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
369 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask3(ptr %vp, <4 x double> %mask) {
370 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask3:
372 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
373 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
374 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
376 %vec = load <4 x double>, ptr %vp
377 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
378 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
379 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
380 ret <4 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes keep %vec2 (expects a {%k1} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
382 define <4 x double> @test_masked_4xdouble_dup_low_mem_mask4(ptr %vp, <4 x double> %vec2, <4 x double> %mask) {
383 ; CHECK-LABEL: test_masked_4xdouble_dup_low_mem_mask4:
385 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
386 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
387 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2]
389 %vec = load <4 x double>, ptr %vp
390 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
391 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
392 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
393 ret <4 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero
; take its 0,0,2,2 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} ymm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
396 define <4 x double> @test_masked_z_4xdouble_dup_low_mem_mask4(ptr %vp, <4 x double> %mask) {
397 ; CHECK-LABEL: test_masked_z_4xdouble_dup_low_mem_mask4:
399 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
400 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
401 ; CHECK-NEXT: vmovddup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2]
403 %vec = load <4 x double>, ptr %vp
404 %shuf = shufflevector <4 x double> %vec, <4 x double> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
405 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
406 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
407 ret <4 x double> %res
; Unmasked, register source: duplicates the even elements of %vec into lane pairs
; (shuffle mask 0,0,2,2,4,4,6,6; expects a plain zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
409 define <8 x double> @test_8xdouble_dup_low(<8 x double> %vec) {
410 ; CHECK-LABEL: test_8xdouble_dup_low:
412 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6]
414 %res = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
415 ret <8 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
417 define <8 x double> @test_masked_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
418 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask0:
420 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
421 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
422 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
423 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
425 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
426 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
427 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
428 ret <8 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
431 define <8 x double> @test_masked_z_8xdouble_dup_low_mask0(<8 x double> %vec, <8 x double> %mask) {
432 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask0:
434 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
435 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
436 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
438 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
439 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
440 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
441 ret <8 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
443 define <8 x double> @test_masked_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
444 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask1:
446 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
447 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
448 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
449 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
451 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
452 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
453 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
454 ret <8 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
457 define <8 x double> @test_masked_z_8xdouble_dup_low_mask1(<8 x double> %vec, <8 x double> %mask) {
458 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask1:
460 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
461 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
462 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
464 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
465 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
466 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
467 ret <8 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
469 define <8 x double> @test_masked_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
470 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask2:
472 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
473 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
474 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
475 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
477 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
478 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
479 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
480 ret <8 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
483 define <8 x double> @test_masked_z_8xdouble_dup_low_mask2(<8 x double> %vec, <8 x double> %mask) {
484 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask2:
486 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
487 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
488 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
490 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
491 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
492 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
493 ret <8 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
495 define <8 x double> @test_masked_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
496 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask3:
498 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
499 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
500 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
501 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
503 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
504 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
505 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
506 ret <8 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
509 define <8 x double> @test_masked_z_8xdouble_dup_low_mask3(<8 x double> %vec, <8 x double> %mask) {
510 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask3:
512 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
513 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
514 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
516 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
517 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
518 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
519 ret <8 x double> %res
; Merge-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes keep %vec2 (expects a {%k1}-masked zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
521 define <8 x double> @test_masked_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
522 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mask4:
524 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
525 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
526 ; CHECK-NEXT: vmovddup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6]
527 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
529 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
530 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
531 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
532 ret <8 x double> %res
; Zero-masked, register source: lanes where %mask is oeq to zero take the 0,0,2,2,4,4,6,6
; shuffle of %vec, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
535 define <8 x double> @test_masked_z_8xdouble_dup_low_mask4(<8 x double> %vec, <8 x double> %mask) {
536 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mask4:
538 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
539 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
540 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6]
542 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
543 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
544 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
545 ret <8 x double> %res
; Unmasked, memory source: %vec is loaded from %vp and shuffled with mask 0,0,2,2,4,4,6,6
; (expects the load folded into a zmm vmovddup memory operand).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
547 define <8 x double> @test_8xdouble_dup_low_mem(ptr %vp) {
548 ; CHECK-LABEL: test_8xdouble_dup_low_mem:
550 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6]
552 %vec = load <8 x double>, ptr %vp
553 %res = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
554 ret <8 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes keep %vec2 (expects a {%k1} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
556 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask0(ptr %vp, <8 x double> %vec2, <8 x double> %mask) {
557 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask0:
559 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
560 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
561 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
563 %vec = load <8 x double>, ptr %vp
564 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
565 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
566 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
567 ret <8 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
570 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask0(ptr %vp, <8 x double> %mask) {
571 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask0:
573 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
574 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
575 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
577 %vec = load <8 x double>, ptr %vp
578 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
579 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
580 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
581 ret <8 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes keep %vec2 (expects a {%k1} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
583 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask1(ptr %vp, <8 x double> %vec2, <8 x double> %mask) {
584 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask1:
586 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
587 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
588 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
590 %vec = load <8 x double>, ptr %vp
591 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
592 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
593 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
594 ret <8 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
597 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask1(ptr %vp, <8 x double> %mask) {
598 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask1:
600 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
601 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
602 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
604 %vec = load <8 x double>, ptr %vp
605 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
606 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
607 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
608 ret <8 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes keep %vec2 (expects a {%k1} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
610 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask2(ptr %vp, <8 x double> %vec2, <8 x double> %mask) {
611 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask2:
613 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
614 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
615 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
617 %vec = load <8 x double>, ptr %vp
618 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
619 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
620 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
621 ret <8 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
624 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask2(ptr %vp, <8 x double> %mask) {
625 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask2:
627 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
628 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
629 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
631 %vec = load <8 x double>, ptr %vp
632 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
633 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
634 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
635 ret <8 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes keep %vec2 (expects a {%k1} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
637 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask3(ptr %vp, <8 x double> %vec2, <8 x double> %mask) {
638 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask3:
640 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
641 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
642 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
644 %vec = load <8 x double>, ptr %vp
645 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
646 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
647 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
648 ret <8 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
651 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask3(ptr %vp, <8 x double> %mask) {
652 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask3:
654 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
655 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
656 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
658 %vec = load <8 x double>, ptr %vp
659 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
660 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
661 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
662 ret <8 x double> %res
; Merge-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes keep %vec2 (expects a {%k1} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
664 define <8 x double> @test_masked_8xdouble_dup_low_mem_mask4(ptr %vp, <8 x double> %vec2, <8 x double> %mask) {
665 ; CHECK-LABEL: test_masked_8xdouble_dup_low_mem_mask4:
667 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
668 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
669 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6]
671 %vec = load <8 x double>, ptr %vp
672 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
673 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
674 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
675 ret <8 x double> %res
; Zero-masked, memory source: %vec is loaded from %vp; lanes where %mask is oeq to zero take
; its 0,0,2,2,4,4,6,6 shuffle, the remaining lanes are zeroed (expects a {%k1} {z} zmm vmovddup from mem).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
678 define <8 x double> @test_masked_z_8xdouble_dup_low_mem_mask4(ptr %vp, <8 x double> %mask) {
679 ; CHECK-LABEL: test_masked_z_8xdouble_dup_low_mem_mask4:
681 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
682 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
683 ; CHECK-NEXT: vmovddup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
685 %vec = load <8 x double>, ptr %vp
686 %shuf = shufflevector <8 x double> %vec, <8 x double> poison, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
687 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
688 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
689 ret <8 x double> %res
; Unmasked, register source: duplicates elements 0 and 2 of %vec into lane pairs
; (shuffle mask 0,0,2,2; expects a plain vmovsldup).
; The second shuffle operand is never read, so it is spelled poison instead of undef.
691 define <4 x float> @test_4xfloat_dup_low(<4 x float> %vec) {
692 ; CHECK-LABEL: test_4xfloat_dup_low:
694 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = xmm0[0,0,2,2]
696 %res = shufflevector <4 x float> %vec, <4 x float> poison, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; Merge-masked <4 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2 (even lanes duplicated); the other lanes keep %vec2.
; Expected: vcmpeqps against a zeroed register into %k1, vmovsldup into xmm1
; under {%k1}, then a move of xmm1 to xmm0.
699 define <4 x float> @test_masked_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
700 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask0:
702 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
703 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
704 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
705 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
707 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
708 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
709 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2 (even lanes duplicated); the other lanes become 0.0.
; Expected: vmovsldup on xmm0 with {%k1} {z}.
713 define <4 x float> @test_masked_z_4xfloat_dup_low_mask0(<4 x float> %vec, <4 x float> %mask) {
714 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask0:
716 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
717 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
718 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
720 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
721 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
722 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked <4 x float> case (mask1 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and %vec2 elsewhere.
; Expected: %k1 built by vcmpeqps, vmovsldup into xmm1 under {%k1}, then xmm1 -> xmm0.
725 define <4 x float> @test_masked_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
726 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask1:
728 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
729 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
730 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
731 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
733 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
734 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
735 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case (mask1 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and zero elsewhere.
; Expected: vmovsldup on xmm0 with {%k1} {z}.
739 define <4 x float> @test_masked_z_4xfloat_dup_low_mask1(<4 x float> %vec, <4 x float> %mask) {
740 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask1:
742 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
743 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
744 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
746 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
747 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
748 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked <4 x float> case (mask2 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and %vec2 elsewhere.
; Expected: %k1 built by vcmpeqps, vmovsldup into xmm1 under {%k1}, then xmm1 -> xmm0.
751 define <4 x float> @test_masked_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
752 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask2:
754 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
755 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
756 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
757 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
759 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
760 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
761 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case (mask2 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and zero elsewhere.
; Expected: vmovsldup on xmm0 with {%k1} {z}.
765 define <4 x float> @test_masked_z_4xfloat_dup_low_mask2(<4 x float> %vec, <4 x float> %mask) {
766 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask2:
768 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
769 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
770 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
772 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
773 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
774 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked <4 x float> case (mask3 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and %vec2 elsewhere.
; Expected: %k1 built by vcmpeqps, vmovsldup into xmm1 under {%k1}, then xmm1 -> xmm0.
777 define <4 x float> @test_masked_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
778 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask3:
780 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
781 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
782 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
783 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
785 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
786 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
787 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case (mask3 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and zero elsewhere.
; Expected: vmovsldup on xmm0 with {%k1} {z}.
791 define <4 x float> @test_masked_z_4xfloat_dup_low_mask3(<4 x float> %vec, <4 x float> %mask) {
792 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask3:
794 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
795 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
796 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
798 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
799 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
800 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked <4 x float> case (mask4 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and %vec2 elsewhere.
; Expected: %k1 built by vcmpeqps, vmovsldup into xmm1 under {%k1}, then xmm1 -> xmm0.
803 define <4 x float> @test_masked_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
804 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mask4:
806 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
807 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
808 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm1 {%k1} = xmm0[0,0,2,2]
809 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
811 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
812 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
813 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case (mask4 variant): the select picks the 0,0,2,2
; shuffle of %vec where %mask compares oeq to 0.0 and zero elsewhere.
; Expected: vmovsldup on xmm0 with {%k1} {z}.
817 define <4 x float> @test_masked_z_4xfloat_dup_low_mask4(<4 x float> %vec, <4 x float> %mask) {
818 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mask4:
820 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
821 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
822 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = xmm0[0,0,2,2]
824 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
825 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
826 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked <4 x float> case with the source loaded from %vp: the 0,0,2,2 shuffle
; is expected to become one vmovsldup that takes the vector as a mem operand.
829 define <4 x float> @test_4xfloat_dup_low_mem(ptr %vp) {
830 ; CHECK-LABEL: test_4xfloat_dup_low_mem:
832 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 = mem[0,0,2,2]
834 %vec = load <4 x float>, ptr %vp
835 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
; Merge-masked <4 x float> case with %vec loaded from %vp: lanes where %mask
; compares oeq to 0.0 take the 0,0,2,2 shuffle of the loaded vector, others keep
; %vec2. Expected: vmovsldup with a mem operand writing xmm0 under {%k1}.
838 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask0(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
839 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask0:
841 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
842 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
843 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
845 %vec = load <4 x float>, ptr %vp
846 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
847 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
848 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked <4 x float> case with %vec loaded from %vp: lanes where %mask
; compares oeq to 0.0 take the 0,0,2,2 shuffle of the loaded vector, others become
; 0.0. Expected: vmovsldup with a mem operand and {%k1} {z}.
852 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask0(ptr %vp, <4 x float> %mask) {
853 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask0:
855 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
856 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
857 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
859 %vec = load <4 x float>, ptr %vp
860 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
861 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
862 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source <4 x float> case (mask1 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and %vec2
; elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
865 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask1(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
866 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask1:
868 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
869 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
870 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
872 %vec = load <4 x float>, ptr %vp
873 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
874 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
875 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source <4 x float> case (mask1 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and zero
; elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
879 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask1(ptr %vp, <4 x float> %mask) {
880 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask1:
882 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
883 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
884 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
886 %vec = load <4 x float>, ptr %vp
887 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
888 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
889 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source <4 x float> case (mask2 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and %vec2
; elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
892 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask2(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
893 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask2:
895 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
896 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
897 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
899 %vec = load <4 x float>, ptr %vp
900 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
901 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
902 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source <4 x float> case (mask2 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and zero
; elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
906 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask2(ptr %vp, <4 x float> %mask) {
907 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask2:
909 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
910 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
911 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
913 %vec = load <4 x float>, ptr %vp
914 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
915 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
916 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source <4 x float> case (mask3 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and %vec2
; elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
919 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask3(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
920 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask3:
922 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
923 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
924 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
926 %vec = load <4 x float>, ptr %vp
927 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
928 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
929 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source <4 x float> case (mask3 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and zero
; elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
933 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask3(ptr %vp, <4 x float> %mask) {
934 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask3:
936 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
937 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
938 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
940 %vec = load <4 x float>, ptr %vp
941 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
942 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
943 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source <4 x float> case (mask4 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and %vec2
; elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
946 define <4 x float> @test_masked_4xfloat_dup_low_mem_mask4(ptr %vp, <4 x float> %vec2, <4 x float> %mask) {
947 ; CHECK-LABEL: test_masked_4xfloat_dup_low_mem_mask4:
949 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
950 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
951 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} = mem[0,0,2,2]
953 %vec = load <4 x float>, ptr %vp
954 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
955 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
956 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source <4 x float> case (mask4 variant): load from %vp,
; shuffle 0,0,2,2, then select that result where %mask is oeq 0.0 and zero
; elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
960 define <4 x float> @test_masked_z_4xfloat_dup_low_mem_mask4(ptr %vp, <4 x float> %mask) {
961 ; CHECK-LABEL: test_masked_z_4xfloat_dup_low_mem_mask4:
963 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
964 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
965 ; CHECK-NEXT: vmovsldup {{.*#+}} xmm0 {%k1} {z} = mem[0,0,2,2]
967 %vec = load <4 x float>, ptr %vp
968 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 2>
969 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
970 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked <8 x float> case: shuffle mask 0,0,2,2,4,4,6,6 copies each even-indexed
; float of %vec into the odd lane above it. Expected: a single vmovsldup on ymm0.
973 define <8 x float> @test_8xfloat_dup_low(<8 x float> %vec) {
974 ; CHECK-LABEL: test_8xfloat_dup_low:
976 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = ymm0[0,0,2,2,4,4,6,6]
978 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
; Merge-masked <8 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2,4,4,6,6; the other lanes keep %vec2. Expected: vcmpeqps on
; ymm into %k1, vmovsldup into ymm1 under {%k1}, then a move of ymm1 to ymm0.
981 define <8 x float> @test_masked_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
982 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask0:
984 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
985 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
986 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
987 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
989 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
990 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
991 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked <8 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2,4,4,6,6; the other lanes become 0.0.
; Expected: vmovsldup on ymm0 with {%k1} {z}.
995 define <8 x float> @test_masked_z_8xfloat_dup_low_mask0(<8 x float> %vec, <8 x float> %mask) {
996 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask0:
998 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
999 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1000 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
1002 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1003 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1004 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1005 ret <8 x float> %res
; Merge-masked <8 x float> case (mask1 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into ymm1 under {%k1}, then ymm1 -> ymm0.
1007 define <8 x float> @test_masked_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1008 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask1:
1010 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1011 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1012 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
1013 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1015 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1016 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1017 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1018 ret <8 x float> %res
; Zero-masked <8 x float> case (mask1 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on ymm0 with {%k1} {z}.
1021 define <8 x float> @test_masked_z_8xfloat_dup_low_mask1(<8 x float> %vec, <8 x float> %mask) {
1022 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask1:
1024 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1025 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1026 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
1028 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1029 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1030 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1031 ret <8 x float> %res
; Merge-masked <8 x float> case (mask2 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into ymm1 under {%k1}, then ymm1 -> ymm0.
1033 define <8 x float> @test_masked_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1034 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask2:
1036 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1037 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1038 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
1039 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1041 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1042 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1043 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1044 ret <8 x float> %res
; Zero-masked <8 x float> case (mask2 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on ymm0 with {%k1} {z}.
1047 define <8 x float> @test_masked_z_8xfloat_dup_low_mask2(<8 x float> %vec, <8 x float> %mask) {
1048 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask2:
1050 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1051 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1052 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
1054 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1055 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1056 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1057 ret <8 x float> %res
; Merge-masked <8 x float> case (mask3 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into ymm1 under {%k1}, then ymm1 -> ymm0.
1059 define <8 x float> @test_masked_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1060 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask3:
1062 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1063 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1064 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
1065 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1067 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1068 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1069 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1070 ret <8 x float> %res
; Zero-masked <8 x float> case (mask3 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on ymm0 with {%k1} {z}.
1073 define <8 x float> @test_masked_z_8xfloat_dup_low_mask3(<8 x float> %vec, <8 x float> %mask) {
1074 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask3:
1076 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1077 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1078 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
1080 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1081 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1082 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1083 ret <8 x float> %res
; Merge-masked <8 x float> case (mask4 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into ymm1 under {%k1}, then ymm1 -> ymm0.
1085 define <8 x float> @test_masked_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1086 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mask4:
1088 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1089 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1090 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm1 {%k1} = ymm0[0,0,2,2,4,4,6,6]
1091 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1093 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1094 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1095 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1096 ret <8 x float> %res
; Zero-masked <8 x float> case (mask4 variant): the select picks the
; 0,0,2,2,4,4,6,6 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on ymm0 with {%k1} {z}.
1099 define <8 x float> @test_masked_z_8xfloat_dup_low_mask4(<8 x float> %vec, <8 x float> %mask) {
1100 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mask4:
1102 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1103 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1104 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = ymm0[0,0,2,2,4,4,6,6]
1106 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1107 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1108 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1109 ret <8 x float> %res
; Unmasked <8 x float> case with the source loaded from %vp: the 0,0,2,2,4,4,6,6
; shuffle is expected to become one vmovsldup that takes the vector as a mem operand.
1111 define <8 x float> @test_8xfloat_dup_low_mem(ptr %vp) {
1112 ; CHECK-LABEL: test_8xfloat_dup_low_mem:
1114 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 = mem[0,0,2,2,4,4,6,6]
1116 %vec = load <8 x float>, ptr %vp
1117 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1118 ret <8 x float> %res
; Merge-masked <8 x float> case with %vec loaded from %vp: lanes where %mask
; compares oeq to 0.0 take the 0,0,2,2,4,4,6,6 shuffle of the loaded vector, others
; keep %vec2. Expected: vmovsldup with a mem operand writing ymm0 under {%k1}.
1120 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask0(ptr %vp, <8 x float> %vec2, <8 x float> %mask) {
1121 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask0:
1123 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1124 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1125 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
1127 %vec = load <8 x float>, ptr %vp
1128 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1129 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1130 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1131 ret <8 x float> %res
; Zero-masked <8 x float> case with %vec loaded from %vp: lanes where %mask
; compares oeq to 0.0 take the 0,0,2,2,4,4,6,6 shuffle of the loaded vector, others
; become 0.0. Expected: vmovsldup with a mem operand and {%k1} {z}.
1134 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask0(ptr %vp, <8 x float> %mask) {
1135 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask0:
1137 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1138 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1139 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
1141 %vec = load <8 x float>, ptr %vp
1142 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1143 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1144 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1145 ret <8 x float> %res
; Merge-masked, memory-source <8 x float> case (mask1 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; %vec2 elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
1147 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask1(ptr %vp, <8 x float> %vec2, <8 x float> %mask) {
1148 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask1:
1150 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1151 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1152 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
1154 %vec = load <8 x float>, ptr %vp
1155 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1156 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1157 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1158 ret <8 x float> %res
; Zero-masked, memory-source <8 x float> case (mask1 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; zero elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
1161 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask1(ptr %vp, <8 x float> %mask) {
1162 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask1:
1164 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1165 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1166 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
1168 %vec = load <8 x float>, ptr %vp
1169 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1170 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1171 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1172 ret <8 x float> %res
; Merge-masked, memory-source <8 x float> case (mask2 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; %vec2 elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
1174 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask2(ptr %vp, <8 x float> %vec2, <8 x float> %mask) {
1175 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask2:
1177 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1178 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1179 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
1181 %vec = load <8 x float>, ptr %vp
1182 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1183 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1184 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1185 ret <8 x float> %res
; Zero-masked, memory-source <8 x float> case (mask2 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; zero elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
1188 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask2(ptr %vp, <8 x float> %mask) {
1189 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask2:
1191 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1192 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1193 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
1195 %vec = load <8 x float>, ptr %vp
1196 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1197 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1198 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1199 ret <8 x float> %res
; Merge-masked, memory-source <8 x float> case (mask3 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; %vec2 elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
1201 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask3(ptr %vp, <8 x float> %vec2, <8 x float> %mask) {
1202 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask3:
1204 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1205 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1206 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
1208 %vec = load <8 x float>, ptr %vp
1209 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1210 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1211 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1212 ret <8 x float> %res
; Zero-masked, memory-source <8 x float> case (mask3 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; zero elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
1215 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask3(ptr %vp, <8 x float> %mask) {
1216 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask3:
1218 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1219 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1220 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
1222 %vec = load <8 x float>, ptr %vp
1223 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1224 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1225 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1226 ret <8 x float> %res
; Merge-masked, memory-source <8 x float> case (mask4 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; %vec2 elsewhere. Expected: vmovsldup with a mem operand under {%k1}.
1228 define <8 x float> @test_masked_8xfloat_dup_low_mem_mask4(ptr %vp, <8 x float> %vec2, <8 x float> %mask) {
1229 ; CHECK-LABEL: test_masked_8xfloat_dup_low_mem_mask4:
1231 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1232 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1233 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} = mem[0,0,2,2,4,4,6,6]
1235 %vec = load <8 x float>, ptr %vp
1236 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1237 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1238 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1239 ret <8 x float> %res
; Zero-masked, memory-source <8 x float> case (mask4 variant): load from %vp,
; shuffle 0,0,2,2,4,4,6,6, then select that result where %mask is oeq 0.0 and
; zero elsewhere. Expected: vmovsldup with a mem operand and {%k1} {z}.
1242 define <8 x float> @test_masked_z_8xfloat_dup_low_mem_mask4(ptr %vp, <8 x float> %mask) {
1243 ; CHECK-LABEL: test_masked_z_8xfloat_dup_low_mem_mask4:
1245 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1246 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
1247 ; CHECK-NEXT: vmovsldup {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6]
1249 %vec = load <8 x float>, ptr %vp
1250 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6>
1251 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1252 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1253 ret <8 x float> %res
; Unmasked <16 x float> case: the shuffle mask 0,0,2,2,...,14,14 copies each
; even-indexed float of %vec into the odd lane above it.
; Expected: a single vmovsldup on zmm0.
1255 define <16 x float> @test_16xfloat_dup_low(<16 x float> %vec) {
1256 ; CHECK-LABEL: test_16xfloat_dup_low:
1258 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1260 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1261 ret <16 x float> %res
; Merge-masked <16 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2,...,14,14; the other lanes keep %vec2. Expected: vcmpeqps
; on zmm into %k1, vmovsldup into zmm1 under {%k1}, then a move of zmm1 to zmm0.
1263 define <16 x float> @test_masked_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
1264 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask0:
1266 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1267 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1268 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1269 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1271 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1272 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1273 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1274 ret <16 x float> %res
; Zero-masked <16 x float> case: lanes where %mask compares oeq to 0.0 take %vec
; shuffled with 0,0,2,2,...,14,14; the other lanes become 0.0.
; Expected: vmovsldup on zmm0 with {%k1} {z}.
1277 define <16 x float> @test_masked_z_16xfloat_dup_low_mask0(<16 x float> %vec, <16 x float> %mask) {
1278 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask0:
1280 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1281 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1282 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1284 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1285 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1286 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1287 ret <16 x float> %res
; Merge-masked <16 x float> case (mask1 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into zmm1 under {%k1}, then zmm1 -> zmm0.
1289 define <16 x float> @test_masked_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
1290 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask1:
1292 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1293 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1294 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1295 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1297 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1298 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1299 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1300 ret <16 x float> %res
; Zero-masked <16 x float> case (mask1 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on zmm0 with {%k1} {z}.
1303 define <16 x float> @test_masked_z_16xfloat_dup_low_mask1(<16 x float> %vec, <16 x float> %mask) {
1304 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask1:
1306 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1307 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1308 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1310 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1311 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1312 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1313 ret <16 x float> %res
; Merge-masked <16 x float> case (mask2 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into zmm1 under {%k1}, then zmm1 -> zmm0.
1315 define <16 x float> @test_masked_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
1316 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask2:
1318 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1319 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1320 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1321 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1323 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1324 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1325 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1326 ret <16 x float> %res
; Zero-masked <16 x float> case (mask2 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on zmm0 with {%k1} {z}.
1329 define <16 x float> @test_masked_z_16xfloat_dup_low_mask2(<16 x float> %vec, <16 x float> %mask) {
1330 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask2:
1332 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1333 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1334 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1336 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1337 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1338 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1339 ret <16 x float> %res
; Merge-masked <16 x float> case (mask3 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into zmm1 under {%k1}, then zmm1 -> zmm0.
1341 define <16 x float> @test_masked_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
1342 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask3:
1344 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1345 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1346 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1347 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1349 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1350 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1351 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1352 ret <16 x float> %res
; Zero-masked <16 x float> case (mask3 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and zero
; elsewhere. Expected: vmovsldup on zmm0 with {%k1} {z}.
1355 define <16 x float> @test_masked_z_16xfloat_dup_low_mask3(<16 x float> %vec, <16 x float> %mask) {
1356 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask3:
1358 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1359 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1360 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1362 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1363 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1364 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1365 ret <16 x float> %res
; Merge-masked <16 x float> case (mask4 variant): the select picks the
; 0,0,2,2,...,14,14 shuffle of %vec where %mask compares oeq to 0.0 and %vec2
; elsewhere. Expected: vmovsldup into zmm1 under {%k1}, then zmm1 -> zmm0.
1367 define <16 x float> @test_masked_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
1368 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mask4:
1370 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1371 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1372 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1373 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
1375 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1376 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1377 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1378 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle on a <16 x float> register operand.
; %shuf copies every even-indexed element of %vec into the odd lane next to it
; (shuffle mask 0,0,2,2,...,14,14). Each result lane takes %shuf where the
; matching %mask lane compares ordered-equal to zero (fcmp oeq), and 0.0
; (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; that writes zmm0 in place.
1381 define <16 x float> @test_masked_z_16xfloat_dup_low_mask4(<16 x float> %vec, <16 x float> %mask) {
1382 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mask4:
1384 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1385 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1386 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1388 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1389 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1390 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1391 ret <16 x float> %res
; Unmasked "duplicate low element" shuffle whose source vector is loaded from
; memory. The <16 x float> is loaded through %vp, then every even-indexed
; element is copied into the odd lane next to it (shuffle mask
; 0,0,2,2,...,14,14).
; The assertion below expects a vmovsldup whose source is a memory operand
; (printed as mem[...]) and whose destination is zmm0.
1393 define <16 x float> @test_16xfloat_dup_low_mem(ptr %vp) {
1394 ; CHECK-LABEL: test_16xfloat_dup_low_mem:
1396 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1398 %vec = load <16 x float>, ptr %vp
1399 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1400 ret <16 x float> %res
; Merge-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and %vec2 otherwise.
; The assertions below expect a single masked vmovsldup with a memory source
; (printed as mem[...]) that writes zmm0, with no separate register copy.
1402 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask0(ptr %vp, <16 x float> %vec2, <16 x float> %mask) {
1403 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask0:
1405 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1406 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1407 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1409 %vec = load <16 x float>, ptr %vp
1410 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1411 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1412 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1413 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and 0.0 (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; and a memory source (printed as mem[...]) that writes zmm0.
1416 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask0(ptr %vp, <16 x float> %mask) {
1417 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask0:
1419 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1420 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1421 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1423 %vec = load <16 x float>, ptr %vp
1424 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1425 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1426 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1427 ret <16 x float> %res
; Merge-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and %vec2 otherwise.
; The assertions below expect a single masked vmovsldup with a memory source
; (printed as mem[...]) that writes zmm0, with no separate register copy.
1429 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask1(ptr %vp, <16 x float> %vec2, <16 x float> %mask) {
1430 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask1:
1432 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1433 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1434 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1436 %vec = load <16 x float>, ptr %vp
1437 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1438 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1439 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1440 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and 0.0 (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; and a memory source (printed as mem[...]) that writes zmm0.
1443 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask1(ptr %vp, <16 x float> %mask) {
1444 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask1:
1446 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1447 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1448 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1450 %vec = load <16 x float>, ptr %vp
1451 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1452 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1453 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1454 ret <16 x float> %res
; Merge-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and %vec2 otherwise.
; The assertions below expect a single masked vmovsldup with a memory source
; (printed as mem[...]) that writes zmm0, with no separate register copy.
1456 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask2(ptr %vp, <16 x float> %vec2, <16 x float> %mask) {
1457 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask2:
1459 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1460 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1461 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1463 %vec = load <16 x float>, ptr %vp
1464 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1465 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1466 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1467 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and 0.0 (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; and a memory source (printed as mem[...]) that writes zmm0.
1470 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask2(ptr %vp, <16 x float> %mask) {
1471 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask2:
1473 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1474 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1475 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1477 %vec = load <16 x float>, ptr %vp
1478 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1479 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1480 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1481 ret <16 x float> %res
; Merge-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and %vec2 otherwise.
; The assertions below expect a single masked vmovsldup with a memory source
; (printed as mem[...]) that writes zmm0, with no separate register copy.
1483 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask3(ptr %vp, <16 x float> %vec2, <16 x float> %mask) {
1484 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask3:
1486 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1487 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1488 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1490 %vec = load <16 x float>, ptr %vp
1491 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1492 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1493 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1494 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and 0.0 (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; and a memory source (printed as mem[...]) that writes zmm0.
1497 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask3(ptr %vp, <16 x float> %mask) {
1498 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask3:
1500 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1501 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1502 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1504 %vec = load <16 x float>, ptr %vp
1505 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1506 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1507 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1508 ret <16 x float> %res
; Merge-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and %vec2 otherwise.
; The assertions below expect a single masked vmovsldup with a memory source
; (printed as mem[...]) that writes zmm0, with no separate register copy.
1510 define <16 x float> @test_masked_16xfloat_dup_low_mem_mask4(ptr %vp, <16 x float> %vec2, <16 x float> %mask) {
1511 ; CHECK-LABEL: test_masked_16xfloat_dup_low_mem_mask4:
1513 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1514 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1515 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1517 %vec = load <16 x float>, ptr %vp
1518 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1519 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1520 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1521 ret <16 x float> %res
; Zero-masked "duplicate low element" shuffle whose source vector is loaded
; from memory through %vp.
; %shuf copies every even-indexed element of the loaded vector into the odd
; lane next to it (shuffle mask 0,0,2,2,...,14,14). Each result lane takes
; %shuf where the matching %mask lane compares ordered-equal to zero
; (fcmp oeq), and 0.0 (zeroinitializer) otherwise.
; The assertions below expect a single vmovsldup with {%k1} {z} zero-masking
; and a memory source (printed as mem[...]) that writes zmm0.
1524 define <16 x float> @test_masked_z_16xfloat_dup_low_mem_mask4(ptr %vp, <16 x float> %mask) {
1525 ; CHECK-LABEL: test_masked_z_16xfloat_dup_low_mem_mask4:
1527 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1528 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1529 ; CHECK-NEXT: vmovsldup {{.*#+}} zmm0 {%k1} {z} = mem[0,0,2,2,4,4,6,6,8,8,10,10,12,12,14,14]
1531 %vec = load <16 x float>, ptr %vp
1532 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 0, i32 2, i32 2, i32 4, i32 4, i32 6, i32 6, i32 8, i32 8, i32 10, i32 10, i32 12, i32 12, i32 14, i32 14>
1533 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1534 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1535 ret <16 x float> %res