1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
; Plain (unmasked) odd-lane duplicate of a <4 x float>: shuffle indices
; <1,1,3,3> place lanes 1 and 3 of %vec into both lanes of each pair. The
; checked assembly is a single register-form vmovshdup on xmm0.
4 define <4 x float> @test_4xfloat_dup_high(<4 x float> %vec) {
5 ; CHECK-LABEL: test_4xfloat_dup_high:
7 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 = xmm0[1,1,3,3]
9 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: vxorps/vcmpeqps build k1, vmovshdup under {%k1} writes
; xmm1, and vmovaps moves xmm1 into xmm0.
12 define <4 x float> @test_masked_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
13 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask0:
15 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
16 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
17 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
18 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
20 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
21 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
22 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup with
; {%k1} {z} updates xmm0 in place.
26 define <4 x float> @test_masked_z_4xfloat_dup_high_mask0(<4 x float> %vec, <4 x float> %mask) {
27 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask0:
29 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
30 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
31 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
33 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
34 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
35 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: vxorps/vcmpeqps build k1, vmovshdup under {%k1} writes
; xmm1, and vmovaps moves xmm1 into xmm0.
38 define <4 x float> @test_masked_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
39 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask1:
41 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
42 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
43 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
44 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
46 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
47 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
48 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup with
; {%k1} {z} updates xmm0 in place.
52 define <4 x float> @test_masked_z_4xfloat_dup_high_mask1(<4 x float> %vec, <4 x float> %mask) {
53 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask1:
55 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
56 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
57 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
59 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
60 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
61 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: vxorps/vcmpeqps build k1, vmovshdup under {%k1} writes
; xmm1, and vmovaps moves xmm1 into xmm0.
64 define <4 x float> @test_masked_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
65 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask2:
67 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
68 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
69 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
70 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
72 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
73 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
74 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup with
; {%k1} {z} updates xmm0 in place.
78 define <4 x float> @test_masked_z_4xfloat_dup_high_mask2(<4 x float> %vec, <4 x float> %mask) {
79 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask2:
81 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
82 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
83 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
85 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
86 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
87 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: vxorps/vcmpeqps build k1, vmovshdup under {%k1} writes
; xmm1, and vmovaps moves xmm1 into xmm0.
90 define <4 x float> @test_masked_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
91 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask3:
93 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
94 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
95 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
96 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
98 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
99 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
100 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup with
; {%k1} {z} updates xmm0 in place.
104 define <4 x float> @test_masked_z_4xfloat_dup_high_mask3(<4 x float> %vec, <4 x float> %mask) {
105 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask3:
107 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
108 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
109 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
111 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
112 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
113 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: vxorps/vcmpeqps build k1, vmovshdup under {%k1} writes
; xmm1, and vmovaps moves xmm1 into xmm0.
116 define <4 x float> @test_masked_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
117 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mask4:
119 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
120 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
121 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm1 {%k1} = xmm0[1,1,3,3]
122 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
124 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
125 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
126 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup with
; {%k1} {z} updates xmm0 in place.
130 define <4 x float> @test_masked_z_4xfloat_dup_high_mask4(<4 x float> %vec, <4 x float> %mask) {
131 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mask4:
133 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
134 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
135 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,3,3]
137 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
138 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
139 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked odd-lane duplicate of a <4 x float> loaded through %vp. The
; checked vmovshdup reads its source directly from memory (mem[1,1,3,3]).
142 define <4 x float> @test_4xfloat_dup_high_mem(<4 x float>* %vp) {
143 ; CHECK-LABEL: test_4xfloat_dup_high_mem:
145 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 = mem[1,1,3,3]
147 %vec = load <4 x float>, <4 x float>* %vp
148 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest come
; from %vec2. Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup
; under {%k1} reads memory and writes xmm0.
151 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
152 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask0:
154 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
155 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
156 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
158 %vec = load <4 x float>, <4 x float>* %vp
159 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
160 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
161 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest are
; zeroinitializer. Checked assembly: vxorps/vcmpeqps build k1, then
; vmovshdup with {%k1} {z} reads memory and writes xmm0.
165 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
166 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask0:
168 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
169 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
170 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
172 %vec = load <4 x float>, <4 x float>* %vp
173 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
174 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
175 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest come
; from %vec2. Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup
; under {%k1} reads memory and writes xmm0.
178 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
179 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask1:
181 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
182 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
183 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
185 %vec = load <4 x float>, <4 x float>* %vp
186 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
187 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
188 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest are
; zeroinitializer. Checked assembly: vxorps/vcmpeqps build k1, then
; vmovshdup with {%k1} {z} reads memory and writes xmm0.
192 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
193 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask1:
195 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
196 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
197 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
199 %vec = load <4 x float>, <4 x float>* %vp
200 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
201 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
202 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest come
; from %vec2. Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup
; under {%k1} reads memory and writes xmm0.
205 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
206 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask2:
208 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
209 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
210 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
212 %vec = load <4 x float>, <4 x float>* %vp
213 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
214 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
215 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest are
; zeroinitializer. Checked assembly: vxorps/vcmpeqps build k1, then
; vmovshdup with {%k1} {z} reads memory and writes xmm0.
219 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
220 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask2:
222 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
223 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
224 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
226 %vec = load <4 x float>, <4 x float>* %vp
227 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
228 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
229 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest come
; from %vec2. Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup
; under {%k1} reads memory and writes xmm0.
232 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
233 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask3:
235 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
236 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
237 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
239 %vec = load <4 x float>, <4 x float>* %vp
240 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
241 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
242 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest are
; zeroinitializer. Checked assembly: vxorps/vcmpeqps build k1, then
; vmovshdup with {%k1} {z} reads memory and writes xmm0.
246 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
247 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask3:
249 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
250 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
251 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
253 %vec = load <4 x float>, <4 x float>* %vp
254 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
255 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
256 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest come
; from %vec2. Checked assembly: vxorps/vcmpeqps build k1, then vmovshdup
; under {%k1} reads memory and writes xmm0.
259 define <4 x float> @test_masked_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
260 ; CHECK-LABEL: test_masked_4xfloat_dup_high_mem_mask4:
262 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
263 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
264 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} = mem[1,1,3,3]
266 %vec = load <4 x float>, <4 x float>* %vp
267 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
268 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
269 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3> shuffle, the rest are
; zeroinitializer. Checked assembly: vxorps/vcmpeqps build k1, then
; vmovshdup with {%k1} {z} reads memory and writes xmm0.
273 define <4 x float> @test_masked_z_4xfloat_dup_high_mem_mask4(<4 x float>* %vp, <4 x float> %mask) {
274 ; CHECK-LABEL: test_masked_z_4xfloat_dup_high_mem_mask4:
276 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
277 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
278 ; CHECK-NEXT: vmovshdup {{.*#+}} xmm0 {%k1} {z} = mem[1,1,3,3]
280 %vec = load <4 x float>, <4 x float>* %vp
281 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 1, i32 3, i32 3>
282 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
283 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Plain (unmasked) odd-lane duplicate of an <8 x float>: shuffle indices
; <1,1,3,3,5,5,7,7> place each odd lane of %vec into both lanes of its pair.
; The checked assembly is a single register-form vmovshdup on ymm0.
286 define <8 x float> @test_8xfloat_dup_high(<8 x float> %vec) {
287 ; CHECK-LABEL: test_8xfloat_dup_high:
289 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 = ymm0[1,1,3,3,5,5,7,7]
291 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, vmovshdup under {%k1} writes ymm1, and vmovaps moves
; ymm1 into ymm0.
294 define <8 x float> @test_masked_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
295 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask0:
297 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
298 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
299 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
300 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
302 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
303 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
304 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, then vmovshdup with {%k1} {z} updates ymm0 in place.
308 define <8 x float> @test_masked_z_8xfloat_dup_high_mask0(<8 x float> %vec, <8 x float> %mask) {
309 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask0:
311 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
312 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
313 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
315 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
316 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
317 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, vmovshdup under {%k1} writes ymm1, and vmovaps moves
; ymm1 into ymm0.
320 define <8 x float> @test_masked_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
321 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask1:
323 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
324 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
325 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
326 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
328 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
329 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
330 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, then vmovshdup with {%k1} {z} updates ymm0 in place.
334 define <8 x float> @test_masked_z_8xfloat_dup_high_mask1(<8 x float> %vec, <8 x float> %mask) {
335 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask1:
337 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
338 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
339 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
341 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
342 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
343 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, vmovshdup under {%k1} writes ymm1, and vmovaps moves
; ymm1 into ymm0.
346 define <8 x float> @test_masked_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
347 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask2:
349 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
350 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
351 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
352 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
354 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
355 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
356 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, then vmovshdup with {%k1} {z} updates ymm0 in place.
360 define <8 x float> @test_masked_z_8xfloat_dup_high_mask2(<8 x float> %vec, <8 x float> %mask) {
361 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask2:
363 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
364 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
365 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
367 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
368 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
369 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, vmovshdup under {%k1} writes ymm1, and vmovaps moves
; ymm1 into ymm0.
372 define <8 x float> @test_masked_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
373 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask3:
375 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
376 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
377 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
378 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
380 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
381 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
382 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, then vmovshdup with {%k1} {z} updates ymm0 in place.
386 define <8 x float> @test_masked_z_8xfloat_dup_high_mask3(<8 x float> %vec, <8 x float> %mask) {
387 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask3:
389 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
390 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
391 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
393 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
394 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
395 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes come from %vec2.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, vmovshdup under {%k1} writes ymm1, and vmovaps moves
; ymm1 into ymm0.
398 define <8 x float> @test_masked_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
399 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mask4:
401 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
402 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
403 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm1 {%k1} = ymm0[1,1,3,3,5,5,7,7]
404 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
406 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
407 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
408 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; <1,1,3,3,5,5,7,7> shuffle of %vec; the remaining lanes are zeroinitializer.
; Checked assembly: an xmm-width vxorps supplies the zero for the ymm
; vcmpeqps into k1, then vmovshdup with {%k1} {z} updates ymm0 in place.
412 define <8 x float> @test_masked_z_8xfloat_dup_high_mask4(<8 x float> %vec, <8 x float> %mask) {
413 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mask4:
415 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
416 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
417 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,3,3,5,5,7,7]
419 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
420 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
421 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked odd-lane duplicate of an <8 x float> loaded through %vp. The
; checked vmovshdup reads its source directly from memory
; (mem[1,1,3,3,5,5,7,7]) into ymm0.
424 define <8 x float> @test_8xfloat_dup_high_mem(<8 x float>* %vp) {
425 ; CHECK-LABEL: test_8xfloat_dup_high_mem:
427 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 = mem[1,1,3,3,5,5,7,7]
429 %vec = load <8 x float>, <8 x float>* %vp
430 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; come from %vec2. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup under {%k1} reads memory and writes ymm0.
433 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
434 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask0:
436 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
437 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
438 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
440 %vec = load <8 x float>, <8 x float>* %vp
441 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
442 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
443 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; are zeroinitializer. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup with {%k1} {z} reads memory and writes ymm0.
447 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
448 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask0:
450 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
451 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
452 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
454 %vec = load <8 x float>, <8 x float>* %vp
455 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
456 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
457 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; come from %vec2. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup under {%k1} reads memory and writes ymm0.
460 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
461 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask1:
463 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
464 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
465 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
467 %vec = load <8 x float>, <8 x float>* %vp
468 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
469 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
470 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; are zeroinitializer. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup with {%k1} {z} reads memory and writes ymm0.
474 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
475 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask1:
477 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
478 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
479 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
481 %vec = load <8 x float>, <8 x float>* %vp
482 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
483 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
484 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; come from %vec2. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup under {%k1} reads memory and writes ymm0.
487 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
488 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask2:
490 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
491 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
492 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
494 %vec = load <8 x float>, <8 x float>* %vp
495 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
496 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
497 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; are zeroinitializer. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup with {%k1} {z} reads memory and writes ymm0.
501 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
502 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask2:
504 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
505 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
506 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
508 %vec = load <8 x float>, <8 x float>* %vp
509 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
510 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
511 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; come from %vec2. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup under {%k1} reads memory and writes ymm0.
514 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
515 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask3:
517 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
518 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
519 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
521 %vec = load <8 x float>, <8 x float>* %vp
522 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
523 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
524 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; are zeroinitializer. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup with {%k1} {z} reads memory and writes ymm0.
528 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
529 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask3:
531 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
532 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
533 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
535 %vec = load <8 x float>, <8 x float>* %vp
536 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
537 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
538 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; come from %vec2. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup under {%k1} reads memory and writes ymm0.
541 define <8 x float> @test_masked_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
542 ; CHECK-LABEL: test_masked_8xfloat_dup_high_mem_mask4:
544 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
545 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
546 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} = mem[1,1,3,3,5,5,7,7]
548 %vec = load <8 x float>, <8 x float>* %vp
549 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
550 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
551 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked, memory-source form: %vec is loaded through %vp; lanes where
; %mask compares oeq with 0.0 receive its <1,1,3,3,5,5,7,7> shuffle, the rest
; are zeroinitializer. Checked assembly: xmm-width vxorps plus ymm vcmpeqps
; build k1, then vmovshdup with {%k1} {z} reads memory and writes ymm0.
555 define <8 x float> @test_masked_z_8xfloat_dup_high_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
556 ; CHECK-LABEL: test_masked_z_8xfloat_dup_high_mem_mask4:
558 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
559 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
560 ; CHECK-NEXT: vmovshdup {{.*#+}} ymm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7]
562 %vec = load <8 x float>, <8 x float>* %vp
563 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7>
564 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
565 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Plain (unmasked) odd-lane duplicate of a <16 x float>: the shuffle indices
; repeat every odd lane (1,1,3,3,...,15,15) of %vec. The checked assembly is
; a single register-form vmovshdup on zmm0.
568 define <16 x float> @test_16xfloat_dup_high(<16 x float> %vec) {
569 ; CHECK-LABEL: test_16xfloat_dup_high:
571 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
573 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
574 ret <16 x float> %res
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes come from %vec2. Checked assembly: an xmm-width vxorps supplies the
; zero for the zmm vcmpeqps into k1, vmovshdup under {%k1} writes zmm1, and
; vmovaps moves zmm1 into zmm0.
576 define <16 x float> @test_masked_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
577 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask0:
579 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
580 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
581 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
582 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
584 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
585 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
586 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
587 ret <16 x float> %res
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes are zeroinitializer. Checked assembly: an xmm-width vxorps supplies
; the zero for the zmm vcmpeqps into k1, then vmovshdup with {%k1} {z}
; updates zmm0 in place.
590 define <16 x float> @test_masked_z_16xfloat_dup_high_mask0(<16 x float> %vec, <16 x float> %mask) {
591 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask0:
593 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
594 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
595 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
597 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
598 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
599 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
600 ret <16 x float> %res
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes come from %vec2. Checked assembly: an xmm-width vxorps supplies the
; zero for the zmm vcmpeqps into k1, vmovshdup under {%k1} writes zmm1, and
; vmovaps moves zmm1 into zmm0.
602 define <16 x float> @test_masked_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
603 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask1:
605 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
606 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
607 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
608 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
610 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
611 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
612 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
613 ret <16 x float> %res
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes are zeroinitializer. Checked assembly: an xmm-width vxorps supplies
; the zero for the zmm vcmpeqps into k1, then vmovshdup with {%k1} {z}
; updates zmm0 in place.
616 define <16 x float> @test_masked_z_16xfloat_dup_high_mask1(<16 x float> %vec, <16 x float> %mask) {
617 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask1:
619 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
620 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
621 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
623 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
624 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
625 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
626 ret <16 x float> %res
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes come from %vec2. Checked assembly: an xmm-width vxorps supplies the
; zero for the zmm vcmpeqps into k1, vmovshdup under {%k1} writes zmm1, and
; vmovaps moves zmm1 into zmm0.
628 define <16 x float> @test_masked_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
629 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask2:
631 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
632 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
633 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
634 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
636 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
637 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
638 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
639 ret <16 x float> %res
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes are zeroinitializer. Checked assembly: an xmm-width vxorps supplies
; the zero for the zmm vcmpeqps into k1, then vmovshdup with {%k1} {z}
; updates zmm0 in place.
642 define <16 x float> @test_masked_z_16xfloat_dup_high_mask2(<16 x float> %vec, <16 x float> %mask) {
643 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask2:
645 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
646 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
647 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
649 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
650 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
651 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
652 ret <16 x float> %res
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes come from %vec2. Checked assembly: an xmm-width vxorps supplies the
; zero for the zmm vcmpeqps into k1, vmovshdup under {%k1} writes zmm1, and
; vmovaps moves zmm1 into zmm0.
654 define <16 x float> @test_masked_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
655 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask3:
657 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
658 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
659 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
660 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
662 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
663 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
664 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
665 ret <16 x float> %res
; Zero-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes are zeroinitializer. Checked assembly: an xmm-width vxorps supplies
; the zero for the zmm vcmpeqps into k1, then vmovshdup with {%k1} {z}
; updates zmm0 in place.
668 define <16 x float> @test_masked_z_16xfloat_dup_high_mask3(<16 x float> %vec, <16 x float> %mask) {
669 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask3:
671 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
672 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
673 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
675 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
676 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
677 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
678 ret <16 x float> %res
; Merge-masked form: lanes where %mask compares oeq with 0.0 receive the
; odd-lane-duplicating shuffle (1,1,3,3,...,15,15) of %vec; the remaining
; lanes come from %vec2. Checked assembly: an xmm-width vxorps supplies the
; zero for the zmm vcmpeqps into k1, vmovshdup under {%k1} writes zmm1, and
; vmovaps moves zmm1 into zmm0.
680 define <16 x float> @test_masked_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
681 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mask4:
683 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
684 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
685 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
686 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
688 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
689 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
690 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
691 ret <16 x float> %res
; Zero-masked, register-source form of the 16 x float "duplicate odd element"
; shuffle. Lanes where %mask compares ordered-equal to zero receive the
; shuffled %vec; every other lane is zero. The expected assembly zeroes a
; register, compares the mask operand against it into %k1, and issues one
; vmovshdup with {%k1} {z} that writes zmm0 in place.
; NOTE(review): the "mask4" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
694 define <16 x float> @test_masked_z_16xfloat_dup_high_mask4(<16 x float> %vec, <16 x float> %mask) {
695 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mask4:
697 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
698 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
699 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; Result element 2i and 2i+1 both come from source element 2i+1.
701 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
702 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
703 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
704 ret <16 x float> %res
; Unmasked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp and shuffled directly; the
; expected assembly is a single vmovshdup whose source operand is memory,
; i.e. the load is folded into the shuffle instruction.
706 define <16 x float> @test_16xfloat_dup_high_mem(<16 x float>* %vp) {
707 ; CHECK-LABEL: test_16xfloat_dup_high_mem:
709 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
711 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
712 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
713 ret <16 x float> %res
; Merge-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane
; receives %vec2. The expected assembly zeroes a register, compares the mask
; operand against it into %k1, and issues one vmovshdup from memory
; predicated on %k1 that writes zmm0, with no trailing register move.
; NOTE(review): the "mask0" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
715 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
716 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask0:
718 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
719 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
720 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
722 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
723 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
724 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Merge masking: lanes with a false predicate keep the %vec2 value.
725 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
726 ret <16 x float> %res
; Zero-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane is
; zero. The expected assembly zeroes a register, compares the mask operand
; against it into %k1, and issues one vmovshdup from memory with {%k1} {z}.
; NOTE(review): the "mask0" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
729 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
730 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask0:
732 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
733 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
734 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
736 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
737 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
738 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
739 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
740 ret <16 x float> %res
; Merge-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane
; receives %vec2. The expected assembly zeroes a register, compares the mask
; operand against it into %k1, and issues one vmovshdup from memory
; predicated on %k1 that writes zmm0, with no trailing register move.
; NOTE(review): the "mask1" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
742 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
743 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask1:
745 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
746 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
747 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
749 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
750 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
751 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Merge masking: lanes with a false predicate keep the %vec2 value.
752 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
753 ret <16 x float> %res
; Zero-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane is
; zero. The expected assembly zeroes a register, compares the mask operand
; against it into %k1, and issues one vmovshdup from memory with {%k1} {z}.
; NOTE(review): the "mask1" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
756 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
757 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask1:
759 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
760 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
761 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
763 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
764 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
765 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
766 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
767 ret <16 x float> %res
; Merge-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane
; receives %vec2. The expected assembly zeroes a register, compares the mask
; operand against it into %k1, and issues one vmovshdup from memory
; predicated on %k1 that writes zmm0, with no trailing register move.
; NOTE(review): the "mask2" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
769 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
770 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask2:
772 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
773 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
774 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
776 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
777 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
778 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Merge masking: lanes with a false predicate keep the %vec2 value.
779 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
780 ret <16 x float> %res
; Zero-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane is
; zero. The expected assembly zeroes a register, compares the mask operand
; against it into %k1, and issues one vmovshdup from memory with {%k1} {z}.
; NOTE(review): the "mask2" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
783 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
784 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask2:
786 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
787 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
788 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
790 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
791 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
792 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
793 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
794 ret <16 x float> %res
; Merge-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane
; receives %vec2. The expected assembly zeroes a register, compares the mask
; operand against it into %k1, and issues one vmovshdup from memory
; predicated on %k1 that writes zmm0, with no trailing register move.
; NOTE(review): the "mask3" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
796 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
797 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask3:
799 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
800 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
801 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
803 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
804 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
805 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Merge masking: lanes with a false predicate keep the %vec2 value.
806 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
807 ret <16 x float> %res
; Zero-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane is
; zero. The expected assembly zeroes a register, compares the mask operand
; against it into %k1, and issues one vmovshdup from memory with {%k1} {z}.
; NOTE(review): the "mask3" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
810 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
811 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask3:
813 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
814 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
815 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
817 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
818 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
819 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
820 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
821 ret <16 x float> %res
; Merge-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane
; receives %vec2. The expected assembly zeroes a register, compares the mask
; operand against it into %k1, and issues one vmovshdup from memory
; predicated on %k1 that writes zmm0, with no trailing register move.
; NOTE(review): the "mask4" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
823 define <16 x float> @test_masked_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
824 ; CHECK-LABEL: test_masked_16xfloat_dup_high_mem_mask4:
826 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
827 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
828 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
830 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
831 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
832 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Merge masking: lanes with a false predicate keep the %vec2 value.
833 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
834 ret <16 x float> %res
; Zero-masked, memory-source form of the 16 x float "duplicate odd element"
; shuffle. The vector is loaded through %vp; lanes where %mask compares
; ordered-equal to zero receive the shuffled value and every other lane is
; zero. The expected assembly zeroes a register, compares the mask operand
; against it into %k1, and issues one vmovshdup from memory with {%k1} {z}.
; NOTE(review): the "mask4" suffix does not pick a constant mask in this IR;
; the lane mask comes from the fcmp on %mask.
837 define <16 x float> @test_masked_z_16xfloat_dup_high_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
838 ; CHECK-LABEL: test_masked_z_16xfloat_dup_high_mem_mask4:
840 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
841 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
842 ; CHECK-NEXT: vmovshdup {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,5,7,7,9,9,11,11,13,13,15,15]
; The load's only use is the shuffle below.
844 %vec = load <16 x float>, <16 x float>* %vp
; Result element 2i and 2i+1 both come from source element 2i+1.
845 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 5, i32 7, i32 7, i32 9, i32 9, i32 11, i32 11, i32 13, i32 13, i32 15, i32 15>
; Per-lane predicate: true where %mask is ordered-equal to 0.0.
846 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
; Zero masking: lanes with a false predicate become 0.0.
847 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
848 ret <16 x float> %res