1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
4 ; FIXME: The non-immediate <16 x float> test cases should be fixed by PR34382
; Unmasked permute: %res is %vec shuffled with indices <2,1,3,1>.
; The assertions expect a vpermilps on xmm0 with that pattern.
6 define <4 x float> @test_4xfloat_perm_mask0(<4 x float> %vec) {
7 ; CHECK-LABEL: test_4xfloat_perm_mask0:
9 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[2,1,3,1]
11 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1>
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,3,1> shuffle of %vec, the others get %vec2.
; The assertions expect the compare in %k1, a vpermilps into xmm1 under {%k1}, then a move of xmm1 to xmm0.
14 define <4 x float> @test_masked_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
15 ; CHECK-LABEL: test_masked_4xfloat_perm_mask0:
17 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
18 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
19 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[2,1,3,1]
20 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
22 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1>
23 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
24 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,3,1> shuffle of %vec, the others are zero.
; The assertions expect the compare in %k1 and a vpermilps on xmm0 under {%k1} {z}.
28 define <4 x float> @test_masked_z_4xfloat_perm_mask0(<4 x float> %vec, <4 x float> %mask) {
29 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask0:
31 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
32 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
33 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,3,1]
35 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 1>
36 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
37 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <1,2,3,2> shuffle of %vec, the others get %vec2.
; The assertions expect the compare in %k1, a vpermilps into xmm1 under {%k1}, then a move of xmm1 to xmm0.
40 define <4 x float> @test_masked_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
41 ; CHECK-LABEL: test_masked_4xfloat_perm_mask1:
43 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
44 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
45 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
46 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
48 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
49 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
50 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <1,2,3,2> shuffle of %vec, the others are zero.
; The assertions expect the compare in %k1 and a vpermilps on xmm0 under {%k1} {z}.
54 define <4 x float> @test_masked_z_4xfloat_perm_mask1(<4 x float> %vec, <4 x float> %mask) {
55 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask1:
57 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
58 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
59 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
61 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
62 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
63 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <1,3,2,1> shuffle of %vec, the others get %vec2.
; The assertions expect the compare in %k1, a vpermilps into xmm1 under {%k1}, then a move of xmm1 to xmm0.
66 define <4 x float> @test_masked_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
67 ; CHECK-LABEL: test_masked_4xfloat_perm_mask2:
69 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
70 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
71 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,3,2,1]
72 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
74 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 1>
75 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
76 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <1,3,2,1> shuffle of %vec, the others are zero.
; The assertions expect the compare in %k1 and a vpermilps on xmm0 under {%k1} {z}.
80 define <4 x float> @test_masked_z_4xfloat_perm_mask2(<4 x float> %vec, <4 x float> %mask) {
81 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask2:
83 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
84 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
85 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,3,2,1]
87 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 1>
88 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
89 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked permute: %res is %vec shuffled with indices <1,2,3,2>.
; The assertions expect a vpermilps on xmm0 with that pattern.
92 define <4 x float> @test_4xfloat_perm_mask3(<4 x float> %vec) {
93 ; CHECK-LABEL: test_4xfloat_perm_mask3:
95 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = xmm0[1,2,3,2]
97 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <1,2,3,2> shuffle of %vec, the others get %vec2.
; The assertions expect the compare in %k1, a vpermilps into xmm1 under {%k1}, then a move of xmm1 to xmm0.
100 define <4 x float> @test_masked_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %vec2, <4 x float> %mask) {
101 ; CHECK-LABEL: test_masked_4xfloat_perm_mask3:
103 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
104 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
105 ; CHECK-NEXT: vpermilps {{.*#+}} xmm1 {%k1} = xmm0[1,2,3,2]
106 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
108 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
109 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
110 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <1,2,3,2> shuffle of %vec, the others are zero.
; The assertions expect the compare in %k1 and a vpermilps on xmm0 under {%k1} {z}.
114 define <4 x float> @test_masked_z_4xfloat_perm_mask3(<4 x float> %vec, <4 x float> %mask) {
115 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mask3:
117 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
118 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
119 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = xmm0[1,2,3,2]
121 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 2, i32 3, i32 2>
122 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
123 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked permute of a loaded vector: %res is the <4 x float> loaded from %vp shuffled with indices <3,3,1,3>.
; The assertions expect the load to appear as the mem operand of vpermilps.
126 define <4 x float> @test_4xfloat_perm_mem_mask0(<4 x float>* %vp) {
127 ; CHECK-LABEL: test_4xfloat_perm_mem_mask0:
129 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,3,1,3]
131 %vec = load <4 x float>, <4 x float>* %vp
132 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 3>
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <3,3,1,3> shuffle of the
; vector loaded from %vp, the others get %vec2. The assertions expect a vpermilps from mem into xmm0 under {%k1}.
135 define <4 x float> @test_masked_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
136 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask0:
138 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
139 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
140 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,3,1,3]
142 %vec = load <4 x float>, <4 x float>* %vp
143 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 3>
144 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
145 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <3,3,1,3> shuffle of the
; vector loaded from %vp, the others are zero. The assertions expect a vpermilps from mem into xmm0 under {%k1} {z}.
149 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask0(<4 x float>* %vp, <4 x float> %mask) {
150 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask0:
152 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
153 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
154 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,3,1,3]
156 %vec = load <4 x float>, <4 x float>* %vp
157 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 3, i32 3, i32 1, i32 3>
158 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
159 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <1,3,2,0> shuffle of the
; vector loaded from %vp, the others get %vec2. The assertions expect a vpermilps from mem into xmm0 under {%k1}.
163 define <4 x float> @test_masked_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
164 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask1:
166 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
167 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
168 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[1,3,2,0]
170 %vec = load <4 x float>, <4 x float>* %vp
171 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
172 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
173 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <1,3,2,0> shuffle of the
; vector loaded from %vp, the others are zero. The assertions expect a vpermilps from mem into xmm0 under {%k1} {z}.
177 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask1(<4 x float>* %vp, <4 x float> %mask) {
178 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask1:
180 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
181 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
182 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[1,3,2,0]
184 %vec = load <4 x float>, <4 x float>* %vp
185 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 1, i32 3, i32 2, i32 0>
186 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
187 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,1,3,2> shuffle of the
; vector loaded from %vp, the others get %vec2. The assertions expect a vpermilps from mem into xmm0 under {%k1}.
191 define <4 x float> @test_masked_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
192 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask2:
194 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
195 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
196 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[2,1,3,2]
198 %vec = load <4 x float>, <4 x float>* %vp
199 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
200 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
201 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,1,3,2> shuffle of the
; vector loaded from %vp, the others are zero. The assertions expect a vpermilps from mem into xmm0 under {%k1} {z}.
205 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask2(<4 x float>* %vp, <4 x float> %mask) {
206 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask2:
208 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
209 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
210 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2]
212 %vec = load <4 x float>, <4 x float>* %vp
213 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
214 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
215 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked permute of a loaded vector: %res is the <4 x float> loaded from %vp shuffled with indices <0,1,3,0>.
; The assertions expect the load to appear as the mem operand of vpermilps.
219 define <4 x float> @test_4xfloat_perm_mem_mask3(<4 x float>* %vp) {
220 ; CHECK-LABEL: test_4xfloat_perm_mem_mask3:
222 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,0]
224 %vec = load <4 x float>, <4 x float>* %vp
225 %res = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 0>
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,3,0> shuffle of the
; vector loaded from %vp, the others get %vec2. The assertions expect a vpermilps from mem into xmm0 under {%k1}.
228 define <4 x float> @test_masked_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %vec2, <4 x float> %mask) {
229 ; CHECK-LABEL: test_masked_4xfloat_perm_mem_mask3:
231 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
232 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
233 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[0,1,3,0]
235 %vec = load <4 x float>, <4 x float>* %vp
236 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 0>
237 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
238 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,3,0> shuffle of the
; vector loaded from %vp, the others are zero. The assertions expect a vpermilps from mem into xmm0 under {%k1} {z}.
242 define <4 x float> @test_masked_z_4xfloat_perm_mem_mask3(<4 x float>* %vp, <4 x float> %mask) {
243 ; CHECK-LABEL: test_masked_z_4xfloat_perm_mem_mask3:
245 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
246 ; CHECK-NEXT: vcmpeqps %xmm1, %xmm0, %k1
247 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,0]
249 %vec = load <4 x float>, <4 x float>* %vp
250 %shuf = shufflevector <4 x float> %vec, <4 x float> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 0>
251 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
252 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked permute: %res is %vec shuffled with indices <0,1,2,3,4,6,6,6> (the two 4-element halves use different patterns).
; The assertions expect a vpermilps on ymm0 with that pattern.
256 define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) {
257 ; CHECK-LABEL: test_8xfloat_perm_mask0:
259 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,4,6,6,6]
261 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 6, i32 6>
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <0,1,2,3,4,6,6,6> shuffle of %vec, the others get %vec2.
; The assertions expect the compare in %k1, a vpermilps into ymm1 under {%k1}, then a move of ymm1 to ymm0.
264 define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
265 ; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
267 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
268 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
269 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,6,6,6]
270 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
272 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 6, i32 6>
273 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
274 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <0,1,2,3,4,6,6,6> shuffle of %vec, the others are zero.
; The assertions expect the compare in %k1 and a vpermilps on ymm0 under {%k1} {z}.
278 define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) {
279 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
281 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
282 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
283 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,6,6,6]
285 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 6, i32 6>
286 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
287 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <3,2,3,2,7,6,7,6> shuffle of %vec (same pattern in
; both 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
290 define <8 x float> @test_masked_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
291 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask1:
293 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
294 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
295 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,7,6,7,6]
296 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
298 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 7, i32 6, i32 7, i32 6>
299 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
300 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <3,2,3,2,7,6,7,6> shuffle of %vec (same pattern in
; both 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
304 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask1(<8 x float> %vec, <8 x float> %mask) {
305 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask1:
307 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
308 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
309 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,7,6,7,6]
311 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 7, i32 6, i32 7, i32 6>
312 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
313 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,2,1,6,5,4,4> shuffle of %vec (different
; patterns in the two 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
316 define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
317 ; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
319 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
320 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
321 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,2,1,6,5,4,4]
322 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
324 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 2, i32 1, i32 6, i32 5, i32 4, i32 4>
325 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
326 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,2,1,6,5,4,4> shuffle of %vec (different
; patterns in the two 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
330 define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) {
331 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
333 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
334 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
335 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,2,1,6,5,4,4]
337 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 2, i32 1, i32 6, i32 5, i32 4, i32 4>
338 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
339 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute: %res is %vec shuffled with indices <2,2,1,0,6,6,5,4> (same pattern in both 4-element halves).
; The assertions expect a vpermilps on ymm0 with that pattern.
342 define <8 x float> @test_8xfloat_perm_imm_mask3(<8 x float> %vec) {
343 ; CHECK-LABEL: test_8xfloat_perm_imm_mask3:
345 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[2,2,1,0,6,6,5,4]
347 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4>
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <2,2,1,0,6,6,5,4> shuffle of %vec (same pattern in
; both 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
350 define <8 x float> @test_masked_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
351 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask3:
353 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
354 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
355 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,2,1,0,6,6,5,4]
356 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
358 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4>
359 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
360 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <2,2,1,0,6,6,5,4> shuffle of %vec (same pattern in
; both 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
364 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask3(<8 x float> %vec, <8 x float> %mask) {
365 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask3:
367 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
368 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
369 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,1,0,6,6,5,4]
371 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 2, i32 1, i32 0, i32 6, i32 6, i32 5, i32 4>
372 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
373 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <3,3,3,3,7,7,6,5> shuffle of %vec (different
; patterns in the two 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
376 define <8 x float> @test_masked_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
377 ; CHECK-LABEL: test_masked_8xfloat_perm_mask4:
379 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
380 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
381 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,3,7,7,6,5]
382 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
384 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 6, i32 5>
385 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
386 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <3,3,3,3,7,7,6,5> shuffle of %vec (different
; patterns in the two 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
390 define <8 x float> @test_masked_z_8xfloat_perm_mask4(<8 x float> %vec, <8 x float> %mask) {
391 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask4:
393 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
394 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
395 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,3,7,7,6,5]
397 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 6, i32 5>
398 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
399 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,3,3,6,5,7,7> shuffle of %vec (same pattern in
; both 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
402 define <8 x float> @test_masked_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
403 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask5:
405 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
406 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
407 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3,6,5,7,7]
408 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
410 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 3, i32 6, i32 5, i32 7, i32 7>
411 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
412 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <2,1,3,3,6,5,7,7> shuffle of %vec (same pattern in
; both 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
416 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask5(<8 x float> %vec, <8 x float> %mask) {
417 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask5:
419 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
420 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
421 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3,6,5,7,7]
423 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 3, i32 6, i32 5, i32 7, i32 7>
424 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
425 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute: %res is %vec shuffled with indices <3,2,3,2,5,6,7,7> (the two 4-element halves use different patterns).
; The assertions expect a vpermilps on ymm0 with that pattern.
428 define <8 x float> @test_8xfloat_perm_mask6(<8 x float> %vec) {
429 ; CHECK-LABEL: test_8xfloat_perm_mask6:
431 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,2,3,2,5,6,7,7]
433 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 5, i32 6, i32 7, i32 7>
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <3,2,3,2,5,6,7,7> shuffle of %vec (different
; patterns in the two 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
436 define <8 x float> @test_masked_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
437 ; CHECK-LABEL: test_masked_8xfloat_perm_mask6:
439 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
440 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
441 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,5,6,7,7]
442 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
444 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 5, i32 6, i32 7, i32 7>
445 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
446 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <3,2,3,2,5,6,7,7> shuffle of %vec (different
; patterns in the two 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
450 define <8 x float> @test_masked_z_8xfloat_perm_mask6(<8 x float> %vec, <8 x float> %mask) {
451 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask6:
453 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
454 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
455 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,5,6,7,7]
457 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 2, i32 5, i32 6, i32 7, i32 7>
458 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
459 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute: lanes where %mask is ordered-equal to zero get the <3,0,2,1,7,4,6,5> shuffle of %vec (same pattern in
; both 4-element halves), the others get %vec2. The assertions expect a vpermilps into ymm1 under {%k1}, then a move to ymm0.
462 define <8 x float> @test_masked_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
463 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mask7:
465 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
466 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
467 ; CHECK-NEXT: vpermilps {{.*#+}} ymm1 {%k1} = ymm0[3,0,2,1,7,4,6,5]
468 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
470 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 4, i32 6, i32 5>
471 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
472 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute: lanes where %mask is ordered-equal to zero get the <3,0,2,1,7,4,6,5> shuffle of %vec (same pattern in
; both 4-element halves), the others are zero. The assertions expect a vpermilps on ymm0 under {%k1} {z}.
476 define <8 x float> @test_masked_z_8xfloat_perm_imm_mask7(<8 x float> %vec, <8 x float> %mask) {
477 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mask7:
479 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
480 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
481 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,2,1,7,4,6,5]
483 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 2, i32 1, i32 7, i32 4, i32 6, i32 5>
484 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
485 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute of a loaded vector: %res is the <8 x float> loaded from %vp shuffled with indices <3,0,0,2,4,6,7,6>
; (the two 4-element halves use different patterns).
; The assertions expect a separate vmovaps load from (%rdi) into ymm0 followed by a register vpermilps.
488 define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) {
489 ; CHECK-LABEL: test_8xfloat_perm_mem_mask0:
491 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
492 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[3,0,0,2,4,6,7,6]
494 %vec = load <8 x float>, <8 x float>* %vp
495 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 4, i32 6, i32 7, i32 6>
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <3,0,0,2,4,6,7,6> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others get %vec2.
; The assertions expect a separate vmovaps load from (%rdi) into ymm2, then a vpermilps from ymm2 into ymm0 under {%k1}.
498 define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
499 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
501 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
502 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
503 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
504 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[3,0,0,2,4,6,7,6]
506 %vec = load <8 x float>, <8 x float>* %vp
507 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 4, i32 6, i32 7, i32 6>
508 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
509 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <3,0,0,2,4,6,7,6> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others are zero.
; The assertions expect a separate vmovaps load from (%rdi) into ymm1, then a vpermilps from ymm1 into ymm0 under {%k1} {z}.
513 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
514 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
516 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
517 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
518 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
519 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[3,0,0,2,4,6,7,6]
521 %vec = load <8 x float>, <8 x float>* %vp
522 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 4, i32 6, i32 7, i32 6>
523 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
524 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,0,2,2,6,4,6,6> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others get %vec2.
; The assertions expect a vpermilps from mem into ymm0 under {%k1}.
528 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
529 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask1:
531 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
532 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
533 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,2,2,6,4,6,6]
535 %vec = load <8 x float>, <8 x float>* %vp
536 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 0, i32 2, i32 2, i32 6, i32 4, i32 6, i32 6>
537 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
538 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,0,2,2,6,4,6,6> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others are zero.
; The assertions expect a vpermilps from mem into ymm0 under {%k1} {z}.
542 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
543 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask1:
545 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
546 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
547 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,2,2,6,4,6,6]
549 %vec = load <8 x float>, <8 x float>* %vp
550 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 0, i32 2, i32 2, i32 6, i32 4, i32 6, i32 6>
551 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
552 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,1,1,3,4,4,7,4> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others get %vec2.
; The assertions expect a separate vmovaps load from (%rdi) into ymm2, then a vpermilps from ymm2 into ymm0 under {%k1}.
556 define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
557 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
559 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
560 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
561 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
562 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[2,1,1,3,4,4,7,4]
564 %vec = load <8 x float>, <8 x float>* %vp
565 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 3, i32 4, i32 4, i32 7, i32 4>
566 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
567 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,1,1,3,4,4,7,4> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others are zero.
; The assertions expect a separate vmovaps load from (%rdi) into ymm1, then a vpermilps from ymm1 into ymm0 under {%k1} {z}.
571 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
572 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
574 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
575 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
576 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
577 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[2,1,1,3,4,4,7,4]
579 %vec = load <8 x float>, <8 x float>* %vp
580 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 3, i32 4, i32 4, i32 7, i32 4>
581 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
582 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute of a loaded vector: %res is the <8 x float> loaded from %vp shuffled with indices <0,0,3,3,4,4,7,7>
; (same pattern in both 4-element halves). The assertions expect the load to appear as the mem operand of vpermilps.
586 define <8 x float> @test_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp) {
587 ; CHECK-LABEL: test_8xfloat_perm_imm_mem_mask3:
589 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[0,0,3,3,4,4,7,7]
591 %vec = load <8 x float>, <8 x float>* %vp
592 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,0,3,3,4,4,7,7> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others get %vec2.
; The assertions expect a vpermilps from mem into ymm0 under {%k1}.
595 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
596 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask3:
598 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
599 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
600 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,0,3,3,4,4,7,7]
602 %vec = load <8 x float>, <8 x float>* %vp
603 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
604 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
605 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,0,3,3,4,4,7,7> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others are zero.
; The assertions expect a vpermilps from mem into ymm0 under {%k1} {z}.
609 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
610 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask3:
612 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
613 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
614 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,0,3,3,4,4,7,7]
616 %vec = load <8 x float>, <8 x float>* %vp
617 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 3, i32 4, i32 4, i32 7, i32 7>
618 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
619 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,0,1,4,6,5,4> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others get %vec2.
; The assertions expect a separate vmovaps load from (%rdi) into ymm2, then a vpermilps from ymm2 into ymm0 under {%k1}.
623 define <8 x float> @test_masked_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
624 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask4:
626 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
627 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
628 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
629 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,0,1,4,6,5,4]
631 %vec = load <8 x float>, <8 x float>* %vp
632 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 6, i32 5, i32 4>
633 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
634 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,0,1,4,6,5,4> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others are zero.
; The assertions expect a separate vmovaps load from (%rdi) into ymm1, then a vpermilps from ymm1 into ymm0 under {%k1} {z}.
638 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask4(<8 x float>* %vp, <8 x float> %mask) {
639 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask4:
641 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
642 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
643 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
644 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,0,1,4,6,5,4]
646 %vec = load <8 x float>, <8 x float>* %vp
647 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 6, i32 5, i32 4>
648 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
649 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,0,0,3,6,4,4,7> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others get %vec2.
; The assertions expect a vpermilps from mem into ymm0 under {%k1}.
653 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
654 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask5:
656 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
657 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
658 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[2,0,0,3,6,4,4,7]
660 %vec = load <8 x float>, <8 x float>* %vp
661 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 0, i32 0, i32 3, i32 6, i32 4, i32 4, i32 7>
662 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
663 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <2,0,0,3,6,4,4,7> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others are zero.
; The assertions expect a vpermilps from mem into ymm0 under {%k1} {z}.
667 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask5(<8 x float>* %vp, <8 x float> %mask) {
668 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask5:
670 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
671 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
672 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[2,0,0,3,6,4,4,7]
674 %vec = load <8 x float>, <8 x float>* %vp
675 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 0, i32 0, i32 3, i32 6, i32 4, i32 4, i32 7>
676 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
677 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute of a loaded vector: %res is the <8 x float> loaded from %vp shuffled with indices <0,1,2,3,7,4,6,7>
; (the two 4-element halves use different patterns).
; The assertions expect a separate vmovaps load from (%rdi) into ymm0 followed by a register vpermilps.
681 define <8 x float> @test_8xfloat_perm_mem_mask6(<8 x float>* %vp) {
682 ; CHECK-LABEL: test_8xfloat_perm_mem_mask6:
684 ; CHECK-NEXT: vmovaps (%rdi), %ymm0
685 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = ymm0[0,1,2,3,7,4,6,7]
687 %vec = load <8 x float>, <8 x float>* %vp
688 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 7>
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,2,3,7,4,6,7> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others get %vec2.
; The assertions expect a separate vmovaps load from (%rdi) into ymm2, then a vpermilps from ymm2 into ymm0 under {%k1}.
691 define <8 x float> @test_masked_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
692 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask6:
694 ; CHECK-NEXT: vmovaps (%rdi), %ymm2
695 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
696 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
697 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = ymm2[0,1,2,3,7,4,6,7]
699 %vec = load <8 x float>, <8 x float>* %vp
700 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 7>
701 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
702 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,1,2,3,7,4,6,7> shuffle of the
; vector loaded from %vp (different patterns in the two 4-element halves), the others are zero.
; The assertions expect a separate vmovaps load from (%rdi) into ymm1, then a vpermilps from ymm1 into ymm0 under {%k1} {z}.
706 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask6(<8 x float>* %vp, <8 x float> %mask) {
707 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask6:
709 ; CHECK-NEXT: vmovaps (%rdi), %ymm1
710 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
711 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
712 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = ymm1[0,1,2,3,7,4,6,7]
714 %vec = load <8 x float>, <8 x float>* %vp
715 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 7>
716 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
717 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked permute of a loaded vector: lanes where %mask is ordered-equal to zero get the <0,2,3,1,4,6,7,5> shuffle of the
; vector loaded from %vp (same pattern in both 4-element halves), the others get %vec2.
; The assertions expect a vpermilps from mem into ymm0 under {%k1}.
721 define <8 x float> @test_masked_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
722 ; CHECK-LABEL: test_masked_8xfloat_perm_imm_mem_mask7:
724 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
725 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
726 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[0,2,3,1,4,6,7,5]
728 %vec = load <8 x float>, <8 x float>* %vp
729 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 3, i32 1, i32 4, i32 6, i32 7, i32 5>
730 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
731 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
; Zero-masked permute of an <8 x float> loaded from %vp where the pattern
; [0,2,3,1] is applied to both groups of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly folds the load into vpermilps with {%k1} {z}.
735 define <8 x float> @test_masked_z_8xfloat_perm_imm_mem_mask7(<8 x float>* %vp, <8 x float> %mask) {
736 ; CHECK-LABEL: test_masked_z_8xfloat_perm_imm_mem_mask7:
738 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
739 ; CHECK-NEXT: vcmpeqps %ymm1, %ymm0, %k1
740 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,1,4,6,7,5]
742 %vec = load <8 x float>, <8 x float>* %vp
743 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 2, i32 3, i32 1, i32 4, i32 6, i32 7, i32 5>
744 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
745 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked permute of a <16 x float> register operand. Every index stays
; inside its own group of four elements, but the pattern differs per group.
; The expected assembly is a single vpermilps on zmm0.
749 define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
750 ; CHECK-LABEL: test_16xfloat_perm_mask0:
752 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
754 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
755 ret <16 x float> %res
; Merge-masked form of the mask0 <16 x float> permute (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element, the rest
; keep %vec2. The expected assembly builds %k1 with vxorps + vcmpeqps, runs
; vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
757 define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
758 ; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
760 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
761 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
762 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
763 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
765 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
766 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
767 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
768 ret <16 x float> %res
; Zero-masked form of the mask0 <16 x float> permute. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
771 define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) {
772 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
774 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
775 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
776 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,3,1,6,4,6,5,8,9,8,11,13,13,13,15]
778 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 3, i32 1, i32 6, i32 4, i32 6, i32 5, i32 8, i32 9, i32 8, i32 11, i32 13, i32 13, i32 13, i32 15>
779 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
780 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
781 ret <16 x float> %res
; Merge-masked <16 x float> permute where the same pattern [2,2,2,1] is
; applied to every group of four elements. Lanes where %mask is oeq 0.0 take
; the shuffled element, the rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
783 define <16 x float> @test_masked_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
784 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask1:
786 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
787 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
788 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
789 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
791 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 1, i32 6, i32 6, i32 6, i32 5, i32 10, i32 10, i32 10, i32 9, i32 14, i32 14, i32 14, i32 13>
792 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
793 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
794 ret <16 x float> %res
; Zero-masked <16 x float> permute with the pattern [2,2,2,1] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element; all other lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
797 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask1(<16 x float> %vec, <16 x float> %mask) {
798 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask1:
800 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
801 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
802 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,2,2,1,6,6,6,5,10,10,10,9,14,14,14,13]
804 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 2, i32 2, i32 1, i32 6, i32 6, i32 6, i32 5, i32 10, i32 10, i32 10, i32 9, i32 14, i32 14, i32 14, i32 13>
805 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
806 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
807 ret <16 x float> %res
; Merge-masked <16 x float> permute whose pattern differs for each group of
; four elements (indices never leave their group). Lanes where %mask is
; oeq 0.0 take the shuffled element, the rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
809 define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
810 ; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
812 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
813 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
814 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
815 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
817 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12>
818 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
819 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
820 ret <16 x float> %res
; Zero-masked form of the mask2 <16 x float> permute (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element; all other
; lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
823 define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) {
824 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
826 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
827 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
828 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,0,0,5,4,6,5,11,10,9,9,14,13,14,12]
830 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 0, i32 5, i32 4, i32 6, i32 5, i32 11, i32 10, i32 9, i32 9, i32 14, i32 13, i32 14, i32 12>
831 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
832 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
833 ret <16 x float> %res
; Unmasked <16 x float> permute where the same pattern [1,1,0,2] is applied to
; every group of four elements.
; The expected assembly is a single vpermilps on zmm0.
835 define <16 x float> @test_16xfloat_perm_imm_mask3(<16 x float> %vec) {
836 ; CHECK-LABEL: test_16xfloat_perm_imm_mask3:
838 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
840 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14>
841 ret <16 x float> %res
; Merge-masked <16 x float> permute with the pattern [1,1,0,2] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element, the rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
843 define <16 x float> @test_masked_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
844 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask3:
846 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
847 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
848 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
849 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
851 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14>
852 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
853 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
854 ret <16 x float> %res
; Zero-masked <16 x float> permute with the pattern [1,1,0,2] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element; all other lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
857 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask3(<16 x float> %vec, <16 x float> %mask) {
858 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask3:
860 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
861 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
862 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,1,0,2,5,5,4,6,9,9,8,10,13,13,12,14]
864 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 1, i32 0, i32 2, i32 5, i32 5, i32 4, i32 6, i32 9, i32 9, i32 8, i32 10, i32 13, i32 13, i32 12, i32 14>
865 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
866 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
867 ret <16 x float> %res
; Merge-masked <16 x float> permute whose pattern differs for each group of
; four elements. Lanes where %mask is oeq 0.0 take the shuffled element, the
; rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
869 define <16 x float> @test_masked_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
870 ; CHECK-LABEL: test_masked_16xfloat_perm_mask4:
872 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
873 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
874 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
875 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
877 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15>
878 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
879 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
880 ret <16 x float> %res
; Zero-masked form of the mask4 <16 x float> permute (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element; all other
; lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
883 define <16 x float> @test_masked_z_16xfloat_perm_mask4(<16 x float> %vec, <16 x float> %mask) {
884 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask4:
886 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
887 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
888 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,3,3,5,5,5,7,11,11,8,11,14,12,14,15]
890 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 3, i32 3, i32 5, i32 5, i32 5, i32 7, i32 11, i32 11, i32 8, i32 11, i32 14, i32 12, i32 14, i32 15>
891 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
892 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
893 ret <16 x float> %res
; Merge-masked <16 x float> permute with the pattern [1,2,1,0] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element, the rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
895 define <16 x float> @test_masked_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
896 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask5:
898 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
899 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
900 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
901 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
903 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 1, i32 0, i32 5, i32 6, i32 5, i32 4, i32 9, i32 10, i32 9, i32 8, i32 13, i32 14, i32 13, i32 12>
904 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
905 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
906 ret <16 x float> %res
; Zero-masked <16 x float> permute with the pattern [1,2,1,0] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element; all other lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
909 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask5(<16 x float> %vec, <16 x float> %mask) {
910 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask5:
912 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
913 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
914 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[1,2,1,0,5,6,5,4,9,10,9,8,13,14,13,12]
916 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 1, i32 0, i32 5, i32 6, i32 5, i32 4, i32 9, i32 10, i32 9, i32 8, i32 13, i32 14, i32 13, i32 12>
917 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
918 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
919 ret <16 x float> %res
; Unmasked <16 x float> permute whose pattern differs for each group of four
; elements (indices never leave their group).
; The expected assembly is a single vpermilps on zmm0.
921 define <16 x float> @test_16xfloat_perm_mask6(<16 x float> %vec) {
922 ; CHECK-LABEL: test_16xfloat_perm_mask6:
924 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
926 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
927 ret <16 x float> %res
; Merge-masked form of the mask6 <16 x float> permute (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element, the rest
; keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
929 define <16 x float> @test_masked_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
930 ; CHECK-LABEL: test_masked_16xfloat_perm_mask6:
932 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
933 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
934 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
935 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
937 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
938 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
939 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
940 ret <16 x float> %res
; Zero-masked form of the mask6 <16 x float> permute (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element; all other
; lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
943 define <16 x float> @test_masked_z_16xfloat_perm_mask6(<16 x float> %vec, <16 x float> %mask) {
944 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask6:
946 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
947 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
948 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,2,4,4,6,7,9,11,8,11,13,12,13,13]
950 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 2, i32 4, i32 4, i32 6, i32 7, i32 9, i32 11, i32 8, i32 11, i32 13, i32 12, i32 13, i32 13>
951 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
952 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
953 ret <16 x float> %res
; Merge-masked <16 x float> permute with the pattern [3,3,0,2] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element, the rest keep %vec2.
; The expected assembly runs vpermilps into zmm1 under {%k1}, then moves zmm1 to zmm0.
955 define <16 x float> @test_masked_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
956 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mask7:
958 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
959 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
960 ; CHECK-NEXT: vpermilps {{.*#+}} zmm1 {%k1} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
961 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
963 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 0, i32 2, i32 7, i32 7, i32 4, i32 6, i32 11, i32 11, i32 8, i32 10, i32 15, i32 15, i32 12, i32 14>
964 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
965 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
966 ret <16 x float> %res
; Zero-masked <16 x float> permute with the pattern [3,3,0,2] applied to every
; group of four elements. Lanes where %mask is oeq 0.0 take the shuffled
; element; all other lanes are zero.
; The expected assembly uses vpermilps in place on zmm0 with {%k1} {z}.
969 define <16 x float> @test_masked_z_16xfloat_perm_imm_mask7(<16 x float> %vec, <16 x float> %mask) {
970 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mask7:
972 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
973 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
974 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,0,2,7,7,4,6,11,11,8,10,15,15,12,14]
976 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 0, i32 2, i32 7, i32 7, i32 4, i32 6, i32 11, i32 11, i32 8, i32 10, i32 15, i32 15, i32 12, i32 14>
977 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
978 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
979 ret <16 x float> %res
; Unmasked permute of a <16 x float> loaded from %vp; the pattern differs for
; each group of four elements. The expected assembly loads the vector with
; vmovaps and then shuffles the register instead of folding the load.
; NOTE(review): the file's FIXME header ties the non-immediate <16 x float>
; cases to PR34382 - presumably that covers this unfolded load; confirm.
981 define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
982 ; CHECK-LABEL: test_16xfloat_perm_mem_mask0:
984 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
985 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
987 %vec = load <16 x float>, <16 x float>* %vp
988 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
989 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element, the rest
; keep %vec2. The expected assembly loads into zmm2 with vmovaps, then runs
; vpermilps from zmm2 into zmm0 under {%k1}; the load is not folded.
991 define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
992 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
994 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
995 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
996 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
997 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
999 %vec = load <16 x float>, <16 x float>* %vp
1000 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
1001 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1002 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1003 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp (per-group patterns
; differ). Lanes where %mask is oeq 0.0 take the shuffled element; all other
; lanes are zero. The expected assembly loads into zmm1 with vmovaps, then
; runs vpermilps from zmm1 with {%k1} {z}; the load is not folded.
1006 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
1007 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
1009 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1010 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1011 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
1012 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,3,0,6,6,6,6,11,10,9,10,12,14,12,12]
1014 %vec = load <16 x float>, <16 x float>* %vp
1015 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 6, i32 6, i32 6, i32 6, i32 11, i32 10, i32 9, i32 10, i32 12, i32 14, i32 12, i32 12>
1016 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1017 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1018 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,3,2,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element, the rest keep %vec2.
; The expected assembly folds the load into vpermilps (source shown as mem[...]).
1021 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1022 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask1:
1024 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1025 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1026 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
1028 %vec = load <16 x float>, <16 x float>* %vp
1029 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 1, i32 5, i32 7, i32 6, i32 5, i32 9, i32 11, i32 10, i32 9, i32 13, i32 15, i32 14, i32 13>
1030 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1031 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1032 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,3,2,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly folds the load into vpermilps with {%k1} {z}.
1035 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
1036 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask1:
1038 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1039 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1040 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,3,2,1,5,7,6,5,9,11,10,9,13,15,14,13]
1042 %vec = load <16 x float>, <16 x float>* %vp
1043 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 3, i32 2, i32 1, i32 5, i32 7, i32 6, i32 5, i32 9, i32 11, i32 10, i32 9, i32 13, i32 15, i32 14, i32 13>
1044 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1045 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1046 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element, the rest keep %vec2. The expected assembly loads into zmm2
; with vmovaps, then runs vpermilps into zmm0 under {%k1}; the load is not folded.
1049 define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1050 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
1052 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1053 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1054 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
1055 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
1057 %vec = load <16 x float>, <16 x float>* %vp
1058 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 3, i32 5, i32 5, i32 6, i32 5, i32 9, i32 8, i32 8, i32 8, i32 14, i32 12, i32 13, i32 13>
1059 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1060 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1061 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element; all other lanes are zero. The expected assembly loads into
; zmm1 with vmovaps, then runs vpermilps with {%k1} {z}; the load is not folded.
1064 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
1065 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
1067 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1068 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1069 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
1070 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,0,0,3,5,5,6,5,9,8,8,8,14,12,13,13]
1072 %vec = load <16 x float>, <16 x float>* %vp
1073 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 3, i32 5, i32 5, i32 6, i32 5, i32 9, i32 8, i32 8, i32 8, i32 14, i32 12, i32 13, i32 13>
1074 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1075 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1076 ret <16 x float> %res
; Unmasked permute of a <16 x float> loaded from %vp, with the pattern
; [1,0,3,1] applied to every group of four elements.
; The expected assembly is a single vpermilps with the load folded (mem[...]).
1079 define <16 x float> @test_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp) {
1080 ; CHECK-LABEL: test_16xfloat_perm_imm_mem_mask3:
1082 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
1084 %vec = load <16 x float>, <16 x float>* %vp
1085 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 1, i32 5, i32 4, i32 7, i32 5, i32 9, i32 8, i32 11, i32 9, i32 13, i32 12, i32 15, i32 13>
1086 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,0,3,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element, the rest keep %vec2.
; The expected assembly folds the load into vpermilps under {%k1}.
1088 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1089 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask3:
1091 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1092 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1093 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
1095 %vec = load <16 x float>, <16 x float>* %vp
1096 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 1, i32 5, i32 4, i32 7, i32 5, i32 9, i32 8, i32 11, i32 9, i32 13, i32 12, i32 15, i32 13>
1097 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1098 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1099 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,0,3,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly folds the load into vpermilps with {%k1} {z}.
1102 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
1103 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask3:
1105 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1106 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1107 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,1,5,4,7,5,9,8,11,9,13,12,15,13]
1109 %vec = load <16 x float>, <16 x float>* %vp
1110 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 0, i32 3, i32 1, i32 5, i32 4, i32 7, i32 5, i32 9, i32 8, i32 11, i32 9, i32 13, i32 12, i32 15, i32 13>
1111 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1112 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1113 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element, the rest keep %vec2. The expected assembly loads into zmm2
; with vmovaps, then runs vpermilps into zmm0 under {%k1}; the load is not folded.
1116 define <16 x float> @test_masked_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1117 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask4:
1119 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1120 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1121 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
1122 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
1124 %vec = load <16 x float>, <16 x float>* %vp
1125 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 1, i32 1, i32 6, i32 5, i32 5, i32 6, i32 11, i32 11, i32 10, i32 9, i32 15, i32 14, i32 12, i32 12>
1126 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1127 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1128 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element; all other lanes are zero. The expected assembly loads into
; zmm1 with vmovaps, then runs vpermilps with {%k1} {z}; the load is not folded.
1131 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask4(<16 x float>* %vp, <16 x float> %mask) {
1132 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask4:
1134 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1135 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1136 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
1137 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[3,3,1,1,6,5,5,6,11,11,10,9,15,14,12,12]
1139 %vec = load <16 x float>, <16 x float>* %vp
1140 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 3, i32 3, i32 1, i32 1, i32 6, i32 5, i32 5, i32 6, i32 11, i32 11, i32 10, i32 9, i32 15, i32 14, i32 12, i32 12>
1141 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1142 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1143 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp, with the pattern
; [2,0,0,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element, the rest keep %vec2.
; The expected assembly folds the load into vpermilps under {%k1}.
1146 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1147 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask5:
1149 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1150 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1151 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
1153 %vec = load <16 x float>, <16 x float>* %vp
1154 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 1, i32 6, i32 4, i32 4, i32 5, i32 10, i32 8, i32 8, i32 9, i32 14, i32 12, i32 12, i32 13>
1155 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1156 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1157 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp, with the pattern
; [2,0,0,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly folds the load into vpermilps with {%k1} {z}.
1160 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask5(<16 x float>* %vp, <16 x float> %mask) {
1161 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask5:
1163 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1164 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1165 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[2,0,0,1,6,4,4,5,10,8,8,9,14,12,12,13]
1167 %vec = load <16 x float>, <16 x float>* %vp
1168 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 0, i32 0, i32 1, i32 6, i32 4, i32 4, i32 5, i32 10, i32 8, i32 8, i32 9, i32 14, i32 12, i32 12, i32 13>
1169 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1170 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1171 ret <16 x float> %res
; Unmasked permute of a <16 x float> loaded from %vp; the pattern differs for
; each group of four elements. The expected assembly loads the vector with
; vmovaps and then shuffles the register instead of folding the load.
1174 define <16 x float> @test_16xfloat_perm_mem_mask6(<16 x float>* %vp) {
1175 ; CHECK-LABEL: test_16xfloat_perm_mem_mask6:
1177 ; CHECK-NEXT: vmovaps (%rdi), %zmm0
1178 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = zmm0[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
1180 %vec = load <16 x float>, <16 x float>* %vp
1181 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
1182 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element, the rest keep %vec2. The expected assembly loads into zmm2
; with vmovaps, then runs vpermilps into zmm0 under {%k1}; the load is not folded.
1184 define <16 x float> @test_masked_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1185 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask6:
1187 ; CHECK-NEXT: vmovaps (%rdi), %zmm2
1188 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1189 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
1190 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = zmm2[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
1192 %vec = load <16 x float>, <16 x float>* %vp
1193 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
1194 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1195 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1196 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp; the pattern differs
; for each group of four elements. Lanes where %mask is oeq 0.0 take the
; shuffled element; all other lanes are zero. The expected assembly loads into
; zmm1 with vmovaps, then runs vpermilps with {%k1} {z}; the load is not folded.
1199 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask6(<16 x float>* %vp, <16 x float> %mask) {
1200 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask6:
1202 ; CHECK-NEXT: vmovaps (%rdi), %zmm1
1203 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1204 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
1205 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = zmm1[2,1,1,2,6,5,5,7,9,11,9,9,12,15,14,15]
1207 %vec = load <16 x float>, <16 x float>* %vp
1208 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 2, i32 1, i32 1, i32 2, i32 6, i32 5, i32 5, i32 7, i32 9, i32 11, i32 9, i32 9, i32 12, i32 15, i32 14, i32 15>
1209 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1210 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1211 ret <16 x float> %res
; Merge-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,2,0,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element, the rest keep %vec2.
; The expected assembly folds the load into vpermilps under {%k1}.
1214 define <16 x float> @test_masked_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
1215 ; CHECK-LABEL: test_masked_16xfloat_perm_imm_mem_mask7:
1217 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1218 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
1219 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
1221 %vec = load <16 x float>, <16 x float>* %vp
1222 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 1, i32 5, i32 6, i32 4, i32 5, i32 9, i32 10, i32 8, i32 9, i32 13, i32 14, i32 12, i32 13>
1223 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1224 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
1225 ret <16 x float> %res
; Zero-masked permute of a <16 x float> loaded from %vp, with the pattern
; [1,2,0,1] applied to every group of four elements. Lanes where %mask is
; oeq 0.0 take the shuffled element; all other lanes are zero.
; The expected assembly folds the load into vpermilps with {%k1} {z}.
1228 define <16 x float> @test_masked_z_16xfloat_perm_imm_mem_mask7(<16 x float>* %vp, <16 x float> %mask) {
1229 ; CHECK-LABEL: test_masked_z_16xfloat_perm_imm_mem_mask7:
1231 ; CHECK-NEXT: vxorps %xmm1, %xmm1, %xmm1
1232 ; CHECK-NEXT: vcmpeqps %zmm1, %zmm0, %k1
1233 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[1,2,0,1,5,6,4,5,9,10,8,9,13,14,12,13]
1235 %vec = load <16 x float>, <16 x float>* %vp
1236 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 1, i32 2, i32 0, i32 1, i32 5, i32 6, i32 4, i32 5, i32 9, i32 10, i32 8, i32 9, i32 13, i32 14, i32 12, i32 13>
1237 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1238 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1239 ret <16 x float> %res
; Unmasked swap of the two elements of a <2 x double> register operand
; (shuffle indices 1,0).
; The expected assembly is a single vpermilpd on xmm0.
1242 define <2 x double> @test_2xdouble_perm_mask0(<2 x double> %vec) {
1243 ; CHECK-LABEL: test_2xdouble_perm_mask0:
1245 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = xmm0[1,0]
1247 %res = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1248 ret <2 x double> %res
; Merge-masked swap of the two elements of a <2 x double>. Lanes where %mask
; is oeq 0.0 take the swapped element, the rest keep %vec2. The expected
; assembly builds %k1 with vxorpd + vcmpeqpd, runs vpermilpd into xmm1 under
; {%k1}, then moves xmm1 to xmm0.
1250 define <2 x double> @test_masked_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
1251 ; CHECK-LABEL: test_masked_2xdouble_perm_mask0:
1253 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1254 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
1255 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
1256 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
1258 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1259 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1260 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
1261 ret <2 x double> %res
; Zero-masked swap of the two elements of a <2 x double>. Lanes where %mask is
; oeq 0.0 take the swapped element; all other lanes are zero.
; The expected assembly uses vpermilpd in place on xmm0 with {%k1} {z}.
1264 define <2 x double> @test_masked_z_2xdouble_perm_mask0(<2 x double> %vec, <2 x double> %mask) {
1265 ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask0:
1267 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1268 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
1269 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
1271 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1272 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1273 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
1274 ret <2 x double> %res
; Merge-masked swap of the two elements of a <2 x double>. Lanes where %mask
; is oeq 0.0 take the swapped element, the rest keep %vec2.
; NOTE(review): the shuffle indices (1,0) are the same as in the mask0 variant
; of this test - confirm whether a different pattern was intended.
1276 define <2 x double> @test_masked_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %vec2, <2 x double> %mask) {
1277 ; CHECK-LABEL: test_masked_2xdouble_perm_mask1:
1279 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1280 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
1281 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm1 {%k1} = xmm0[1,0]
1282 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
1284 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1285 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1286 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
1287 ret <2 x double> %res
; Zero-masked swap of the two elements of a <2 x double>. Lanes where %mask is
; oeq 0.0 take the swapped element; all other lanes are zero.
; NOTE(review): the shuffle indices (1,0) are the same as in the mask0 variant
; of this test - confirm whether a different pattern was intended.
1290 define <2 x double> @test_masked_z_2xdouble_perm_mask1(<2 x double> %vec, <2 x double> %mask) {
1291 ; CHECK-LABEL: test_masked_z_2xdouble_perm_mask1:
1293 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1294 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
1295 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0]
1297 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1298 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1299 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
1300 ret <2 x double> %res
; Unmasked swap of the two elements of a <2 x double> loaded from %vp.
; The expected assembly is a single vpermilpd with the load folded (mem[1,0]).
1302 define <2 x double> @test_2xdouble_perm_mem_mask0(<2 x double>* %vp) {
1303 ; CHECK-LABEL: test_2xdouble_perm_mem_mask0:
1305 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 = mem[1,0]
1307 %vec = load <2 x double>, <2 x double>* %vp
1308 %res = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1309 ret <2 x double> %res
; Merge-masked swap of the two elements of a <2 x double> loaded from %vp.
; Lanes where %mask is oeq 0.0 take the swapped element, the rest keep %vec2.
; The expected assembly folds the load into vpermilpd under {%k1}.
1311 define <2 x double> @test_masked_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
1312 ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask0:
1314 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1315 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
1316 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
1318 %vec = load <2 x double>, <2 x double>* %vp
1319 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1320 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1321 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
1322 ret <2 x double> %res
; Zero-masked swap of the two elements of a <2 x double> loaded from %vp.
; Lanes where %mask is oeq 0.0 take the swapped element; all other lanes are
; zero. The expected assembly folds the load into vpermilpd with {%k1} {z}.
1325 define <2 x double> @test_masked_z_2xdouble_perm_mem_mask0(<2 x double>* %vp, <2 x double> %mask) {
1326 ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask0:
1328 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1329 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
1330 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
1332 %vec = load <2 x double>, <2 x double>* %vp
1333 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1334 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1335 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
1336 ret <2 x double> %res
; Merge-masked permute of a <2 x double> loaded from %vp with indices <1, 0>
; (the same indices as the mem_mask0 variant): elements where %mask is oeq 0.0
; take the shuffled value, the others keep %vec2. The expected vpermilpd takes
; a mem source and writes xmm0 under {%k1}.
1339 define <2 x double> @test_masked_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %vec2, <2 x double> %mask) {
1340 ; CHECK-LABEL: test_masked_2xdouble_perm_mem_mask1:
1342 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1343 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
1344 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} = mem[1,0]
1346 %vec = load <2 x double>, <2 x double>* %vp
1347 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1348 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1349 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec2
1350 ret <2 x double> %res
; Zero-masked permute of a <2 x double> loaded from %vp with indices <1, 0>
; (the same indices as the mem_mask0 variant): elements where %mask is oeq 0.0
; take the shuffled value, the others are zero. The expected vpermilpd takes a
; mem source and carries {%k1} {z}.
1353 define <2 x double> @test_masked_z_2xdouble_perm_mem_mask1(<2 x double>* %vp, <2 x double> %mask) {
1354 ; CHECK-LABEL: test_masked_z_2xdouble_perm_mem_mask1:
1356 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1357 ; CHECK-NEXT: vcmpeqpd %xmm1, %xmm0, %k1
1358 ; CHECK-NEXT: vpermilpd {{.*#+}} xmm0 {%k1} {z} = mem[1,0]
1360 %vec = load <2 x double>, <2 x double>* %vp
1361 %shuf = shufflevector <2 x double> %vec, <2 x double> undef, <2 x i32> <i32 1, i32 0>
1362 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
1363 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
1364 ret <2 x double> %res
; Unmasked permute of <4 x double> %vec with indices <1, 0, 2, 3>. The expected
; assembly is a vpermilpd on ymm0.
1367 define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) {
1368 ; CHECK-LABEL: test_4xdouble_perm_mask0:
1370 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,0,2,3]
1372 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
1373 ret <4 x double> %res
; Merge-masked permute of <4 x double> %vec with indices <1, 0, 2, 3>: elements
; where %mask is oeq 0.0 take the shuffled value, the others keep %vec2. The
; expected vpermilpd writes ymm1 under {%k1}; ymm1 is then copied to ymm0.
1375 define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
1376 ; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
1378 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1379 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1380 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,0,2,3]
1381 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1383 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
1384 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1385 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1386 ret <4 x double> %res
; Zero-masked permute of <4 x double> %vec with indices <1, 0, 2, 3>: elements
; where %mask is oeq 0.0 take the shuffled value, the others are zero. The
; expected vpermilpd carries {%k1} {z}.
1389 define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) {
1390 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
1392 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1393 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1394 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,0,2,3]
1396 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 3>
1397 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1398 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1399 ret <4 x double> %res
; Merge-masked permute of <4 x double> %vec with indices <1, 1, 2, 2>: elements
; where %mask is oeq 0.0 take the shuffled value, the others keep %vec2. The
; expected vpermilpd writes ymm1 under {%k1}; ymm1 is then copied to ymm0.
1401 define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
1402 ; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
1404 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1405 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1406 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
1407 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1409 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
1410 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1411 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1412 ret <4 x double> %res
; Zero-masked permute of <4 x double> %vec with indices <1, 1, 2, 2>: elements
; where %mask is oeq 0.0 take the shuffled value, the others are zero. The
; expected vpermilpd carries {%k1} {z}.
1415 define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) {
1416 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
1418 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1419 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1420 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
1422 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
1423 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1424 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1425 ret <4 x double> %res
; Merge-masked permute of <4 x double> %vec with indices <0, 1, 3, 3>: elements
; where %mask is oeq 0.0 take the shuffled value, the others keep %vec2. The
; expected vpermilpd writes ymm1 under {%k1}; ymm1 is then copied to ymm0.
1427 define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
1428 ; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
1430 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1431 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1432 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[0,1,3,3]
1433 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1435 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
1436 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1437 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1438 ret <4 x double> %res
; Zero-masked permute of <4 x double> %vec with indices <0, 1, 3, 3>: elements
; where %mask is oeq 0.0 take the shuffled value, the others are zero. The
; expected vpermilpd carries {%k1} {z}.
1441 define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) {
1442 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
1444 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1445 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1446 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,3,3]
1448 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
1449 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1450 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1451 ret <4 x double> %res
; Unmasked permute of <4 x double> %vec with indices <1, 1, 2, 2>. The expected
; assembly is a vpermilpd on ymm0.
1453 define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) {
1454 ; CHECK-LABEL: test_4xdouble_perm_mask3:
1456 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = ymm0[1,1,2,2]
1458 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
1459 ret <4 x double> %res
; Merge-masked permute of <4 x double> %vec with indices <1, 1, 2, 2>: elements
; where %mask is oeq 0.0 take the shuffled value, the others keep %vec2. The
; expected vpermilpd writes ymm1 under {%k1}; ymm1 is then copied to ymm0.
1461 define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
1462 ; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
1464 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1465 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1466 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm1 {%k1} = ymm0[1,1,2,2]
1467 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1469 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
1470 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1471 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1472 ret <4 x double> %res
; Zero-masked permute of <4 x double> %vec with indices <1, 1, 2, 2>: elements
; where %mask is oeq 0.0 take the shuffled value, the others are zero. The
; expected vpermilpd carries {%k1} {z}.
1475 define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) {
1476 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
1478 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1479 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1480 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,1,2,2]
1482 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 1, i32 2, i32 2>
1483 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1484 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1485 ret <4 x double> %res
; Unmasked permute of a <4 x double> loaded from %vp with indices <0, 1, 2, 2>.
; The expected vpermilpd reads its source as a mem operand.
1487 define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
1488 ; CHECK-LABEL: test_4xdouble_perm_mem_mask0:
1490 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = mem[0,1,2,2]
1492 %vec = load <4 x double>, <4 x double>* %vp
1493 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
1494 ret <4 x double> %res
; Merge-masked permute of a <4 x double> loaded from %vp with indices
; <0, 1, 2, 2>: elements where %mask is oeq 0.0 take the shuffled value, the
; others keep %vec2. The expected vpermilpd takes a mem source and writes ymm0
; under {%k1}.
1496 define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
1497 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
1499 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1500 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1501 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,2,2]
1503 %vec = load <4 x double>, <4 x double>* %vp
1504 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
1505 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1506 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1507 ret <4 x double> %res
; Zero-masked permute of a <4 x double> loaded from %vp with indices
; <0, 1, 2, 2>: elements where %mask is oeq 0.0 take the shuffled value, the
; others are zero. The expected vpermilpd takes a mem source and carries
; {%k1} {z}.
1510 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
1511 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
1513 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1514 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
1515 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,2]
1517 %vec = load <4 x double>, <4 x double>* %vp
1518 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 2>
1519 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1520 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1521 ret <4 x double> %res
; Merge-masked permute of a <4 x double> loaded from %vp with indices
; <0, 1, 3, 3>: elements where %mask is oeq 0.0 take the shuffled value, the
; others keep %vec2. The expected vpermilpd takes a mem source and writes ymm0
; under {%k1}.
1524 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
1525 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
1527 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1528 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1529 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[0,1,3,3]
1531 %vec = load <4 x double>, <4 x double>* %vp
1532 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
1533 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1534 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1535 ret <4 x double> %res
; Zero-masked permute of a <4 x double> loaded from %vp with indices
; <0, 1, 3, 3>: elements where %mask is oeq 0.0 take the shuffled value, the
; others are zero. The expected vpermilpd takes a mem source and carries
; {%k1} {z}.
1538 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
1539 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
1541 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1542 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
1543 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[0,1,3,3]
1545 %vec = load <4 x double>, <4 x double>* %vp
1546 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
1547 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1548 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1549 ret <4 x double> %res
; Merge-masked permute of a <4 x double> loaded from %vp with indices
; <1, 0, 3, 3>: elements where %mask is oeq 0.0 take the shuffled value, the
; others keep %vec2. The expected vpermilpd takes a mem source and writes ymm0
; under {%k1}.
1552 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
1553 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
1555 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1556 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1557 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,3]
1559 %vec = load <4 x double>, <4 x double>* %vp
1560 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
1561 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1562 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1563 ret <4 x double> %res
; Zero-masked permute of a <4 x double> loaded from %vp with indices
; <1, 0, 3, 3>: elements where %mask is oeq 0.0 take the shuffled value, the
; others are zero. The expected vpermilpd takes a mem source and carries
; {%k1} {z}.
1566 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
1567 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
1569 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1570 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
1571 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,3]
1573 %vec = load <4 x double>, <4 x double>* %vp
1574 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 3>
1575 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1576 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1577 ret <4 x double> %res
; Unmasked permute of a <4 x double> loaded from %vp with indices <1, 0, 3, 2>.
; The expected vpermilpd reads its source as a mem operand.
1580 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
1581 ; CHECK-LABEL: test_4xdouble_perm_mem_mask3:
1583 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 = mem[1,0,3,2]
1585 %vec = load <4 x double>, <4 x double>* %vp
1586 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
1587 ret <4 x double> %res
; Merge-masked permute of a <4 x double> loaded from %vp with indices
; <1, 0, 3, 2>: elements where %mask is oeq 0.0 take the shuffled value, the
; others keep %vec2. The expected vpermilpd takes a mem source and writes ymm0
; under {%k1}.
1589 define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
1590 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3:
1592 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1593 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1594 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} = mem[1,0,3,2]
1596 %vec = load <4 x double>, <4 x double>* %vp
1597 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
1598 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1599 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
1600 ret <4 x double> %res
; Zero-masked permute of a <4 x double> loaded from %vp with indices
; <1, 0, 3, 2>: elements where %mask is oeq 0.0 take the shuffled value, the
; others are zero. The expected vpermilpd takes a mem source and carries
; {%k1} {z}.
1603 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
1604 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
1606 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1607 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
1608 ; CHECK-NEXT: vpermilpd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,3,2]
1610 %vec = load <4 x double>, <4 x double>* %vp
1611 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
1612 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1613 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1614 ret <4 x double> %res
; Unmasked permute of <8 x double> %vec with indices <0, 0, 3, 2, 4, 5, 7, 6>.
; The expected assembly is a vpermilpd on zmm0.
1617 define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) {
1618 ; CHECK-LABEL: test_8xdouble_perm_mask0:
1620 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,0,3,2,4,5,7,6]
1622 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 2, i32 4, i32 5, i32 7, i32 6>
1623 ret <8 x double> %res
; Merge-masked permute of <8 x double> %vec with indices
; <0, 0, 3, 2, 4, 5, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd writes zmm1 under
; {%k1}; zmm1 is then copied to zmm0.
1625 define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
1626 ; CHECK-LABEL: test_masked_8xdouble_perm_mask0:
1628 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1629 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1630 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,3,2,4,5,7,6]
1631 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1633 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 2, i32 4, i32 5, i32 7, i32 6>
1634 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1635 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1636 ret <8 x double> %res
; Zero-masked permute of <8 x double> %vec with indices
; <0, 0, 3, 2, 4, 5, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd carries {%k1} {z}.
1639 define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %mask) {
1640 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0:
1642 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1643 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1644 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,3,2,4,5,7,6]
1646 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 3, i32 2, i32 4, i32 5, i32 7, i32 6>
1647 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1648 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1649 ret <8 x double> %res
; Merge-masked permute of <8 x double> %vec with indices
; <0, 1, 2, 3, 4, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd writes zmm1 under
; {%k1}; zmm1 is then copied to zmm0.
1651 define <8 x double> @test_masked_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
1652 ; CHECK-LABEL: test_masked_8xdouble_perm_mask1:
1654 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1655 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1656 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,7,6]
1657 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1659 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 6>
1660 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1661 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1662 ret <8 x double> %res
; Zero-masked permute of <8 x double> %vec with indices
; <0, 1, 2, 3, 4, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd carries {%k1} {z}.
1665 define <8 x double> @test_masked_z_8xdouble_perm_mask1(<8 x double> %vec, <8 x double> %mask) {
1666 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask1:
1668 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1669 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1670 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,7,6]
1672 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 7, i32 6>
1673 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1674 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1675 ret <8 x double> %res
; Merge-masked permute of <8 x double> %vec with indices
; <0, 0, 2, 3, 5, 5, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd writes zmm1 under
; {%k1}; zmm1 is then copied to zmm0.
1677 define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
1678 ; CHECK-LABEL: test_masked_8xdouble_perm_mask2:
1680 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1681 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1682 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,0,2,3,5,5,6,7]
1683 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1685 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 5, i32 5, i32 6, i32 7>
1686 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1687 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1688 ret <8 x double> %res
; Zero-masked permute of <8 x double> %vec with indices
; <0, 0, 2, 3, 5, 5, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd carries {%k1} {z}.
1691 define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %mask) {
1692 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2:
1694 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1695 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1696 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,2,3,5,5,6,7]
1698 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 0, i32 2, i32 3, i32 5, i32 5, i32 6, i32 7>
1699 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1700 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1701 ret <8 x double> %res
; Unmasked permute of <8 x double> %vec with indices <0, 1, 2, 2, 4, 4, 6, 7>.
; The expected assembly is a vpermilpd on zmm0.
1703 define <8 x double> @test_8xdouble_perm_mask3(<8 x double> %vec) {
1704 ; CHECK-LABEL: test_8xdouble_perm_mask3:
1706 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = zmm0[0,1,2,2,4,4,6,7]
1708 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 7>
1709 ret <8 x double> %res
; Merge-masked permute of <8 x double> %vec with indices
; <0, 1, 2, 2, 4, 4, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd writes zmm1 under
; {%k1}; zmm1 is then copied to zmm0.
1711 define <8 x double> @test_masked_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
1712 ; CHECK-LABEL: test_masked_8xdouble_perm_mask3:
1714 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1715 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1716 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,2,4,4,6,7]
1717 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1719 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 7>
1720 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1721 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1722 ret <8 x double> %res
; Zero-masked permute of <8 x double> %vec with indices
; <0, 1, 2, 2, 4, 4, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd carries {%k1} {z}.
1725 define <8 x double> @test_masked_z_8xdouble_perm_mask3(<8 x double> %vec, <8 x double> %mask) {
1726 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask3:
1728 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1729 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1730 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,2,4,4,6,7]
1732 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 2, i32 4, i32 4, i32 6, i32 7>
1733 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1734 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1735 ret <8 x double> %res
; Unmasked permute of an <8 x double> loaded from %vp with indices
; <0, 1, 2, 3, 5, 4, 7, 6>. The expected vpermilpd reads its source as a mem
; operand.
1737 define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) {
1738 ; CHECK-LABEL: test_8xdouble_perm_mem_mask0:
1740 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = mem[0,1,2,3,5,4,7,6]
1742 %vec = load <8 x double>, <8 x double>* %vp
1743 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
1744 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp with indices
; <0, 1, 2, 3, 5, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd takes a mem source and
; writes zmm0 under {%k1}.
1746 define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
1747 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0:
1749 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1750 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1751 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,5,4,7,6]
1753 %vec = load <8 x double>, <8 x double>* %vp
1754 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
1755 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1756 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1757 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp with indices
; <0, 1, 2, 3, 5, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd takes a mem source and
; carries {%k1} {z}.
1760 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %mask) {
1761 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
1763 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1764 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
1765 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,5,4,7,6]
1767 %vec = load <8 x double>, <8 x double>* %vp
1768 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 7, i32 6>
1769 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1770 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1771 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp with indices
; <0, 1, 3, 3, 4, 5, 7, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd takes a mem source and
; writes zmm0 under {%k1}.
1774 define <8 x double> @test_masked_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
1775 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask1:
1777 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1778 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1779 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[0,1,3,3,4,5,7,7]
1781 %vec = load <8 x double>, <8 x double>* %vp
1782 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 7, i32 7>
1783 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1784 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1785 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp with indices
; <0, 1, 3, 3, 4, 5, 7, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd takes a mem source and
; carries {%k1} {z}.
1788 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
1789 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask1:
1791 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1792 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
1793 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[0,1,3,3,4,5,7,7]
1795 %vec = load <8 x double>, <8 x double>* %vp
1796 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 1, i32 3, i32 3, i32 4, i32 5, i32 7, i32 7>
1797 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1798 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1799 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp with indices
; <1, 1, 3, 3, 5, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd takes a mem source and
; writes zmm0 under {%k1}.
1802 define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
1803 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2:
1805 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1806 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1807 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,5,4,7,6]
1809 %vec = load <8 x double>, <8 x double>* %vp
1810 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 4, i32 7, i32 6>
1811 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1812 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1813 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp with indices
; <1, 1, 3, 3, 5, 4, 7, 6>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd takes a mem source and
; carries {%k1} {z}.
1816 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %mask) {
1817 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
1819 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1820 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
1821 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,5,4,7,6]
1823 %vec = load <8 x double>, <8 x double>* %vp
1824 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 3, i32 5, i32 4, i32 7, i32 6>
1825 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1826 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1827 ret <8 x double> %res
; Unmasked permute of an <8 x double> loaded from %vp with indices
; <1, 0, 3, 2, 4, 5, 6, 7>. The expected vpermilpd reads its source as a mem
; operand.
1830 define <8 x double> @test_8xdouble_perm_mem_mask3(<8 x double>* %vp) {
1831 ; CHECK-LABEL: test_8xdouble_perm_mem_mask3:
1833 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 = mem[1,0,3,2,4,5,6,7]
1835 %vec = load <8 x double>, <8 x double>* %vp
1836 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1837 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp with indices
; <1, 0, 3, 2, 4, 5, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others keep %vec2. The expected vpermilpd takes a mem source and
; writes zmm0 under {%k1}.
1839 define <8 x double> @test_masked_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
1840 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask3:
1842 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1843 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1844 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} = mem[1,0,3,2,4,5,6,7]
1846 %vec = load <8 x double>, <8 x double>* %vp
1847 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1848 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1849 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
1850 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp with indices
; <1, 0, 3, 2, 4, 5, 6, 7>: elements where %mask is oeq 0.0 take the shuffled
; value, the others are zero. The expected vpermilpd takes a mem source and
; carries {%k1} {z}.
1853 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
1854 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask3:
1856 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
1857 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
1858 ; CHECK-NEXT: vpermilpd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,3,2,4,5,6,7]
1860 %vec = load <8 x double>, <8 x double>* %vp
1861 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1862 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1863 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1864 ret <8 x double> %res