1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
; Unmasked register test: shuffle mask <0,4,1,5> interleaves the two low elements
; of %vec1 and %vec2. The assertions expect a single vunpcklps on xmm0/xmm1.
4 define <4 x float> @test_4xfloat_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2) {
5 ; CHECK-LABEL: test_4xfloat_unpack_low_mask0:
7 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
9 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes take %vec3. The assertions expect
; vcmpeqps into %k1, vunpcklps into xmm2 under {%k1}, then a copy to xmm0.
12 define <4 x float> @test_4xfloat_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
13 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask0:
15 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
16 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
17 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
18 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
20 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
21 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
22 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes are zero. The assertions expect
; vcmpeqps into %k1 and vunpcklps on xmm0 with {%k1} {z}.
26 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
27 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask0:
29 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
30 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
31 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
33 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
34 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
35 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes take %vec3. The assertions expect
; vcmpeqps into %k1, vunpcklps into xmm2 under {%k1}, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mask0;
; only the function name differs.
38 define <4 x float> @test_4xfloat_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
39 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask1:
41 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
42 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
43 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
44 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
46 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
47 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
48 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes are zero. The assertions expect
; vcmpeqps into %k1 and vunpcklps on xmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_4xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
52 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
53 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask1:
55 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
56 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
57 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
59 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
60 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
61 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes take %vec3. The assertions expect
; vcmpeqps into %k1, vunpcklps into xmm2 under {%k1}, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mask0;
; only the function name differs.
64 define <4 x float> @test_4xfloat_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
65 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask2:
67 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
68 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
69 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
70 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
72 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
73 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
74 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes are zero. The assertions expect
; vcmpeqps into %k1 and vunpcklps on xmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_4xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
78 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
79 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask2:
81 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
82 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
83 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
85 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
86 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
87 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked register test: shuffle mask <0,4,1,5> interleaves the two low elements
; of %vec1 and %vec2. The assertions expect a single vunpcklps on xmm0/xmm1.
; NOTE(review): the IR is identical to @test_4xfloat_unpack_low_mask0; only the
; function name differs.
90 define <4 x float> @test_4xfloat_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2) {
91 ; CHECK-LABEL: test_4xfloat_unpack_low_mask3:
93 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
95 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes take %vec3. The assertions expect
; vcmpeqps into %k1, vunpcklps into xmm2 under {%k1}, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mask0;
; only the function name differs.
98 define <4 x float> @test_4xfloat_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
99 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mask3:
101 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
102 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
103 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
104 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
106 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
107 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
108 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,4,1,5> unpack-low shuffle, the other lanes are zero. The assertions expect
; vcmpeqps into %k1 and vunpcklps on xmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_4xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
112 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
113 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mask3:
115 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
116 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
117 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
119 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
120 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
121 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked memory test: %vec2 is loaded from %vec2p and shuffled with %vec1 using
; mask <0,4,1,5>. The assertions expect vunpcklps to take that operand from
; memory (printed as mem[...]).
124 define <4 x float> @test_4xfloat_unpack_low_mem_mask0(<4 x float> %vec1, ptr %vec2p) {
125 ; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask0:
127 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
129 %vec2 = load <4 x float>, ptr %vec2p
130 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into xmm1 under
; {%k1} with a memory source, then a copy to xmm0.
133 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask0(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
134 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask0:
136 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
137 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
138 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
139 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
141 %vec2 = load <4 x float>, ptr %vec2p
142 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
143 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
144 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on xmm0 with
; {%k1} {z} and a memory source.
148 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask0(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
149 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask0:
151 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
152 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
153 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
155 %vec2 = load <4 x float>, ptr %vec2p
156 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
157 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
158 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into xmm1 under
; {%k1} with a memory source, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
162 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask1(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
163 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask1:
165 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
166 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
167 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
168 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
170 %vec2 = load <4 x float>, ptr %vec2p
171 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
172 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
173 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on xmm0 with
; {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_4xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
177 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask1(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
178 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask1:
180 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
181 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
182 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
184 %vec2 = load <4 x float>, ptr %vec2p
185 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
186 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
187 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into xmm1 under
; {%k1} with a memory source, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
191 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask2(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
192 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask2:
194 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
195 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
196 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
197 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
199 %vec2 = load <4 x float>, ptr %vec2p
200 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
201 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
202 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on xmm0 with
; {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_4xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
206 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask2(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
207 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask2:
209 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
210 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
211 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
213 %vec2 = load <4 x float>, ptr %vec2p
214 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
215 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
216 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked memory test: %vec2 is loaded from %vec2p and shuffled with %vec1 using
; mask <0,4,1,5>. The assertions expect vunpcklps to take that operand from
; memory (printed as mem[...]).
; NOTE(review): the IR is identical to @test_4xfloat_unpack_low_mem_mask0; only
; the function name differs.
220 define <4 x float> @test_4xfloat_unpack_low_mem_mask3(<4 x float> %vec1, ptr %vec2p) {
221 ; CHECK-LABEL: test_4xfloat_unpack_low_mem_mask3:
223 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 = xmm0[0],mem[0],xmm0[1],mem[1]
225 %vec2 = load <4 x float>, ptr %vec2p
226 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into xmm1 under
; {%k1} with a memory source, then a copy to xmm0.
; NOTE(review): the IR is identical to @test_4xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
229 define <4 x float> @test_4xfloat_masked_unpack_low_mem_mask3(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
230 ; CHECK-LABEL: test_4xfloat_masked_unpack_low_mem_mask3:
232 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
233 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
234 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0],xmm0[1],mem[1]
235 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
237 %vec2 = load <4 x float>, ptr %vec2p
238 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
239 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
240 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,4,1,5> unpack-low shuffle, the other lanes
; are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on xmm0 with
; {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_4xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
244 define <4 x float> @test_4xfloat_zero_masked_unpack_low_mem_mask3(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
245 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_low_mem_mask3:
247 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
248 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
249 ; CHECK-NEXT: vunpcklps {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0],xmm0[1],mem[1]
251 %vec2 = load <4 x float>, ptr %vec2p
252 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 0, i32 4, i32 1, i32 5>
253 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
254 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
; Unmasked register test: shuffle mask <0,8,1,9,4,12,5,13> interleaves elements
; 0,1 and 4,5 of %vec1 with the same elements of %vec2. The assertions expect a
; single vunpcklps on ymm0/ymm1.
258 define <8 x float> @test_8xfloat_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2) {
259 ; CHECK-LABEL: test_8xfloat_unpack_low_mask0:
261 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
263 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes take %vec3. The
; assertions expect vcmpeqps into %k1, vunpcklps into ymm2 under {%k1}, then a
; copy to ymm0.
266 define <8 x float> @test_8xfloat_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
267 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask0:
269 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
270 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
271 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
272 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
274 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
275 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
276 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes are zero. The
; assertions expect vcmpeqps into %k1 and vunpcklps on ymm0 with {%k1} {z}.
280 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
281 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask0:
283 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
284 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
285 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
287 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
288 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
289 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes take %vec3. The
; assertions expect vcmpeqps into %k1, vunpcklps into ymm2 under {%k1}, then a
; copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mask0;
; only the function name differs.
292 define <8 x float> @test_8xfloat_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
293 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask1:
295 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
296 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
297 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
298 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
300 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
301 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
302 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes are zero. The
; assertions expect vcmpeqps into %k1 and vunpcklps on ymm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_8xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
306 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
307 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask1:
309 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
310 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
311 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
313 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
314 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
315 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes take %vec3. The
; assertions expect vcmpeqps into %k1, vunpcklps into ymm2 under {%k1}, then a
; copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mask0;
; only the function name differs.
318 define <8 x float> @test_8xfloat_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
319 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask2:
321 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
322 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
323 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
324 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
326 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
327 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
328 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes are zero. The
; assertions expect vcmpeqps into %k1 and vunpcklps on ymm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_8xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
332 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
333 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask2:
335 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
336 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
337 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
339 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
340 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
341 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked register test: shuffle mask <0,8,1,9,4,12,5,13> interleaves elements
; 0,1 and 4,5 of %vec1 with the same elements of %vec2. The assertions expect a
; single vunpcklps on ymm0/ymm1.
; NOTE(review): the IR is identical to @test_8xfloat_unpack_low_mask0; only the
; function name differs.
344 define <8 x float> @test_8xfloat_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2) {
345 ; CHECK-LABEL: test_8xfloat_unpack_low_mask3:
347 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
349 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes take %vec3. The
; assertions expect vcmpeqps into %k1, vunpcklps into ymm2 under {%k1}, then a
; copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mask0;
; only the function name differs.
352 define <8 x float> @test_8xfloat_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
353 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mask3:
355 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
356 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
357 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
358 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
360 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
361 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
362 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; <0,8,1,9,4,12,5,13> unpack-low shuffle, the other lanes are zero. The
; assertions expect vcmpeqps into %k1 and vunpcklps on ymm0 with {%k1} {z}.
; NOTE(review): the IR is identical to @test_8xfloat_zero_masked_unpack_low_mask0;
; only the function name differs.
366 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
367 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mask3:
369 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
370 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
371 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[1],ymm1[1],ymm0[4],ymm1[4],ymm0[5],ymm1[5]
373 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
374 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
375 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked memory test: %vec2 is loaded from %vec2p and shuffled with %vec1 using
; mask <0,8,1,9,4,12,5,13>. The assertions expect vunpcklps to take that operand
; from memory (printed as mem[...]).
378 define <8 x float> @test_8xfloat_unpack_low_mem_mask0(<8 x float> %vec1, ptr %vec2p) {
379 ; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask0:
381 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
383 %vec2 = load <8 x float>, ptr %vec2p
384 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into
; ymm1 under {%k1} with a memory source, then a copy to ymm0.
387 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask0(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
388 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask0:
390 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
391 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
392 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
393 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
395 %vec2 = load <8 x float>, ptr %vec2p
396 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
397 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
398 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on
; ymm0 with {%k1} {z} and a memory source.
402 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask0(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
403 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask0:
405 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
406 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
407 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
409 %vec2 = load <8 x float>, ptr %vec2p
410 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
411 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
412 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into
; ymm1 under {%k1} with a memory source, then a copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
416 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask1(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
417 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask1:
419 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
420 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
421 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
422 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
424 %vec2 = load <8 x float>, ptr %vec2p
425 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
426 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
427 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on
; ymm0 with {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_8xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
431 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask1(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
432 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask1:
434 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
435 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
436 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
438 %vec2 = load <8 x float>, ptr %vec2p
439 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
440 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
441 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into
; ymm1 under {%k1} with a memory source, then a copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
445 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask2(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
446 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask2:
448 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
449 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
450 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
451 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
453 %vec2 = load <8 x float>, ptr %vec2p
454 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
455 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
456 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on
; ymm0 with {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_8xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
460 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask2(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
461 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask2:
463 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
464 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
465 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
467 %vec2 = load <8 x float>, ptr %vec2p
468 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
469 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
470 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked memory test: %vec2 is loaded from %vec2p and shuffled with %vec1 using
; mask <0,8,1,9,4,12,5,13>. The assertions expect vunpcklps to take that operand
; from memory (printed as mem[...]).
; NOTE(review): the IR is identical to @test_8xfloat_unpack_low_mem_mask0; only
; the function name differs.
474 define <8 x float> @test_8xfloat_unpack_low_mem_mask3(<8 x float> %vec1, ptr %vec2p) {
475 ; CHECK-LABEL: test_8xfloat_unpack_low_mem_mask3:
477 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
479 %vec2 = load <8 x float>, ptr %vec2p
480 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
; Merge-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes take %vec3. The assertions expect vcmpeqps into %k1, vunpcklps into
; ymm1 under {%k1} with a memory source, then a copy to ymm0.
; NOTE(review): the IR is identical to @test_8xfloat_masked_unpack_low_mem_mask0;
; only the function name differs.
483 define <8 x float> @test_8xfloat_masked_unpack_low_mem_mask3(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
484 ; CHECK-LABEL: test_8xfloat_masked_unpack_low_mem_mask3:
486 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
487 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
488 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
489 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
491 %vec2 = load <8 x float>, ptr %vec2p
492 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
493 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
494 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked memory test: %vec2 is loaded from %vec2p; lanes where %mask is
; ordered-equal to zero take the <0,8,1,9,4,12,5,13> unpack-low shuffle, the
; other lanes are zero. The assertions expect vcmpeqps into %k1 and vunpcklps on
; ymm0 with {%k1} {z} and a memory source.
; NOTE(review): the IR is identical to
; @test_8xfloat_zero_masked_unpack_low_mem_mask0; only the function name differs.
498 define <8 x float> @test_8xfloat_zero_masked_unpack_low_mem_mask3(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
499 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_low_mem_mask3:
501 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
502 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
503 ; CHECK-NEXT: vunpcklps {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[1],mem[1],ymm0[4],mem[4],ymm0[5],mem[5]
505 %vec2 = load <8 x float>, ptr %vec2p
506 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 4, i32 12, i32 5, i32 13>
507 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
508 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked register test: the shuffle mask interleaves elements 0,1 of every
; four-element group of %vec1 (0,1,4,5,8,9,12,13) with the same elements of
; %vec2. The assertions expect a single vunpcklps on zmm0/zmm1.
512 define <16 x float> @test_16xfloat_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2) {
513 ; CHECK-LABEL: test_16xfloat_unpack_low_mask0:
515 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
517 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
518 ret <16 x float> %res
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes take %vec3. The assertions expect vcmpeqps into
; %k1, vunpcklps into zmm2 under {%k1}, then a copy to zmm0.
520 define <16 x float> @test_16xfloat_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
521 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask0:
523 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
524 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
525 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
526 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
528 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
529 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
530 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
531 ret <16 x float> %res
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes are zero. The assertions expect vcmpeqps into %k1
; and vunpcklps on zmm0 with {%k1} {z}.
534 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
535 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask0:
537 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
538 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
539 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
541 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
542 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
543 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
544 ret <16 x float> %res
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes take %vec3. The assertions expect vcmpeqps into
; %k1, vunpcklps into zmm2 under {%k1}, then a copy to zmm0.
; NOTE(review): the IR is identical to @test_16xfloat_masked_unpack_low_mask0;
; only the function name differs.
546 define <16 x float> @test_16xfloat_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
547 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask1:
549 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
550 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
551 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
552 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
554 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
555 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
556 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
557 ret <16 x float> %res
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes are zero. The assertions expect vcmpeqps into %k1
; and vunpcklps on zmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to
; @test_16xfloat_zero_masked_unpack_low_mask0; only the function name differs.
560 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
561 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask1:
563 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
564 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
565 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
567 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
568 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
569 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
570 ret <16 x float> %res
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes take %vec3. The assertions expect vcmpeqps into
; %k1, vunpcklps into zmm2 under {%k1}, then a copy to zmm0.
; NOTE(review): the IR is identical to @test_16xfloat_masked_unpack_low_mask0;
; only the function name differs.
572 define <16 x float> @test_16xfloat_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
573 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask2:
575 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
576 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
577 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
578 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
580 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
581 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
582 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
583 ret <16 x float> %res
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes are zero. The assertions expect vcmpeqps into %k1
; and vunpcklps on zmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to
; @test_16xfloat_zero_masked_unpack_low_mask0; only the function name differs.
586 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
587 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask2:
589 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
590 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
591 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
593 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
594 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
595 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
596 ret <16 x float> %res
; Unmasked register test: the shuffle mask interleaves elements 0,1 of every
; four-element group of %vec1 (0,1,4,5,8,9,12,13) with the same elements of
; %vec2. The assertions expect a single vunpcklps on zmm0/zmm1.
; NOTE(review): the IR is identical to @test_16xfloat_unpack_low_mask0; only the
; function name differs.
598 define <16 x float> @test_16xfloat_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2) {
599 ; CHECK-LABEL: test_16xfloat_unpack_low_mask3:
601 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
603 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
604 ret <16 x float> %res
; Merge-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes take %vec3. The assertions expect vcmpeqps into
; %k1, vunpcklps into zmm2 under {%k1}, then a copy to zmm0.
; NOTE(review): the IR is identical to @test_16xfloat_masked_unpack_low_mask0;
; only the function name differs.
606 define <16 x float> @test_16xfloat_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
607 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mask3:
609 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
610 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
611 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
612 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
614 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
615 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
616 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
617 ret <16 x float> %res
; Zero-masked register test: lanes where %mask is ordered-equal to zero take the
; unpack-low shuffle (elements 0,1 of every four-element group of both inputs,
; interleaved), the other lanes are zero. The assertions expect vcmpeqps into %k1
; and vunpcklps on zmm0 with {%k1} {z}.
; NOTE(review): the IR is identical to
; @test_16xfloat_zero_masked_unpack_low_mask0; only the function name differs.
620 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
621 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mask3:
623 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
624 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
625 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[1],zmm1[1],zmm0[4],zmm1[4],zmm0[5],zmm1[5],zmm0[8],zmm1[8],zmm0[9],zmm1[9],zmm0[12],zmm1[12],zmm0[13],zmm1[13]
627 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
628 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
629 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
630 ret <16 x float> %res
; Unmasked <16 x float> shuffle whose second source is loaded from %vec2p. The
; index vector interleaves elements 0-1, 4-5, 8-9 and 12-13 of %vec1 with the
; same-numbered loaded elements. The assertions expect the loaded vector to
; appear as the `mem` operand of a single vunpcklps.
632 define <16 x float> @test_16xfloat_unpack_low_mem_mask0(<16 x float> %vec1, ptr %vec2p) {
633 ; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask0:
635 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
637 %vec2 = load <16 x float>, ptr %vec2p
638 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
639 ret <16 x float> %res
; Merge-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; the remaining lanes keep %vec3. The assertions expect a vunpcklps with a `mem`
; operand under {%k1} writing zmm1, followed by a copy of zmm1 into zmm0.
641 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask0(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
642 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask0:
644 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
645 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
646 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
647 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
649 %vec2 = load <16 x float>, ptr %vec2p
650 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
651 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
652 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
653 ret <16 x float> %res
; Zero-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; all other lanes become zero. The assertions expect a single vunpcklps with a
; `mem` operand under {%k1} {z} writing zmm0.
656 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask0(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
657 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask0:
659 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
660 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
661 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
663 %vec2 = load <16 x float>, ptr %vec2p
664 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
665 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
666 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
667 ret <16 x float> %res
; Merge-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; the remaining lanes keep %vec3. The assertions expect a vunpcklps with a `mem`
; operand under {%k1} writing zmm1, followed by a copy of zmm1 into zmm0.
670 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask1(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
671 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask1:
673 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
674 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
675 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
676 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
678 %vec2 = load <16 x float>, ptr %vec2p
679 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
680 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
681 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
682 ret <16 x float> %res
; Zero-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; all other lanes become zero. The assertions expect a single vunpcklps with a
; `mem` operand under {%k1} {z} writing zmm0.
685 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask1(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
686 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask1:
688 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
689 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
690 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
692 %vec2 = load <16 x float>, ptr %vec2p
693 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
694 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
695 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
696 ret <16 x float> %res
; Merge-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; the remaining lanes keep %vec3. The assertions expect a vunpcklps with a `mem`
; operand under {%k1} writing zmm1, followed by a copy of zmm1 into zmm0.
699 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask2(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
700 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask2:
702 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
703 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
704 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
705 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
707 %vec2 = load <16 x float>, ptr %vec2p
708 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
709 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
710 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
711 ret <16 x float> %res
; Zero-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; all other lanes become zero. The assertions expect a single vunpcklps with a
; `mem` operand under {%k1} {z} writing zmm0.
714 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask2(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
715 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask2:
717 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
718 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
719 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
721 %vec2 = load <16 x float>, ptr %vec2p
722 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
723 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
724 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
725 ret <16 x float> %res
; Unmasked <16 x float> shuffle whose second source is loaded from %vec2p. The
; index vector interleaves elements 0-1, 4-5, 8-9 and 12-13 of %vec1 with the
; same-numbered loaded elements. The assertions expect the loaded vector to
; appear as the `mem` operand of a single vunpcklps.
728 define <16 x float> @test_16xfloat_unpack_low_mem_mask3(<16 x float> %vec1, ptr %vec2p) {
729 ; CHECK-LABEL: test_16xfloat_unpack_low_mem_mask3:
731 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
733 %vec2 = load <16 x float>, ptr %vec2p
734 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
735 ret <16 x float> %res
; Merge-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; the remaining lanes keep %vec3. The assertions expect a vunpcklps with a `mem`
; operand under {%k1} writing zmm1, followed by a copy of zmm1 into zmm0.
737 define <16 x float> @test_16xfloat_masked_unpack_low_mem_mask3(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
738 ; CHECK-LABEL: test_16xfloat_masked_unpack_low_mem_mask3:
740 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
741 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
742 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
743 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
745 %vec2 = load <16 x float>, ptr %vec2p
746 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
747 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
748 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
749 ret <16 x float> %res
; Zero-masking variant with the second shuffle source loaded from %vec2p:
; lanes where %mask compares ordered-equal to zero take the interleaved shuffle,
; all other lanes become zero. The assertions expect a single vunpcklps with a
; `mem` operand under {%k1} {z} writing zmm0.
752 define <16 x float> @test_16xfloat_zero_masked_unpack_low_mem_mask3(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
753 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_low_mem_mask3:
755 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
756 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
757 ; CHECK-NEXT: vunpcklps {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[1],mem[1],zmm0[4],mem[4],zmm0[5],mem[5],zmm0[8],mem[8],zmm0[9],mem[9],zmm0[12],mem[12],zmm0[13],mem[13]
759 %vec2 = load <16 x float>, ptr %vec2p
760 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 16, i32 1, i32 17, i32 4, i32 20, i32 5, i32 21, i32 8, i32 24, i32 9, i32 25, i32 12, i32 28, i32 13, i32 29>
761 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
762 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
763 ret <16 x float> %res
; Unmasked <2 x double> shuffle producing { %vec1[0], %vec2[0] } (index 2
; selects element 0 of %vec2). Note that the assertion for this unmasked form
; expects vmovlhps rather than vunpcklpd.
766 define <2 x double> @test_2xdouble_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2) {
767 ; CHECK-LABEL: test_2xdouble_unpack_low_mask0:
769 ; CHECK-NEXT: vmovlhps {{.*#+}} xmm0 = xmm0[0],xmm1[0]
771 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
772 ret <2 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0] }, the remaining lanes keep %vec3. The
; assertions expect a vunpcklpd under {%k1} writing xmm2, then a copy of xmm2
; into xmm0.
774 define <2 x double> @test_2xdouble_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
775 ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask0:
777 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
778 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
779 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
780 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
782 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
783 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
784 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
785 ret <2 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0] }, all other lanes become zero. The
; assertions expect a single vunpcklpd under {%k1} {z} writing xmm0.
788 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
789 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask0:
791 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
792 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
793 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
795 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
796 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
797 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
798 ret <2 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0] }, the remaining lanes keep %vec3. The
; assertions expect a vunpcklpd under {%k1} writing xmm2, then a copy of xmm2
; into xmm0.
800 define <2 x double> @test_2xdouble_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
801 ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mask1:
803 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
804 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
805 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm2 {%k1} = xmm0[0],xmm1[0]
806 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
808 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
809 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
810 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
811 ret <2 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0] }, all other lanes become zero. The
; assertions expect a single vunpcklpd under {%k1} {z} writing xmm0.
814 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
815 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mask1:
817 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
818 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
819 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],xmm1[0]
821 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
822 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
823 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
824 ret <2 x double> %res
; Unmasked <2 x double> shuffle producing { %vec1[0], %vec2[0] } where %vec2 is
; loaded from %vec2p. The assertions expect the loaded vector to appear as the
; `mem` operand of a single vunpcklpd.
826 define <2 x double> @test_2xdouble_unpack_low_mem_mask0(<2 x double> %vec1, ptr %vec2p) {
827 ; CHECK-LABEL: test_2xdouble_unpack_low_mem_mask0:
829 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 = xmm0[0],mem[0]
831 %vec2 = load <2 x double>, ptr %vec2p
832 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
833 ret <2 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0] }, the remaining
; lanes keep %vec3. The assertions expect a vunpcklpd with a `mem` operand under
; {%k1} writing xmm1, then a copy of xmm1 into xmm0.
835 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask0(<2 x double> %vec1, ptr %vec2p, <2 x double> %vec3, <2 x double> %mask) {
836 ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask0:
838 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
839 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
840 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
841 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
843 %vec2 = load <2 x double>, ptr %vec2p
844 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
845 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
846 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
847 ret <2 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0] }, all other lanes
; become zero. The assertions expect a single vunpcklpd with a `mem` operand
; under {%k1} {z} writing xmm0.
850 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask0(<2 x double> %vec1, ptr %vec2p, <2 x double> %mask) {
851 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask0:
853 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
854 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
855 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
857 %vec2 = load <2 x double>, ptr %vec2p
858 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
859 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
860 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
861 ret <2 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0] }, the remaining
; lanes keep %vec3. The assertions expect a vunpcklpd with a `mem` operand under
; {%k1} writing xmm1, then a copy of xmm1 into xmm0.
864 define <2 x double> @test_2xdouble_masked_unpack_low_mem_mask1(<2 x double> %vec1, ptr %vec2p, <2 x double> %vec3, <2 x double> %mask) {
865 ; CHECK-LABEL: test_2xdouble_masked_unpack_low_mem_mask1:
867 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
868 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
869 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm1 {%k1} = xmm0[0],mem[0]
870 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
872 %vec2 = load <2 x double>, ptr %vec2p
873 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
874 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
875 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
876 ret <2 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0] }, all other lanes
; become zero. The assertions expect a single vunpcklpd with a `mem` operand
; under {%k1} {z} writing xmm0.
879 define <2 x double> @test_2xdouble_zero_masked_unpack_low_mem_mask1(<2 x double> %vec1, ptr %vec2p, <2 x double> %mask) {
880 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_low_mem_mask1:
882 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
883 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
884 ; CHECK-NEXT: vunpcklpd {{.*#+}} xmm0 {%k1} {z} = xmm0[0],mem[0]
886 %vec2 = load <2 x double>, ptr %vec2p
887 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 0, i32 2>
888 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
889 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
890 ret <2 x double> %res
; Unmasked <4 x double> shuffle producing { %vec1[0], %vec2[0], %vec1[2],
; %vec2[2] } (indices >= 4 select from %vec2). The assertions expect a single
; ymm vunpcklpd.
893 define <4 x double> @test_4xdouble_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2) {
894 ; CHECK-LABEL: test_4xdouble_unpack_low_mask0:
896 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
898 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
899 ret <4 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, the remaining lanes
; keep %vec3. The assertions expect a vunpcklpd under {%k1} writing ymm2, then a
; copy of ymm2 into ymm0.
901 define <4 x double> @test_4xdouble_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
902 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask0:
904 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
905 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
906 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
907 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
909 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
910 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
911 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
912 ret <4 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, all other lanes become
; zero. The assertions expect a single vunpcklpd under {%k1} {z} writing ymm0.
915 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
916 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask0:
918 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
919 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
920 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
922 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
923 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
924 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
925 ret <4 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, the remaining lanes
; keep %vec3. The assertions expect a vunpcklpd under {%k1} writing ymm2, then a
; copy of ymm2 into ymm0.
927 define <4 x double> @test_4xdouble_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
928 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask1:
930 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
931 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
932 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
933 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
935 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
936 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
937 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
938 ret <4 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, all other lanes become
; zero. The assertions expect a single vunpcklpd under {%k1} {z} writing ymm0.
941 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
942 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask1:
944 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
945 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
946 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
948 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
949 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
950 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
951 ret <4 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, the remaining lanes
; keep %vec3. The assertions expect a vunpcklpd under {%k1} writing ymm2, then a
; copy of ymm2 into ymm0.
953 define <4 x double> @test_4xdouble_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
954 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask2:
956 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
957 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
958 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
959 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
961 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
962 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
963 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
964 ret <4 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, all other lanes become
; zero. The assertions expect a single vunpcklpd under {%k1} {z} writing ymm0.
967 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
968 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask2:
970 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
971 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
972 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
974 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
975 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
976 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
977 ret <4 x double> %res
; Unmasked <4 x double> shuffle producing { %vec1[0], %vec2[0], %vec1[2],
; %vec2[2] } (indices >= 4 select from %vec2). The assertions expect a single
; ymm vunpcklpd.
979 define <4 x double> @test_4xdouble_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2) {
980 ; CHECK-LABEL: test_4xdouble_unpack_low_mask3:
982 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
984 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
985 ret <4 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, the remaining lanes
; keep %vec3. The assertions expect a vunpcklpd under {%k1} writing ymm2, then a
; copy of ymm2 into ymm0.
987 define <4 x double> @test_4xdouble_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
988 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mask3:
990 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
991 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
992 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm2 {%k1} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
993 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
995 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
996 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
997 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
998 ret <4 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle { %vec1[0], %vec2[0], %vec1[2], %vec2[2] }, all other lanes become
; zero. The assertions expect a single vunpcklpd under {%k1} {z} writing ymm0.
1001 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
1002 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mask3:
1004 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1005 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1006 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],ymm1[0],ymm0[2],ymm1[2]
1008 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1009 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1010 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1011 ret <4 x double> %res
; Unmasked <4 x double> shuffle producing { %vec1[0], %vec2[0], %vec1[2],
; %vec2[2] } where %vec2 is loaded from %vec2p. The assertions expect the loaded
; vector to appear as the `mem` operand of a single vunpcklpd.
1013 define <4 x double> @test_4xdouble_unpack_low_mem_mask0(<4 x double> %vec1, ptr %vec2p) {
1014 ; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask0:
1016 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
1018 %vec2 = load <4 x double>, ptr %vec2p
1019 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1020 ret <4 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; the remaining lanes keep %vec3. The assertions expect a vunpcklpd with a `mem`
; operand under {%k1} writing ymm1, then a copy of ymm1 into ymm0.
1022 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask0(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1023 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask0:
1025 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1026 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1027 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1028 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1030 %vec2 = load <4 x double>, ptr %vec2p
1031 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1032 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1033 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1034 ret <4 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; all other lanes become zero. The assertions expect a single vunpcklpd with a
; `mem` operand under {%k1} {z} writing ymm0.
1037 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask0(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
1038 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask0:
1040 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1041 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1042 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1044 %vec2 = load <4 x double>, ptr %vec2p
1045 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1046 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1047 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1048 ret <4 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; the remaining lanes keep %vec3. The assertions expect a vunpcklpd with a `mem`
; operand under {%k1} writing ymm1, then a copy of ymm1 into ymm0.
1051 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask1(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1052 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask1:
1054 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1055 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1056 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1057 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1059 %vec2 = load <4 x double>, ptr %vec2p
1060 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1061 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1062 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1063 ret <4 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; all other lanes become zero. The assertions expect a single vunpcklpd with a
; `mem` operand under {%k1} {z} writing ymm0.
1066 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask1(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
1067 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask1:
1069 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1070 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1071 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1073 %vec2 = load <4 x double>, ptr %vec2p
1074 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1075 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1076 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1077 ret <4 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; the remaining lanes keep %vec3. The assertions expect a vunpcklpd with a `mem`
; operand under {%k1} writing ymm1, then a copy of ymm1 into ymm0.
1080 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask2(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1081 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask2:
1083 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1084 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1085 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1086 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1088 %vec2 = load <4 x double>, ptr %vec2p
1089 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1090 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1091 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1092 ret <4 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; all other lanes become zero. The assertions expect a single vunpcklpd with a
; `mem` operand under {%k1} {z} writing ymm0.
1095 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask2(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
1096 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask2:
1098 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1099 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1100 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1102 %vec2 = load <4 x double>, ptr %vec2p
1103 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1104 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1105 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1106 ret <4 x double> %res
; Unmasked <4 x double> shuffle producing { %vec1[0], %vec2[0], %vec1[2],
; %vec2[2] } where %vec2 is loaded from %vec2p. The assertions expect the loaded
; vector to appear as the `mem` operand of a single vunpcklpd.
1109 define <4 x double> @test_4xdouble_unpack_low_mem_mask3(<4 x double> %vec1, ptr %vec2p) {
1110 ; CHECK-LABEL: test_4xdouble_unpack_low_mem_mask3:
1112 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 = ymm0[0],mem[0],ymm0[2],mem[2]
1114 %vec2 = load <4 x double>, ptr %vec2p
1115 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1116 ret <4 x double> %res
; Merge-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; the remaining lanes keep %vec3. The assertions expect a vunpcklpd with a `mem`
; operand under {%k1} writing ymm1, then a copy of ymm1 into ymm0.
1118 define <4 x double> @test_4xdouble_masked_unpack_low_mem_mask3(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
1119 ; CHECK-LABEL: test_4xdouble_masked_unpack_low_mem_mask3:
1121 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1122 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
1123 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm1 {%k1} = ymm0[0],mem[0],ymm0[2],mem[2]
1124 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
1126 %vec2 = load <4 x double>, ptr %vec2p
1127 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1128 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1129 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
1130 ret <4 x double> %res
; Zero-masking variant with %vec2 loaded from %vec2p: lanes where %mask
; compares ordered-equal to zero take { %vec1[0], %vec2[0], %vec1[2], %vec2[2] },
; all other lanes become zero. The assertions expect a single vunpcklpd with a
; `mem` operand under {%k1} {z} writing ymm0.
1133 define <4 x double> @test_4xdouble_zero_masked_unpack_low_mem_mask3(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
1134 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_low_mem_mask3:
1136 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1137 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
1138 ; CHECK-NEXT: vunpcklpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0],mem[0],ymm0[2],mem[2]
1140 %vec2 = load <4 x double>, ptr %vec2p
1141 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
1142 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
1143 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
1144 ret <4 x double> %res
; Unmasked <8 x double> shuffle interleaving the even-numbered elements (0, 2,
; 4, 6) of %vec1 with the same-numbered elements of %vec2 (indices >= 8 select
; from %vec2). The assertions expect a single zmm vunpcklpd.
1147 define <8 x double> @test_8xdouble_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2) {
1148 ; CHECK-LABEL: test_8xdouble_unpack_low_mask0:
1150 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1152 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1153 ret <8 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, the remaining
; lanes keep %vec3. The assertions expect a vunpcklpd under {%k1} writing zmm2,
; then a copy of zmm2 into zmm0.
1155 define <8 x double> @test_8xdouble_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1156 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask0:
1158 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
1159 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
1160 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1161 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
1163 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1164 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1165 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1166 ret <8 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, all other
; lanes become zero. The assertions expect a single vunpcklpd under {%k1} {z}
; writing zmm0.
1169 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1170 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask0:
1172 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1173 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1174 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1176 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1177 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1178 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1179 ret <8 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, the remaining
; lanes keep %vec3. The assertions expect a vunpcklpd under {%k1} writing zmm2,
; then a copy of zmm2 into zmm0.
1181 define <8 x double> @test_8xdouble_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1182 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask1:
1184 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
1185 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
1186 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1187 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
1189 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1190 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1191 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1192 ret <8 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, all other
; lanes become zero. The assertions expect a single vunpcklpd under {%k1} {z}
; writing zmm0.
1195 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1196 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask1:
1198 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1199 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1200 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1202 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1203 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1204 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1205 ret <8 x double> %res
; Merge-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, the remaining
; lanes keep %vec3. The assertions expect a vunpcklpd under {%k1} writing zmm2,
; then a copy of zmm2 into zmm0.
1207 define <8 x double> @test_8xdouble_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1208 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask2:
1210 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
1211 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
1212 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1213 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
1215 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1216 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1217 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1218 ret <8 x double> %res
; Zero-masking variant: lanes where %mask compares ordered-equal to zero take
; the shuffle interleaving elements 0, 2, 4, 6 of %vec1 and %vec2, all other
; lanes become zero. The assertions expect a single vunpcklpd under {%k1} {z}
; writing zmm0.
1221 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1222 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask2:
1224 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1225 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1226 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1228 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1229 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1230 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1231 ret <8 x double> %res
; Unmasked <8 x double> shuffle interleaving the even-numbered elements (0, 2,
; 4, 6) of %vec1 with the same-numbered elements of %vec2 (indices >= 8 select
; from %vec2). The assertions expect a single zmm vunpcklpd.
1233 define <8 x double> @test_8xdouble_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2) {
1234 ; CHECK-LABEL: test_8xdouble_unpack_low_mask3:
1236 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1238 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1239 ret <8 x double> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 0, 2, 4, 6 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpcklpd into zmm2,
; and a copy of zmm2 to zmm0.
1241 define <8 x double> @test_8xdouble_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
1242 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mask3:
1244 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
1245 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
1246 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm2 {%k1} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1247 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
1249 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1250 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1251 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1252 ret <8 x double> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 0, 2, 4, 6 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpcklpd writing zmm0.
1255 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
1256 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mask3:
1258 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1259 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1260 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],zmm1[0],zmm0[2],zmm1[2],zmm0[4],zmm1[4],zmm0[6],zmm1[6]
1262 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1263 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1264 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1265 ret <8 x double> %res
; Unmasked case, memory operand: %vec2 is loaded from %vec2p and its elements
; 0, 2, 4, 6 are interleaved with the same elements of %vec1. The checks expect
; vunpcklpd to read the second source directly from memory (mem[...]).
1267 define <8 x double> @test_8xdouble_unpack_low_mem_mask0(<8 x double> %vec1, ptr %vec2p) {
1268 ; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask0:
1270 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1272 %vec2 = load <8 x double>, ptr %vec2p
1273 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1274 ret <8 x double> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpcklpd with a
; mem[...] source writing zmm1, followed by a copy of zmm1 to zmm0.
1276 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask0(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1277 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask0:
1279 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1280 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1281 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1282 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1284 %vec2 = load <8 x double>, ptr %vec2p
1285 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1286 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1287 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1288 ret <8 x double> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpcklpd with a
; mem[...] source writing zmm0.
1291 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask0(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
1292 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask0:
1294 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1295 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1296 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1298 %vec2 = load <8 x double>, ptr %vec2p
1299 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1300 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1301 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1302 ret <8 x double> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpcklpd with a
; mem[...] source writing zmm1, followed by a copy of zmm1 to zmm0.
1305 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask1(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1306 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask1:
1308 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1309 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1310 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1311 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1313 %vec2 = load <8 x double>, ptr %vec2p
1314 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1315 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1316 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1317 ret <8 x double> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpcklpd with a
; mem[...] source writing zmm0.
1320 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask1(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
1321 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask1:
1323 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1324 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1325 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1327 %vec2 = load <8 x double>, ptr %vec2p
1328 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1329 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1330 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1331 ret <8 x double> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpcklpd with a
; mem[...] source writing zmm1, followed by a copy of zmm1 to zmm0.
1334 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask2(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1335 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask2:
1337 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1338 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1339 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1340 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1342 %vec2 = load <8 x double>, ptr %vec2p
1343 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1344 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1345 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1346 ret <8 x double> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpcklpd with a
; mem[...] source writing zmm0.
1349 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask2(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
1350 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask2:
1352 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1353 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1354 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1356 %vec2 = load <8 x double>, ptr %vec2p
1357 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1358 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1359 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1360 ret <8 x double> %res
; Unmasked case, memory operand: %vec2 is loaded from %vec2p and its elements
; 0, 2, 4, 6 are interleaved with the same elements of %vec1. The checks expect
; vunpcklpd to read the second source directly from memory (mem[...]).
1363 define <8 x double> @test_8xdouble_unpack_low_mem_mask3(<8 x double> %vec1, ptr %vec2p) {
1364 ; CHECK-LABEL: test_8xdouble_unpack_low_mem_mask3:
1366 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1368 %vec2 = load <8 x double>, ptr %vec2p
1369 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1370 ret <8 x double> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpcklpd with a
; mem[...] source writing zmm1, followed by a copy of zmm1 to zmm0.
1372 define <8 x double> @test_8xdouble_masked_unpack_low_mem_mask3(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
1373 ; CHECK-LABEL: test_8xdouble_masked_unpack_low_mem_mask3:
1375 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
1376 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
1377 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm1 {%k1} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1378 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1380 %vec2 = load <8 x double>, ptr %vec2p
1381 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1382 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1383 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1384 ret <8 x double> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 0, 2, 4, 6 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpcklpd with a
; mem[...] source writing zmm0.
1387 define <8 x double> @test_8xdouble_zero_masked_unpack_low_mem_mask3(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
1388 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_low_mem_mask3:
1390 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1391 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1392 ; CHECK-NEXT: vunpcklpd {{.*#+}} zmm0 {%k1} {z} = zmm0[0],mem[0],zmm0[2],mem[2],zmm0[4],mem[4],zmm0[6],mem[6]
1394 %vec2 = load <8 x double>, ptr %vec2p
1395 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 8, i32 2, i32 10, i32 4, i32 12, i32 6, i32 14>
1396 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1397 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1398 ret <8 x double> %res
; Unmasked case, register operands: the shuffle interleaves elements 2 and 3
; of %vec1 with the same elements of %vec2. The checks expect a single
; vunpckhps on xmm registers.
1401 define <4 x float> @test_4xfloat_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2) {
1402 ; CHECK-LABEL: test_4xfloat_unpack_high_mask0:
1404 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1406 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1407 ret <4 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into xmm2,
; and a copy of xmm2 to xmm0.
1409 define <4 x float> @test_4xfloat_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1410 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask0:
1412 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1413 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
1414 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1415 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
1417 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1418 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1419 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1420 ret <4 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing xmm0.
1423 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask0(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1424 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask0:
1426 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1427 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1428 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1430 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1431 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1432 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1433 ret <4 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into xmm2,
; and a copy of xmm2 to xmm0.
1435 define <4 x float> @test_4xfloat_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1436 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask1:
1438 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1439 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
1440 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1441 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
1443 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1444 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1445 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1446 ret <4 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing xmm0.
1449 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask1(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1450 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask1:
1452 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1453 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1454 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1456 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1457 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1458 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1459 ret <4 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into xmm2,
; and a copy of xmm2 to xmm0.
1461 define <4 x float> @test_4xfloat_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1462 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask2:
1464 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1465 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
1466 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1467 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
1469 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1470 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1471 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1472 ret <4 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing xmm0.
1475 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask2(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1476 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask2:
1478 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1479 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1480 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1482 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1483 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1484 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1485 ret <4 x float> %res
; Unmasked case, register operands: the shuffle interleaves elements 2 and 3
; of %vec1 with the same elements of %vec2. The checks expect a single
; vunpckhps on xmm registers.
1487 define <4 x float> @test_4xfloat_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2) {
1488 ; CHECK-LABEL: test_4xfloat_unpack_high_mask3:
1490 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1492 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1493 ret <4 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into xmm2,
; and a copy of xmm2 to xmm0.
1495 define <4 x float> @test_4xfloat_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %vec3, <4 x float> %mask) {
1496 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mask3:
1498 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1499 ; CHECK-NEXT: vcmpeqps %xmm4, %xmm3, %k1
1500 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm2 {%k1} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1501 ; CHECK-NEXT: vmovaps %xmm2, %xmm0
1503 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1504 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1505 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1506 ret <4 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2 and 3 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing xmm0.
1509 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mask3(<4 x float> %vec1, <4 x float> %vec2, <4 x float> %mask) {
1510 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mask3:
1512 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1513 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1514 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],xmm1[2],xmm0[3],xmm1[3]
1516 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1517 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1518 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1519 ret <4 x float> %res
; Unmasked case, memory operand: %vec2 is loaded from %vec2p and its elements
; 2 and 3 are interleaved with the same elements of %vec1. The checks expect
; vunpckhps to read the second source directly from memory (mem[...]).
1521 define <4 x float> @test_4xfloat_unpack_high_mem_mask0(<4 x float> %vec1, ptr %vec2p) {
1522 ; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask0:
1524 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
1526 %vec2 = load <4 x float>, ptr %vec2p
1527 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1528 ret <4 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing xmm1, followed by a copy of xmm1 to xmm0.
1530 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask0(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1531 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask0:
1533 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1534 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1535 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1536 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1538 %vec2 = load <4 x float>, ptr %vec2p
1539 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1540 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1541 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1542 ret <4 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing xmm0.
1545 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask0(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
1546 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask0:
1548 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1549 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
1550 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1552 %vec2 = load <4 x float>, ptr %vec2p
1553 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1554 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1555 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1556 ret <4 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing xmm1, followed by a copy of xmm1 to xmm0.
1559 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask1(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1560 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask1:
1562 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1563 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1564 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1565 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1567 %vec2 = load <4 x float>, ptr %vec2p
1568 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1569 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1570 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1571 ret <4 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing xmm0.
1574 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask1(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
1575 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask1:
1577 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1578 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
1579 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1581 %vec2 = load <4 x float>, ptr %vec2p
1582 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1583 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1584 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1585 ret <4 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing xmm1, followed by a copy of xmm1 to xmm0.
1588 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask2(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1589 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask2:
1591 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1592 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1593 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1594 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1596 %vec2 = load <4 x float>, ptr %vec2p
1597 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1598 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1599 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1600 ret <4 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing xmm0.
1603 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask2(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
1604 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask2:
1606 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1607 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
1608 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1610 %vec2 = load <4 x float>, ptr %vec2p
1611 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1612 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1613 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1614 ret <4 x float> %res
; Unmasked case, memory operand: %vec2 is loaded from %vec2p and its elements
; 2 and 3 are interleaved with the same elements of %vec1. The checks expect
; vunpckhps to read the second source directly from memory (mem[...]).
1617 define <4 x float> @test_4xfloat_unpack_high_mem_mask3(<4 x float> %vec1, ptr %vec2p) {
1618 ; CHECK-LABEL: test_4xfloat_unpack_high_mem_mask3:
1620 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 = xmm0[2],mem[2],xmm0[3],mem[3]
1622 %vec2 = load <4 x float>, ptr %vec2p
1623 %res = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1624 ret <4 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing xmm1, followed by a copy of xmm1 to xmm0.
1626 define <4 x float> @test_4xfloat_masked_unpack_high_mem_mask3(<4 x float> %vec1, ptr %vec2p, <4 x float> %vec3, <4 x float> %mask) {
1627 ; CHECK-LABEL: test_4xfloat_masked_unpack_high_mem_mask3:
1629 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1630 ; CHECK-NEXT: vcmpeqps %xmm3, %xmm2, %k1
1631 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm1 {%k1} = xmm0[2],mem[2],xmm0[3],mem[3]
1632 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
1634 %vec2 = load <4 x float>, ptr %vec2p
1635 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1636 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1637 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> %vec3
1638 ret <4 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2 and 3 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing xmm0.
1641 define <4 x float> @test_4xfloat_zero_masked_unpack_high_mem_mask3(<4 x float> %vec1, ptr %vec2p, <4 x float> %mask) {
1642 ; CHECK-LABEL: test_4xfloat_zero_masked_unpack_high_mem_mask3:
1644 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1645 ; CHECK-NEXT: vcmpeqps %xmm2, %xmm1, %k1
1646 ; CHECK-NEXT: vunpckhps {{.*#+}} xmm0 {%k1} {z} = xmm0[2],mem[2],xmm0[3],mem[3]
1648 %vec2 = load <4 x float>, ptr %vec2p
1649 %shuf = shufflevector <4 x float> %vec1, <4 x float> %vec2, <4 x i32> <i32 2, i32 6, i32 3, i32 7>
1650 %cmp = fcmp oeq <4 x float> %mask, zeroinitializer
1651 %res = select <4 x i1> %cmp, <4 x float> %shuf, <4 x float> zeroinitializer
1652 ret <4 x float> %res
; Unmasked case, register operands: the shuffle interleaves elements 2, 3, 6, 7
; of %vec1 with the same elements of %vec2. The checks expect a single
; vunpckhps on ymm registers.
1655 define <8 x float> @test_8xfloat_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2) {
1656 ; CHECK-LABEL: test_8xfloat_unpack_high_mask0:
1658 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1660 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1661 ret <8 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into ymm2,
; and a copy of ymm2 to ymm0.
1663 define <8 x float> @test_8xfloat_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1664 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask0:
1666 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1667 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
1668 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1669 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
1671 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1672 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1673 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1674 ret <8 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing ymm0.
1677 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1678 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask0:
1680 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1681 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1682 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1684 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1685 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1686 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1687 ret <8 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into ymm2,
; and a copy of ymm2 to ymm0.
1689 define <8 x float> @test_8xfloat_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1690 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask1:
1692 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1693 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
1694 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1695 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
1697 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1698 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1699 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1700 ret <8 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing ymm0.
1703 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1704 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask1:
1706 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1707 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1708 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1710 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1711 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1712 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1713 ret <8 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into ymm2,
; and a copy of ymm2 to ymm0.
1715 define <8 x float> @test_8xfloat_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1716 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask2:
1718 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1719 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
1720 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1721 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
1723 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1724 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1725 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1726 ret <8 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing ymm0.
1729 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1730 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask2:
1732 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1733 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1734 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1736 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1737 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1738 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1739 ret <8 x float> %res
; Unmasked case, register operands: the shuffle interleaves elements 2, 3, 6, 7
; of %vec1 with the same elements of %vec2. The checks expect a single
; vunpckhps on ymm registers.
1741 define <8 x float> @test_8xfloat_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2) {
1742 ; CHECK-LABEL: test_8xfloat_unpack_high_mask3:
1744 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1746 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1747 ret <8 x float> %res
; Merge-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes take %vec3.
; The checks expect a compare into %k1, a {%k1}-masked vunpckhps into ymm2,
; and a copy of ymm2 to ymm0.
1749 define <8 x float> @test_8xfloat_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
1750 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mask3:
1752 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1753 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
1754 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm2 {%k1} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1755 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
1757 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1758 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1759 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1760 ret <8 x float> %res
; Zero-masked case, register operands: lanes where %mask is oeq 0.0 take the
; interleave of elements 2, 3, 6, 7 of %vec1/%vec2, the other lanes are zero.
; The checks expect a compare into %k1 and a {%k1} {z} vunpckhps writing ymm0.
1763 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
1764 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mask3:
1766 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1767 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1768 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],ymm1[2],ymm0[3],ymm1[3],ymm0[6],ymm1[6],ymm0[7],ymm1[7]
1770 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1771 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1772 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1773 ret <8 x float> %res
; Unmasked case, memory operand: %vec2 is loaded from %vec2p and its elements
; 2, 3, 6, 7 are interleaved with the same elements of %vec1. The checks expect
; vunpckhps to read the second source directly from memory (mem[...]).
1775 define <8 x float> @test_8xfloat_unpack_high_mem_mask0(<8 x float> %vec1, ptr %vec2p) {
1776 ; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask0:
1778 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1780 %vec2 = load <8 x float>, ptr %vec2p
1781 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1782 ret <8 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing ymm1, followed by a copy of ymm1 to ymm0.
1784 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask0(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1785 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask0:
1787 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1788 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1789 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1790 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1792 %vec2 = load <8 x float>, ptr %vec2p
1793 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1794 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1795 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1796 ret <8 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing ymm0.
1799 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask0(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
1800 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask0:
1802 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1803 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1804 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1806 %vec2 = load <8 x float>, ptr %vec2p
1807 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1808 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1809 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1810 ret <8 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing ymm1, followed by a copy of ymm1 to ymm0.
1813 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask1(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1814 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask1:
1816 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1817 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1818 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1819 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1821 %vec2 = load <8 x float>, ptr %vec2p
1822 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1823 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1824 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1825 ret <8 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing ymm0.
1828 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask1(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
1829 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask1:
1831 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1832 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1833 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1835 %vec2 = load <8 x float>, ptr %vec2p
1836 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1837 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1838 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1839 ret <8 x float> %res
; Merge-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes take %vec3. The checks expect a {%k1}-masked vunpckhps with a
; mem[...] source writing ymm1, followed by a copy of ymm1 to ymm0.
1842 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask2(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1843 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask2:
1845 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1846 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1847 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1848 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1850 %vec2 = load <8 x float>, ptr %vec2p
1851 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1852 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1853 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1854 ret <8 x float> %res
; Zero-masked case, memory operand: %vec2 is loaded from %vec2p; lanes where
; %mask is oeq 0.0 take the interleave of elements 2, 3, 6, 7 of %vec1/%vec2,
; the other lanes are zero. The checks expect a {%k1} {z} vunpckhps with a
; mem[...] source writing ymm0.
1857 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask2(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
1858 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask2:
1860 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1861 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1862 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1864 %vec2 = load <8 x float>, ptr %vec2p
1865 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1866 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1867 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1868 ret <8 x float> %res
; Unmasked high unpack of <8 x float> with the second operand loaded from
; %vec2p: interleaves elements 2,3 and 6,7 of %vec1 with the same positions of
; the loaded vector. Expected to become one vunpckhps with a memory operand.
1871 define <8 x float> @test_8xfloat_unpack_high_mem_mask3(<8 x float> %vec1, ptr %vec2p) {
1872 ; CHECK-LABEL: test_8xfloat_unpack_high_mem_mask3:
1874 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1876 %vec2 = load <8 x float>, ptr %vec2p
1877 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1878 ret <8 x float> %res
; Merge-masked high unpack of <8 x float> with the second operand loaded from
; %vec2p. The select keeps the shuffle result where %mask is ordered-equal to
; 0.0 and %vec3 elsewhere. Expected code: vcmpeqps against a zeroed register
; into %k1, vunpckhps under {%k1} with a memory operand writing ymm1, then a
; copy of ymm1 into ymm0.
1880 define <8 x float> @test_8xfloat_masked_unpack_high_mem_mask3(<8 x float> %vec1, ptr %vec2p, <8 x float> %vec3, <8 x float> %mask) {
1881 ; CHECK-LABEL: test_8xfloat_masked_unpack_high_mem_mask3:
1883 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1884 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
1885 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm1 {%k1} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1886 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1888 %vec2 = load <8 x float>, ptr %vec2p
1889 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1890 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1891 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
1892 ret <8 x float> %res
; Zero-masked high unpack of <8 x float> with the second operand loaded from
; %vec2p. The select keeps the shuffle result where %mask is ordered-equal to
; 0.0 and zero elsewhere. Expected code: vcmpeqps against a zeroed register
; into %k1, then vunpckhps with {%k1} {z} and a memory operand.
1895 define <8 x float> @test_8xfloat_zero_masked_unpack_high_mem_mask3(<8 x float> %vec1, ptr %vec2p, <8 x float> %mask) {
1896 ; CHECK-LABEL: test_8xfloat_zero_masked_unpack_high_mem_mask3:
1898 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1899 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
1900 ; CHECK-NEXT: vunpckhps {{.*#+}} ymm0 {%k1} {z} = ymm0[2],mem[2],ymm0[3],mem[3],ymm0[6],mem[6],ymm0[7],mem[7]
1902 %vec2 = load <8 x float>, ptr %vec2p
1903 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 2, i32 10, i32 3, i32 11, i32 6, i32 14, i32 7, i32 15>
1904 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1905 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1906 ret <8 x float> %res
; Unmasked high unpack of two <16 x float> register operands: the shuffle
; interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the same
; positions of %vec2 (indices 16+ select from %vec2). Expected to become one
; vunpckhps on zmm registers.
1909 define <16 x float> @test_16xfloat_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2) {
1910 ; CHECK-LABEL: test_16xfloat_unpack_high_mask0:
1912 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1914 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1915 ret <16 x float> %res
; Merge-masked high unpack of two <16 x float> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the interleaved elements (2,3, 6,7, 10,11
; and 14,15 of %vec1 and %vec2); all other lanes keep %vec3. Expected code:
; vcmpeqps against a zeroed register into %k1, vunpckhps under {%k1} writing
; zmm2, then a copy of zmm2 into zmm0.
1917 define <16 x float> @test_16xfloat_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1918 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask0:
1920 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1921 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
1922 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1923 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1925 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1926 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1927 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1928 ret <16 x float> %res
; Zero-masked high unpack of two <16 x float> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the interleaved elements (2,3, 6,7, 10,11
; and 14,15 of %vec1 and %vec2); all other lanes become zero. Expected code:
; vcmpeqps against a zeroed register into %k1, then vunpckhps with {%k1} {z}.
1931 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1932 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask0:
1934 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1935 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1936 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1938 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1939 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1940 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1941 ret <16 x float> %res
; Merge-masked high unpack of two <16 x float> register operands. The select
; keeps the shuffle result where %mask is ordered-equal to 0.0 and %vec3
; elsewhere. Expected code: vcmpeqps against a zeroed register into %k1,
; vunpckhps under {%k1} writing zmm2, then a copy of zmm2 into zmm0.
1943 define <16 x float> @test_16xfloat_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1944 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask1:
1946 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1947 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
1948 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1949 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1951 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1952 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1953 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1954 ret <16 x float> %res
; Zero-masked high unpack of two <16 x float> register operands. The select
; keeps the shuffle result where %mask is ordered-equal to 0.0 and zero
; elsewhere. Expected code: vcmpeqps against a zeroed register into %k1, then
; vunpckhps with {%k1} {z}.
1957 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1958 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask1:
1960 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1961 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1962 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1964 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1965 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1966 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1967 ret <16 x float> %res
; Merge-masked high unpack of two <16 x float> register operands: shuffle
; result where %mask is ordered-equal to 0.0, %vec3 otherwise. Expected code:
; vcmpeqps against a zeroed register into %k1, vunpckhps under {%k1} writing
; zmm2, then a copy of zmm2 into zmm0.
1969 define <16 x float> @test_16xfloat_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
1970 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask2:
1972 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1973 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
1974 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1975 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
1977 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1978 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1979 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
1980 ret <16 x float> %res
; Zero-masked high unpack of two <16 x float> register operands: shuffle
; result where %mask is ordered-equal to 0.0, zero otherwise. Expected code:
; vcmpeqps against a zeroed register into %k1, then vunpckhps with {%k1} {z}.
1983 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
1984 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask2:
1986 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1987 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
1988 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
1990 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
1991 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
1992 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
1993 ret <16 x float> %res
; Unmasked high unpack of two <16 x float> register operands (elements 2,3,
; 6,7, 10,11 and 14,15 of %vec1 interleaved with those of %vec2). Expected to
; become one vunpckhps on zmm registers.
1995 define <16 x float> @test_16xfloat_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2) {
1996 ; CHECK-LABEL: test_16xfloat_unpack_high_mask3:
1998 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
2000 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2001 ret <16 x float> %res
; Merge-masked high unpack of two <16 x float> register operands: the select
; picks the shuffle result where %mask is ordered-equal to 0.0 and %vec3
; otherwise. Expected code: vcmpeqps against a zeroed register into %k1,
; vunpckhps under {%k1} writing zmm2, then a copy of zmm2 into zmm0.
2003 define <16 x float> @test_16xfloat_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
2004 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mask3:
2006 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2007 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
2008 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm2 {%k1} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
2009 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
2011 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2012 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2013 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2014 ret <16 x float> %res
; Zero-masked high unpack of two <16 x float> register operands: the select
; picks the shuffle result where %mask is ordered-equal to 0.0 and zero
; otherwise. Expected code: vcmpeqps against a zeroed register into %k1, then
; vunpckhps with {%k1} {z}.
2017 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
2018 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mask3:
2020 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2021 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
2022 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],zmm1[2],zmm0[3],zmm1[3],zmm0[6],zmm1[6],zmm0[7],zmm1[7],zmm0[10],zmm1[10],zmm0[11],zmm1[11],zmm0[14],zmm1[14],zmm0[15],zmm1[15]
2024 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2025 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2026 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2027 ret <16 x float> %res
; Unmasked high unpack of <16 x float> with the second operand loaded from
; %vec2p: interleaves elements 2,3, 6,7, 10,11 and 14,15 of %vec1 with the
; same positions of the loaded vector. Expected to become one vunpckhps with a
; memory operand.
2029 define <16 x float> @test_16xfloat_unpack_high_mem_mask0(<16 x float> %vec1, ptr %vec2p) {
2030 ; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask0:
2032 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2034 %vec2 = load <16 x float>, ptr %vec2p
2035 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2036 ret <16 x float> %res
; Merge-masked high unpack of <16 x float> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the interleaved
; elements; all other lanes keep %vec3. Expected code: vcmpeqps against a
; zeroed register into %k1, vunpckhps under {%k1} with a memory operand
; writing zmm1, then a copy of zmm1 into zmm0.
2038 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask0(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2039 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask0:
2041 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2042 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
2043 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2044 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2046 %vec2 = load <16 x float>, ptr %vec2p
2047 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2048 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2049 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2050 ret <16 x float> %res
; Zero-masked high unpack of <16 x float> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the interleaved
; elements; all other lanes become zero. Expected code: vcmpeqps against a
; zeroed register into %k1, then vunpckhps with {%k1} {z} and a memory
; operand.
2053 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask0(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
2054 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask0:
2056 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2057 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
2058 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2060 %vec2 = load <16 x float>, ptr %vec2p
2061 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2062 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2063 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2064 ret <16 x float> %res
; Merge-masked high unpack of <16 x float> with the second operand loaded from
; %vec2p: shuffle result where %mask is ordered-equal to 0.0, %vec3 otherwise.
; Expected code: vcmpeqps against a zeroed register into %k1, vunpckhps under
; {%k1} with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
2067 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask1(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2068 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask1:
2070 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2071 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
2072 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2073 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2075 %vec2 = load <16 x float>, ptr %vec2p
2076 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2077 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2078 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2079 ret <16 x float> %res
; Zero-masked high unpack of <16 x float> with the second operand loaded from
; %vec2p: shuffle result where %mask is ordered-equal to 0.0, zero otherwise.
; Expected code: vcmpeqps against a zeroed register into %k1, then vunpckhps
; with {%k1} {z} and a memory operand.
2082 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask1(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
2083 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask1:
2085 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2086 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
2087 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2089 %vec2 = load <16 x float>, ptr %vec2p
2090 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2091 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2092 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2093 ret <16 x float> %res
; Merge-masked high unpack of <16 x float>; %vec2 comes from a load of %vec2p.
; The select keeps the shuffle result where %mask is ordered-equal to 0.0 and
; %vec3 elsewhere. Expected code: vcmpeqps against a zeroed register into %k1,
; vunpckhps under {%k1} with a memory operand writing zmm1, then a copy of
; zmm1 into zmm0.
2096 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask2(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2097 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask2:
2099 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2100 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
2101 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2102 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2104 %vec2 = load <16 x float>, ptr %vec2p
2105 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2106 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2107 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2108 ret <16 x float> %res
; Zero-masked high unpack of <16 x float>; %vec2 comes from a load of %vec2p.
; The select keeps the shuffle result where %mask is ordered-equal to 0.0 and
; zero elsewhere. Expected code: vcmpeqps against a zeroed register into %k1,
; then vunpckhps with {%k1} {z} and a memory operand.
2111 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask2(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
2112 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask2:
2114 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2115 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
2116 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2118 %vec2 = load <16 x float>, ptr %vec2p
2119 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2120 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2121 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2122 ret <16 x float> %res
; Unmasked high unpack of <16 x float>; %vec2 comes from a load of %vec2p.
; Expected to become one vunpckhps that takes the load as its memory operand.
2125 define <16 x float> @test_16xfloat_unpack_high_mem_mask3(<16 x float> %vec1, ptr %vec2p) {
2126 ; CHECK-LABEL: test_16xfloat_unpack_high_mem_mask3:
2128 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2130 %vec2 = load <16 x float>, ptr %vec2p
2131 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2132 ret <16 x float> %res
; Merge-masked high unpack of <16 x float> with a loaded second operand:
; interleaved elements where %mask is ordered-equal to 0.0, %vec3 elsewhere.
; Expected code: vcmpeqps against a zeroed register into %k1, vunpckhps under
; {%k1} with a memory operand writing zmm1, then a copy of zmm1 into zmm0.
2134 define <16 x float> @test_16xfloat_masked_unpack_high_mem_mask3(<16 x float> %vec1, ptr %vec2p, <16 x float> %vec3, <16 x float> %mask) {
2135 ; CHECK-LABEL: test_16xfloat_masked_unpack_high_mem_mask3:
2137 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2138 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
2139 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm1 {%k1} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2140 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2142 %vec2 = load <16 x float>, ptr %vec2p
2143 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2144 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2145 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
2146 ret <16 x float> %res
; Zero-masked high unpack of <16 x float> with a loaded second operand:
; interleaved elements where %mask is ordered-equal to 0.0, zero elsewhere.
; Expected code: vcmpeqps against a zeroed register into %k1, then vunpckhps
; with {%k1} {z} and a memory operand.
2149 define <16 x float> @test_16xfloat_zero_masked_unpack_high_mem_mask3(<16 x float> %vec1, ptr %vec2p, <16 x float> %mask) {
2150 ; CHECK-LABEL: test_16xfloat_zero_masked_unpack_high_mem_mask3:
2152 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2153 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
2154 ; CHECK-NEXT: vunpckhps {{.*#+}} zmm0 {%k1} {z} = zmm0[2],mem[2],zmm0[3],mem[3],zmm0[6],mem[6],zmm0[7],mem[7],zmm0[10],mem[10],zmm0[11],mem[11],zmm0[14],mem[14],zmm0[15],mem[15]
2156 %vec2 = load <16 x float>, ptr %vec2p
2157 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 2, i32 18, i32 3, i32 19, i32 6, i32 22, i32 7, i32 23, i32 10, i32 26, i32 11, i32 27, i32 14, i32 30, i32 15, i32 31>
2158 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2159 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2160 ret <16 x float> %res
; Unmasked high unpack of two <2 x double> register operands: the result is
; element 1 of %vec1 followed by element 1 of %vec2 (shuffle index 3).
; Expected to become one vunpckhpd on xmm registers.
2163 define <2 x double> @test_2xdouble_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2) {
2164 ; CHECK-LABEL: test_2xdouble_unpack_high_mask0:
2166 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],xmm1[1]
2168 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2169 ret <2 x double> %res
; Merge-masked high unpack of two <2 x double> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the shuffle result (element 1 of %vec1,
; element 1 of %vec2); other lanes keep %vec3. Expected code: vcmpeqpd against
; a zeroed register into %k1, vunpckhpd under {%k1} writing xmm2, then a copy
; of xmm2 into xmm0.
2171 define <2 x double> @test_2xdouble_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
2172 ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask0:
2174 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2175 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
2176 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
2177 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2179 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2180 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2181 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
2182 ret <2 x double> %res
; Zero-masked high unpack of two <2 x double> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the shuffle result (element 1 of %vec1,
; element 1 of %vec2); other lanes become zero. Expected code: vcmpeqpd
; against a zeroed register into %k1, then vunpckhpd with {%k1} {z}.
2185 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask0(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
2186 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask0:
2188 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2189 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
2190 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
2192 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2193 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2194 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
2195 ret <2 x double> %res
; Merge-masked high unpack of two <2 x double> register operands: shuffle
; result where %mask is ordered-equal to 0.0, %vec3 otherwise. Expected code:
; vcmpeqpd against a zeroed register into %k1, vunpckhpd under {%k1} writing
; xmm2, then a copy of xmm2 into xmm0.
2197 define <2 x double> @test_2xdouble_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %vec3, <2 x double> %mask) {
2198 ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mask1:
2200 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2201 ; CHECK-NEXT: vcmpeqpd %xmm4, %xmm3, %k1
2202 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm2 {%k1} = xmm0[1],xmm1[1]
2203 ; CHECK-NEXT: vmovapd %xmm2, %xmm0
2205 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2206 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2207 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
2208 ret <2 x double> %res
; Zero-masked high unpack of two <2 x double> register operands: shuffle
; result where %mask is ordered-equal to 0.0, zero otherwise. Expected code:
; vcmpeqpd against a zeroed register into %k1, then vunpckhpd with {%k1} {z}.
2211 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mask1(<2 x double> %vec1, <2 x double> %vec2, <2 x double> %mask) {
2212 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mask1:
2214 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2215 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
2216 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],xmm1[1]
2218 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2219 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2220 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
2221 ret <2 x double> %res
; Unmasked high unpack of <2 x double> with the second operand loaded from
; %vec2p: element 1 of %vec1 followed by element 1 of the loaded vector.
; Expected to become one vunpckhpd with a memory operand.
2223 define <2 x double> @test_2xdouble_unpack_high_mem_mask0(<2 x double> %vec1, ptr %vec2p) {
2224 ; CHECK-LABEL: test_2xdouble_unpack_high_mem_mask0:
2226 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 = xmm0[1],mem[1]
2228 %vec2 = load <2 x double>, ptr %vec2p
2229 %res = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2230 ret <2 x double> %res
; Merge-masked high unpack of <2 x double> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the shuffle result;
; other lanes keep %vec3. Expected code: vcmpeqpd against a zeroed register
; into %k1, vunpckhpd under {%k1} with a memory operand writing xmm1, then a
; copy of xmm1 into xmm0.
2232 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask0(<2 x double> %vec1, ptr %vec2p, <2 x double> %vec3, <2 x double> %mask) {
2233 ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask0:
2235 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2236 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
2237 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
2238 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
2240 %vec2 = load <2 x double>, ptr %vec2p
2241 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2242 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2243 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
2244 ret <2 x double> %res
; Zero-masked high unpack of <2 x double> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the shuffle result;
; other lanes become zero. Expected code: vcmpeqpd against a zeroed register
; into %k1, then vunpckhpd with {%k1} {z} and a memory operand.
2247 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask0(<2 x double> %vec1, ptr %vec2p, <2 x double> %mask) {
2248 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask0:
2250 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2251 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
2252 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
2254 %vec2 = load <2 x double>, ptr %vec2p
2255 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2256 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2257 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
2258 ret <2 x double> %res
; Merge-masked high unpack of <2 x double>; %vec2 comes from a load of
; %vec2p. The select keeps the shuffle result where %mask is ordered-equal to
; 0.0 and %vec3 elsewhere. Expected code: vcmpeqpd against a zeroed register
; into %k1, vunpckhpd under {%k1} with a memory operand writing xmm1, then a
; copy of xmm1 into xmm0.
2261 define <2 x double> @test_2xdouble_masked_unpack_high_mem_mask1(<2 x double> %vec1, ptr %vec2p, <2 x double> %vec3, <2 x double> %mask) {
2262 ; CHECK-LABEL: test_2xdouble_masked_unpack_high_mem_mask1:
2264 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2265 ; CHECK-NEXT: vcmpeqpd %xmm3, %xmm2, %k1
2266 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm1 {%k1} = xmm0[1],mem[1]
2267 ; CHECK-NEXT: vmovapd %xmm1, %xmm0
2269 %vec2 = load <2 x double>, ptr %vec2p
2270 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2271 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2272 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> %vec3
2273 ret <2 x double> %res
; Zero-masked high unpack of <2 x double>; %vec2 comes from a load of %vec2p.
; The select keeps the shuffle result where %mask is ordered-equal to 0.0 and
; zero elsewhere. Expected code: vcmpeqpd against a zeroed register into %k1,
; then vunpckhpd with {%k1} {z} and a memory operand.
2276 define <2 x double> @test_2xdouble_zero_masked_unpack_high_mem_mask1(<2 x double> %vec1, ptr %vec2p, <2 x double> %mask) {
2277 ; CHECK-LABEL: test_2xdouble_zero_masked_unpack_high_mem_mask1:
2279 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2280 ; CHECK-NEXT: vcmpeqpd %xmm2, %xmm1, %k1
2281 ; CHECK-NEXT: vunpckhpd {{.*#+}} xmm0 {%k1} {z} = xmm0[1],mem[1]
2283 %vec2 = load <2 x double>, ptr %vec2p
2284 %shuf = shufflevector <2 x double> %vec1, <2 x double> %vec2, <2 x i32> <i32 1, i32 3>
2285 %cmp = fcmp oeq <2 x double> %mask, zeroinitializer
2286 %res = select <2 x i1> %cmp, <2 x double> %shuf, <2 x double> zeroinitializer
2287 ret <2 x double> %res
; Unmasked high unpack of two <4 x double> register operands: elements 1 and 3
; of %vec1 are interleaved with elements 1 and 3 of %vec2 (shuffle indices 5
; and 7). Expected to become one vunpckhpd on ymm registers.
2290 define <4 x double> @test_4xdouble_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2) {
2291 ; CHECK-LABEL: test_4xdouble_unpack_high_mask0:
2293 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2295 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2296 ret <4 x double> %res
; Merge-masked high unpack of two <4 x double> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the interleaved elements (1 and 3 of
; %vec1 and %vec2); other lanes keep %vec3. Expected code: vcmpeqpd against a
; zeroed register into %k1, vunpckhpd under {%k1} writing ymm2, then a copy of
; ymm2 into ymm0.
2298 define <4 x double> @test_4xdouble_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
2299 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask0:
2301 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2302 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
2303 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2304 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
2306 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2307 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2308 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2309 ret <4 x double> %res
; Zero-masked high unpack of two <4 x double> register operands. Lanes where
; %mask is ordered-equal to 0.0 take the interleaved elements (1 and 3 of
; %vec1 and %vec2); other lanes become zero. Expected code: vcmpeqpd against a
; zeroed register into %k1, then vunpckhpd with {%k1} {z}.
2312 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
2313 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask0:
2315 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2316 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2317 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2319 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2320 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2321 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2322 ret <4 x double> %res
; Merge-masked high unpack of two <4 x double> register operands: shuffle
; result where %mask is ordered-equal to 0.0, %vec3 otherwise. Expected code:
; vcmpeqpd against a zeroed register into %k1, vunpckhpd under {%k1} writing
; ymm2, then a copy of ymm2 into ymm0.
2324 define <4 x double> @test_4xdouble_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
2325 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask1:
2327 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2328 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
2329 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2330 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
2332 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2333 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2334 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2335 ret <4 x double> %res
; Zero-masked high unpack of two <4 x double> register operands: shuffle
; result where %mask is ordered-equal to 0.0, zero otherwise. Expected code:
; vcmpeqpd against a zeroed register into %k1, then vunpckhpd with {%k1} {z}.
2338 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
2339 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask1:
2341 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2342 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2343 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2345 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2346 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2347 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2348 ret <4 x double> %res
; Merge-masked high unpack of two <4 x double> register operands. The select
; keeps the shuffle result where %mask is ordered-equal to 0.0 and %vec3
; elsewhere. Expected code: vcmpeqpd against a zeroed register into %k1,
; vunpckhpd under {%k1} writing ymm2, then a copy of ymm2 into ymm0.
2350 define <4 x double> @test_4xdouble_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
2351 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask2:
2353 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2354 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
2355 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2356 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
2358 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2359 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2360 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2361 ret <4 x double> %res
; Zero-masked high unpack of two <4 x double> register operands. The select
; keeps the shuffle result where %mask is ordered-equal to 0.0 and zero
; elsewhere. Expected code: vcmpeqpd against a zeroed register into %k1, then
; vunpckhpd with {%k1} {z}.
2364 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
2365 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask2:
2367 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2368 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2369 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2371 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2372 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2373 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2374 ret <4 x double> %res
; Unmasked high unpack of two <4 x double> register operands (elements 1 and 3
; of %vec1 interleaved with elements 1 and 3 of %vec2). Expected to become one
; vunpckhpd on ymm registers.
2376 define <4 x double> @test_4xdouble_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2) {
2377 ; CHECK-LABEL: test_4xdouble_unpack_high_mask3:
2379 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2381 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2382 ret <4 x double> %res
; Merge-masked high unpack of two <4 x double> register operands: interleaved
; elements where %mask is ordered-equal to 0.0, %vec3 elsewhere. Expected
; code: vcmpeqpd against a zeroed register into %k1, vunpckhpd under {%k1}
; writing ymm2, then a copy of ymm2 into ymm0.
2384 define <4 x double> @test_4xdouble_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
2385 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mask3:
2387 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2388 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
2389 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm2 {%k1} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2390 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
2392 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2393 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2394 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2395 ret <4 x double> %res
; Zero-masked high unpack of two <4 x double> register operands: interleaved
; elements where %mask is ordered-equal to 0.0, zero elsewhere. Expected code:
; vcmpeqpd against a zeroed register into %k1, then vunpckhpd with {%k1} {z}.
2398 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
2399 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mask3:
2401 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2402 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2403 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],ymm1[1],ymm0[3],ymm1[3]
2405 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2406 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2407 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2408 ret <4 x double> %res
; Unmasked high unpack of <4 x double> with the second operand loaded from
; %vec2p: elements 1 and 3 of %vec1 interleaved with elements 1 and 3 of the
; loaded vector. Expected to become one vunpckhpd with a memory operand.
2410 define <4 x double> @test_4xdouble_unpack_high_mem_mask0(<4 x double> %vec1, ptr %vec2p) {
2411 ; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask0:
2413 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
2415 %vec2 = load <4 x double>, ptr %vec2p
2416 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2417 ret <4 x double> %res
; Merge-masked high unpack of <4 x double> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the interleaved
; elements; other lanes keep %vec3. Expected code: vcmpeqpd against a zeroed
; register into %k1, vunpckhpd under {%k1} with a memory operand writing ymm1,
; then a copy of ymm1 into ymm0.
2419 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask0(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
2420 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask0:
2422 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2423 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2424 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
2425 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2427 %vec2 = load <4 x double>, ptr %vec2p
2428 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2429 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2430 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2431 ret <4 x double> %res
; Zero-masked high unpack of <4 x double> with the second operand loaded from
; %vec2p. Lanes where %mask is ordered-equal to 0.0 take the interleaved
; elements; other lanes become zero. Expected code: vcmpeqpd against a zeroed
; register into %k1, then vunpckhpd with {%k1} {z} and a memory operand.
2434 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask0(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
2435 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask0:
2437 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2438 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2439 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
2441 %vec2 = load <4 x double>, ptr %vec2p
2442 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2443 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2444 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2445 ret <4 x double> %res
; Merge-masked 256-bit unpack-high, memory second operand ("mask1" instance).
; The visible IR is the same as test_4xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; %vec3. The CHECK lines expect a {%k1}-predicated vunpckhpd into ymm1
; followed by a vmovapd to ymm0.
2448 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask1(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
2449 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask1:
2451 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2452 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2453 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
2454 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2456 %vec2 = load <4 x double>, ptr %vec2p
2457 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2458 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2459 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2460 ret <4 x double> %res
; Zero-masked 256-bit unpack-high, memory second operand ("mask1" instance).
; The visible IR is the same as
; test_4xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; zero. The CHECK lines expect vunpckhpd with {%k1} {z} writing ymm0.
2463 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask1(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
2464 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask1:
2466 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2467 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2468 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
2470 %vec2 = load <4 x double>, ptr %vec2p
2471 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2472 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2473 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2474 ret <4 x double> %res
; Merge-masked 256-bit unpack-high, memory second operand ("mask2" instance).
; The visible IR is the same as test_4xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; select chooses the <1,5,3,7> shuffle where %mask == 0.0 (fcmp oeq) and %vec3
; elsewhere. The CHECK lines expect a {%k1}-predicated vunpckhpd into ymm1
; followed by a vmovapd to ymm0.
2477 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask2(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
2478 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask2:
2480 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2481 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2482 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
2483 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2485 %vec2 = load <4 x double>, ptr %vec2p
2486 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2487 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2488 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2489 ret <4 x double> %res
; Zero-masked 256-bit unpack-high, memory second operand ("mask2" instance).
; The visible IR is the same as
; test_4xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; select chooses the <1,5,3,7> shuffle where %mask == 0.0 (fcmp oeq) and zero
; elsewhere. The CHECK lines expect vunpckhpd with {%k1} {z} writing ymm0.
2492 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask2(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
2493 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask2:
2495 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2496 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2497 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
2499 %vec2 = load <4 x double>, ptr %vec2p
2500 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2501 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2502 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2503 ret <4 x double> %res
; Unmasked 256-bit unpack-high with the second source loaded from %vec2p
; ("mask3" instance). The visible IR is the same as
; test_4xdouble_unpack_high_mem_mask0 in this file; only the function name
; differs.
; The CHECK line expects vunpckhpd with `mem` as the second source, writing
; ymm0.
2506 define <4 x double> @test_4xdouble_unpack_high_mem_mask3(<4 x double> %vec1, ptr %vec2p) {
2507 ; CHECK-LABEL: test_4xdouble_unpack_high_mem_mask3:
2509 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 = ymm0[1],mem[1],ymm0[3],mem[3]
2511 %vec2 = load <4 x double>, ptr %vec2p
2512 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2513 ret <4 x double> %res
; Merge-masked 256-bit unpack-high, memory second operand ("mask3" instance).
; The visible IR is the same as test_4xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; Result lanes come from the <1,5,3,7> shuffle where %mask == 0.0 (fcmp oeq)
; and from %vec3 otherwise. The CHECK lines expect a {%k1}-predicated
; vunpckhpd into ymm1 followed by a vmovapd to ymm0.
2515 define <4 x double> @test_4xdouble_masked_unpack_high_mem_mask3(<4 x double> %vec1, ptr %vec2p, <4 x double> %vec3, <4 x double> %mask) {
2516 ; CHECK-LABEL: test_4xdouble_masked_unpack_high_mem_mask3:
2518 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2519 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2520 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm1 {%k1} = ymm0[1],mem[1],ymm0[3],mem[3]
2521 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2523 %vec2 = load <4 x double>, ptr %vec2p
2524 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2525 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2526 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
2527 ret <4 x double> %res
; Zero-masked 256-bit unpack-high, memory second operand ("mask3" instance).
; The visible IR is the same as
; test_4xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; Result lanes come from the <1,5,3,7> shuffle where %mask == 0.0 (fcmp oeq)
; and are zero otherwise. The CHECK lines expect vunpckhpd with {%k1} {z}
; writing ymm0.
2530 define <4 x double> @test_4xdouble_zero_masked_unpack_high_mem_mask3(<4 x double> %vec1, ptr %vec2p, <4 x double> %mask) {
2531 ; CHECK-LABEL: test_4xdouble_zero_masked_unpack_high_mem_mask3:
2533 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2534 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2535 ; CHECK-NEXT: vunpckhpd {{.*#+}} ymm0 {%k1} {z} = ymm0[1],mem[1],ymm0[3],mem[3]
2537 %vec2 = load <4 x double>, ptr %vec2p
2538 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 1, i32 5, i32 3, i32 7>
2539 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2540 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2541 ret <4 x double> %res
; Unmasked 512-bit unpack-high of two register operands.
; The shuffle mask <1,9,3,11,5,13,7,15> alternates the odd-indexed elements of
; %vec1 and %vec2: vec1[1], vec2[1], vec1[3], vec2[3], and so on.
; The CHECK line expects this to be emitted as vunpckhpd on zmm0/zmm1 with the
; result in zmm0.
2544 define <8 x double> @test_8xdouble_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2) {
2545 ; CHECK-LABEL: test_8xdouble_unpack_high_mask0:
2547 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2549 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2550 ret <8 x double> %res
; Merge-masked 512-bit unpack-high of two register operands.
; Lanes where %mask is ordered-equal to 0.0 (fcmp oeq) take the
; <1,9,3,11,5,13,7,15> shuffle result; the remaining lanes keep %vec3.
; Per the CHECK lines the compare sets %k1, the vunpckhpd writes zmm2 under
; {%k1} (presumably the register holding %vec3), and zmm2 is then copied to
; zmm0.
2552 define <8 x double> @test_8xdouble_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
2553 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask0:
2555 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2556 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
2557 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2558 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
2560 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2561 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2563 ret <8 x double> %res
; Zero-masked 512-bit unpack-high of two register operands.
; Lanes where %mask is ordered-equal to 0.0 (fcmp oeq) take the
; <1,9,3,11,5,13,7,15> shuffle result; all other lanes are zeroinitializer.
; The CHECK lines expect the compare result in %k1 and a vunpckhpd carrying
; {%k1} {z} that writes zmm0.
2566 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
2567 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask0:
2569 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2570 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2571 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2573 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2574 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2575 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2576 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, register operands ("mask1" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mask0 in this
; file; only the function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; %vec3. The CHECK lines expect a {%k1}-predicated vunpckhpd into zmm2
; followed by a vmovapd to zmm0.
2578 define <8 x double> @test_8xdouble_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
2579 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask1:
2581 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2582 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
2583 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2584 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
2586 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2587 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2588 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2589 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, register operands ("mask1" instance).
; The visible IR is the same as test_8xdouble_zero_masked_unpack_high_mask0 in
; this file; only the function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; zero. The CHECK lines expect vunpckhpd with {%k1} {z} writing zmm0.
2592 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
2593 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask1:
2595 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2596 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2597 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2599 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2600 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2601 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2602 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, register operands ("mask2" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mask0 in this
; file; only the function name differs.
; select chooses the <1,9,3,11,5,13,7,15> shuffle where %mask == 0.0
; (fcmp oeq) and %vec3 elsewhere. The CHECK lines expect a {%k1}-predicated
; vunpckhpd into zmm2 followed by a vmovapd to zmm0.
2604 define <8 x double> @test_8xdouble_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
2605 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask2:
2607 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2608 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
2609 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2610 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
2612 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2613 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2614 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2615 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, register operands ("mask2" instance).
; The visible IR is the same as test_8xdouble_zero_masked_unpack_high_mask0 in
; this file; only the function name differs.
; select chooses the <1,9,3,11,5,13,7,15> shuffle where %mask == 0.0
; (fcmp oeq) and zero elsewhere. The CHECK lines expect vunpckhpd with
; {%k1} {z} writing zmm0.
2618 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
2619 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask2:
2621 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2622 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2623 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2625 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2626 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2627 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2628 ret <8 x double> %res
; Unmasked 512-bit unpack-high of two register operands ("mask3" instance).
; The visible IR is the same as test_8xdouble_unpack_high_mask0 in this file;
; only the function name differs.
; The CHECK line expects vunpckhpd on zmm0/zmm1 with the result in zmm0.
2630 define <8 x double> @test_8xdouble_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2) {
2631 ; CHECK-LABEL: test_8xdouble_unpack_high_mask3:
2633 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2635 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2636 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, register operands ("mask3" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mask0 in this
; file; only the function name differs.
; Result lanes come from the shuffle where %mask == 0.0 (fcmp oeq) and from
; %vec3 otherwise. The CHECK lines expect a {%k1}-predicated vunpckhpd into
; zmm2 followed by a vmovapd to zmm0.
2638 define <8 x double> @test_8xdouble_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
2639 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mask3:
2641 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2642 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
2643 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm2 {%k1} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2644 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
2646 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2647 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2648 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2649 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, register operands ("mask3" instance).
; The visible IR is the same as test_8xdouble_zero_masked_unpack_high_mask0 in
; this file; only the function name differs.
; Result lanes come from the shuffle where %mask == 0.0 (fcmp oeq) and are
; zero otherwise. The CHECK lines expect vunpckhpd with {%k1} {z} writing
; zmm0.
2652 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
2653 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mask3:
2655 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2656 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2657 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],zmm1[1],zmm0[3],zmm1[3],zmm0[5],zmm1[5],zmm0[7],zmm1[7]
2659 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2660 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2661 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2662 ret <8 x double> %res
; Unmasked 512-bit unpack-high whose second source is loaded from %vec2p.
; The shuffle mask <1,9,3,11,5,13,7,15> alternates the odd-indexed elements of
; %vec1 and the loaded %vec2.
; The CHECK line expects the loaded vector to appear as the `mem` operand of
; vunpckhpd, with the result in zmm0.
2664 define <8 x double> @test_8xdouble_unpack_high_mem_mask0(<8 x double> %vec1, ptr %vec2p) {
2665 ; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask0:
2667 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2669 %vec2 = load <8 x double>, ptr %vec2p
2670 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2671 ret <8 x double> %res
; Merge-masked 512-bit unpack-high with the second source loaded from %vec2p.
; Lanes where %mask is ordered-equal to 0.0 (fcmp oeq) take the
; <1,9,3,11,5,13,7,15> shuffle result; the remaining lanes keep %vec3.
; Per the CHECK lines the compare sets %k1, the memory-operand vunpckhpd
; writes zmm1 under {%k1} (presumably the register holding %vec3), and zmm1
; is then copied to zmm0.
2673 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask0(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
2674 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask0:
2676 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2677 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2678 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2679 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2681 %vec2 = load <8 x double>, ptr %vec2p
2682 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2683 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2684 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2685 ret <8 x double> %res
; Zero-masked 512-bit unpack-high with the second source loaded from %vec2p.
; Lanes where %mask is ordered-equal to 0.0 (fcmp oeq) take the
; <1,9,3,11,5,13,7,15> shuffle result; all other lanes are zeroinitializer.
; The CHECK lines expect the compare result in %k1 and a memory-operand
; vunpckhpd carrying {%k1} {z} that writes zmm0.
2688 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask0(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
2689 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask0:
2691 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2692 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2693 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2695 %vec2 = load <8 x double>, ptr %vec2p
2696 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2697 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2698 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2699 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, memory second operand ("mask1" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; %vec3. The CHECK lines expect a {%k1}-predicated vunpckhpd into zmm1
; followed by a vmovapd to zmm0.
2702 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask1(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
2703 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask1:
2705 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2706 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2707 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2708 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2710 %vec2 = load <8 x double>, ptr %vec2p
2711 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2712 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2713 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2714 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, memory second operand ("mask1" instance).
; The visible IR is the same as
; test_8xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; Lanes where %mask == 0.0 (fcmp oeq) receive the shuffle result, otherwise
; zero. The CHECK lines expect vunpckhpd with {%k1} {z} writing zmm0.
2717 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask1(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
2718 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask1:
2720 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2721 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2722 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2724 %vec2 = load <8 x double>, ptr %vec2p
2725 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2726 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2727 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2728 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, memory second operand ("mask2" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; select chooses the <1,9,3,11,5,13,7,15> shuffle where %mask == 0.0
; (fcmp oeq) and %vec3 elsewhere. The CHECK lines expect a {%k1}-predicated
; vunpckhpd into zmm1 followed by a vmovapd to zmm0.
2731 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask2(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
2732 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask2:
2734 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2735 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2736 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2737 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2739 %vec2 = load <8 x double>, ptr %vec2p
2740 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2741 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2742 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2743 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, memory second operand ("mask2" instance).
; The visible IR is the same as
; test_8xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; select chooses the <1,9,3,11,5,13,7,15> shuffle where %mask == 0.0
; (fcmp oeq) and zero elsewhere. The CHECK lines expect vunpckhpd with
; {%k1} {z} writing zmm0.
2746 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask2(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
2747 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask2:
2749 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2750 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2751 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2753 %vec2 = load <8 x double>, ptr %vec2p
2754 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2755 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2756 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2757 ret <8 x double> %res
; Unmasked 512-bit unpack-high with the second source loaded from %vec2p
; ("mask3" instance). The visible IR is the same as
; test_8xdouble_unpack_high_mem_mask0 in this file; only the function name
; differs.
; The CHECK line expects vunpckhpd with `mem` as the second source, writing
; zmm0.
2760 define <8 x double> @test_8xdouble_unpack_high_mem_mask3(<8 x double> %vec1, ptr %vec2p) {
2761 ; CHECK-LABEL: test_8xdouble_unpack_high_mem_mask3:
2763 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2765 %vec2 = load <8 x double>, ptr %vec2p
2766 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2767 ret <8 x double> %res
; Merge-masked 512-bit unpack-high, memory second operand ("mask3" instance).
; The visible IR is the same as test_8xdouble_masked_unpack_high_mem_mask0 in
; this file; only the function name differs.
; Result lanes come from the shuffle where %mask == 0.0 (fcmp oeq) and from
; %vec3 otherwise. The CHECK lines expect a {%k1}-predicated vunpckhpd into
; zmm1 followed by a vmovapd to zmm0.
2769 define <8 x double> @test_8xdouble_masked_unpack_high_mem_mask3(<8 x double> %vec1, ptr %vec2p, <8 x double> %vec3, <8 x double> %mask) {
2770 ; CHECK-LABEL: test_8xdouble_masked_unpack_high_mem_mask3:
2772 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2773 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2774 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm1 {%k1} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2775 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2777 %vec2 = load <8 x double>, ptr %vec2p
2778 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2779 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2780 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
2781 ret <8 x double> %res
; Zero-masked 512-bit unpack-high, memory second operand ("mask3" instance).
; The visible IR is the same as
; test_8xdouble_zero_masked_unpack_high_mem_mask0 in this file; only the
; function name differs.
; Result lanes come from the shuffle where %mask == 0.0 (fcmp oeq) and are
; zero otherwise. The CHECK lines expect vunpckhpd with {%k1} {z} writing
; zmm0.
2784 define <8 x double> @test_8xdouble_zero_masked_unpack_high_mem_mask3(<8 x double> %vec1, ptr %vec2p, <8 x double> %mask) {
2785 ; CHECK-LABEL: test_8xdouble_zero_masked_unpack_high_mem_mask3:
2787 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2788 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2789 ; CHECK-NEXT: vunpckhpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1],mem[1],zmm0[3],mem[3],zmm0[5],mem[5],zmm0[7],mem[7]
2791 %vec2 = load <8 x double>, ptr %vec2p
2792 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 1, i32 9, i32 3, i32 11, i32 5, i32 13, i32 7, i32 15>
2793 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2794 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2795 ret <8 x double> %res