1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl %s -o - | FileCheck %s
4 ; FIXME: The cases with 128-bit shuffles of 256-bit vectors should be fixed by PR34359
; Unmasked shuffle: %res is elements 4-7 of %vec1 followed by elements 0-3 of
; %vec2. The expected assembly is a single vperm2f128.
6 define <8 x float> @test_8xfloat_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2) {
7 ; CHECK-LABEL: test_8xfloat_shuff_mask0:
9 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
11 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff32x4 into ymm2, then a move to ymm0.
14 define <8 x float> @test_8xfloat_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
15 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask0:
17 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
18 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
19 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
20 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
22 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
23 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
24 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes are zero.
; The expected assembly is a single vshuff32x4 with {%k1} {z}.
28 define <8 x float> @test_8xfloat_zero_masked_shuff_mask0(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
29 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask0:
31 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
32 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
33 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
35 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
36 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
37 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_masked_shuff_mask0 - confirm the duplication is intentional.
40 define <8 x float> @test_8xfloat_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
41 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask1:
43 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
44 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
45 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
46 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
48 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
49 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
50 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_zero_masked_shuff_mask0 - confirm the duplication is intentional.
54 define <8 x float> @test_8xfloat_zero_masked_shuff_mask1(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
55 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask1:
57 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
58 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
59 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
61 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
62 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
63 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 4-7 of %vec2); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff32x4 into ymm2, then a move to ymm0.
66 define <8 x float> @test_8xfloat_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
67 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask2:
69 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
70 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
71 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
72 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
74 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
75 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
76 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 4-7 of %vec2); other lanes are zero.
; The expected assembly is a single vshuff32x4 with {%k1} {z}.
80 define <8 x float> @test_8xfloat_zero_masked_shuff_mask2(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
81 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask2:
83 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
84 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
85 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
87 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
88 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
89 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked shuffle: %res is elements 4-7 of %vec1 followed by elements 0-3 of
; %vec2. The expected assembly is a single vperm2f128.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_shuff_mask0 - confirm the duplication is intentional.
92 define <8 x float> @test_8xfloat_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2) {
93 ; CHECK-LABEL: test_8xfloat_shuff_mask3:
95 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
97 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_masked_shuff_mask0 - confirm the duplication is intentional.
100 define <8 x float> @test_8xfloat_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %vec3, <8 x float> %mask) {
101 ; CHECK-LABEL: test_8xfloat_masked_shuff_mask3:
103 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
104 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm3, %k1
105 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
106 ; CHECK-NEXT: vmovaps %ymm2, %ymm0
108 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
109 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
110 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 4-7 of %vec1, then 0-3 of %vec2); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_zero_masked_shuff_mask0 - confirm the duplication is intentional.
114 define <8 x float> @test_8xfloat_zero_masked_shuff_mask3(<8 x float> %vec1, <8 x float> %vec2, <8 x float> %mask) {
115 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mask3:
117 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
118 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
119 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
121 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
122 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
123 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked shuffle with the second source loaded from %vec2p: %res is elements
; 4-7 of %vec1 followed by elements 4-7 of the loaded vector. The expected
; assembly is a single vperm2f128 with a memory operand.
126 define <8 x float> @test_8xfloat_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p) {
127 ; CHECK-LABEL: test_8xfloat_shuff_mem_mask0:
129 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
131 %vec2 = load <8 x float>, <8 x float>* %vec2p
132 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 4-7 of the loaded vector); other lanes take %vec3. The expected assembly
; is a {%k1}-masked vshuff32x4 with a memory operand into ymm1, then a move to ymm0.
135 define <8 x float> @test_8xfloat_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
136 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask0:
138 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
139 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
140 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
141 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
143 %vec2 = load <8 x float>, <8 x float>* %vec2p
144 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
145 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
146 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 4-7 of the loaded vector); other lanes are zero. The expected assembly is
; a single vshuff32x4 with a memory operand and {%k1} {z}.
150 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask0(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
151 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask0:
153 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
154 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
155 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
157 %vec2 = load <8 x float>, <8 x float>* %vec2p
158 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
159 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
160 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 4-7 of the loaded vector); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_masked_shuff_mem_mask0 - confirm the duplication is intentional.
164 define <8 x float> @test_8xfloat_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
165 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask1:
167 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
168 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
169 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
170 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
172 %vec2 = load <8 x float>, <8 x float>* %vec2p
173 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
174 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
175 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 4-7 of the loaded vector); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_zero_masked_shuff_mem_mask0 - confirm the duplication is intentional.
179 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask1(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
180 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask1:
182 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
183 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
184 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
186 %vec2 = load <8 x float>, <8 x float>* %vec2p
187 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
188 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
189 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 0-3 of the loaded vector); other lanes take %vec3. The expected assembly
; is a {%k1}-masked vshuff32x4 with a memory operand into ymm1, then a move to ymm0.
193 define <8 x float> @test_8xfloat_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
194 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask2:
196 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
197 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
198 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
199 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
201 %vec2 = load <8 x float>, <8 x float>* %vec2p
202 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
203 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
204 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 0-3 of the loaded vector); other lanes are zero. The expected assembly is
; a single vshuff32x4 with a memory operand and {%k1} {z}.
208 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask2(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
209 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask2:
211 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
212 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
213 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
215 %vec2 = load <8 x float>, <8 x float>* %vec2p
216 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
217 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
218 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked shuffle with the second source loaded from %vec2p: %res is elements
; 4-7 of %vec1 followed by elements 0-3 of the loaded vector. The expected
; assembly is a single vperm2f128 with a memory operand.
222 define <8 x float> @test_8xfloat_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p) {
223 ; CHECK-LABEL: test_8xfloat_shuff_mem_mask3:
225 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
227 %vec2 = load <8 x float>, <8 x float>* %vec2p
228 %res = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 0-3 of the loaded vector); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_masked_shuff_mem_mask2 - confirm the duplication is intentional.
231 define <8 x float> @test_8xfloat_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %vec3, <8 x float> %mask) {
232 ; CHECK-LABEL: test_8xfloat_masked_shuff_mem_mask3:
234 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
235 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm2, %k1
236 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
237 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
239 %vec2 = load <8 x float>, <8 x float>* %vec2p
240 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
241 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
242 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec3
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 of %vec1,
; then 0-3 of the loaded vector); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_8xfloat_zero_masked_shuff_mem_mask2 - confirm the duplication is intentional.
246 define <8 x float> @test_8xfloat_zero_masked_shuff_mem_mask3(<8 x float> %vec1, <8 x float>* %vec2p, <8 x float> %mask) {
247 ; CHECK-LABEL: test_8xfloat_zero_masked_shuff_mem_mask3:
249 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
250 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm1, %k1
251 ; CHECK-NEXT: vshuff32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
253 %vec2 = load <8 x float>, <8 x float>* %vec2p
254 %shuf = shufflevector <8 x float> %vec1, <8 x float> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
255 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
256 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
; Unmasked shuffle: returns elements 12-15 and 0-3 of %vec1 followed by
; elements 4-7 and 12-15 of %vec2. The expected assembly is a single vshuff64x2
; (the operand list is printed in 64-bit element indices).
260 define <16 x float> @test_16xfloat_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2) {
261 ; CHECK-LABEL: test_16xfloat_shuff_mask0:
263 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],zmm1[2,3,6,7]
265 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
266 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 12-15 and 0-3 of %vec1, then 4-7 and 12-15 of %vec2);
; other lanes take %vec3. The expected assembly is a {%k1}-masked vshuff32x4
; into zmm2, then a move to zmm0.
268 define <16 x float> @test_16xfloat_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
269 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask0:
271 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
272 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
273 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
274 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
276 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
277 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
278 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
279 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 12-15 and 0-3 of %vec1, then 4-7 and 12-15 of %vec2);
; other lanes are zero. The expected assembly is a single vshuff32x4 with
; {%k1} {z}.
282 define <16 x float> @test_16xfloat_zero_masked_shuff_mask0(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
283 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask0:
285 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
286 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
287 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,0,1,2,3],zmm1[4,5,6,7,12,13,14,15]
289 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 0, i32 1, i32 2, i32 3, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
290 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
291 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
292 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 0-3 and 8-11 of %vec1, then 0-3 and 12-15 of %vec2);
; other lanes take %vec3. The expected assembly is a {%k1}-masked vshuff32x4
; into zmm2, then a move to zmm0.
294 define <16 x float> @test_16xfloat_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
295 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask1:
297 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
298 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
299 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
300 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
302 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
303 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
304 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
305 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 0-3 and 8-11 of %vec1, then 0-3 and 12-15 of %vec2);
; other lanes are zero. The expected assembly is a single vshuff32x4 with
; {%k1} {z}.
308 define <16 x float> @test_16xfloat_zero_masked_shuff_mask1(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
309 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask1:
311 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
312 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
313 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,8,9,10,11],zmm1[0,1,2,3,12,13,14,15]
315 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 28, i32 29, i32 30, i32 31>
316 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
317 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
318 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 12-15 and 4-7 of %vec1, then 0-3 and 4-7 of %vec2);
; other lanes take %vec3. The expected assembly is a {%k1}-masked vshuff32x4
; into zmm2, then a move to zmm0.
320 define <16 x float> @test_16xfloat_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
321 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask2:
323 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
324 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
325 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
326 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
328 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
329 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
330 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
331 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 12-15 and 4-7 of %vec1, then 0-3 and 4-7 of %vec2);
; other lanes are zero. The expected assembly is a single vshuff32x4 with
; {%k1} {z}.
334 define <16 x float> @test_16xfloat_zero_masked_shuff_mask2(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
335 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask2:
337 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
338 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
339 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,4,5,6,7],zmm1[0,1,2,3,4,5,6,7]
341 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
342 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
343 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
344 ret <16 x float> %res
; Unmasked shuffle: returns elements 8-15 of %vec1 followed by elements 0-3
; and 8-11 of %vec2. The expected assembly is a single vshuff64x2 (the operand
; list is printed in 64-bit element indices).
346 define <16 x float> @test_16xfloat_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2) {
347 ; CHECK-LABEL: test_16xfloat_shuff_mask3:
349 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,6,7],zmm1[0,1,4,5]
351 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
352 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 8-15 of %vec1, then 0-3 and 8-11 of %vec2); other lanes
; take %vec3. The expected assembly is a {%k1}-masked vshuff32x4 into zmm2,
; then a move to zmm0.
354 define <16 x float> @test_16xfloat_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %vec3, <16 x float> %mask) {
355 ; CHECK-LABEL: test_16xfloat_masked_shuff_mask3:
357 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
358 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm3, %k1
359 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
360 ; CHECK-NEXT: vmovaps %zmm2, %zmm0
362 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
363 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
364 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
365 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 8-15 of %vec1, then 0-3 and 8-11 of %vec2); other lanes
; are zero. The expected assembly is a single vshuff32x4 with {%k1} {z}.
368 define <16 x float> @test_16xfloat_zero_masked_shuff_mask3(<16 x float> %vec1, <16 x float> %vec2, <16 x float> %mask) {
369 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mask3:
371 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
372 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
373 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,12,13,14,15],zmm1[0,1,2,3,8,9,10,11]
375 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
376 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
377 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
378 ret <16 x float> %res
; Unmasked shuffle with the second source loaded from %vec2p: returns elements
; 12-15 and 8-11 of %vec1 followed by elements 8-11 and 4-7 of the loaded
; vector. The expected assembly is a single vshuff64x2 with a memory operand
; (the operand list is printed in 64-bit element indices).
380 define <16 x float> @test_16xfloat_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p) {
381 ; CHECK-LABEL: test_16xfloat_shuff_mem_mask0:
383 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,4,5],mem[4,5,2,3]
385 %vec2 = load <16 x float>, <16 x float>* %vec2p
386 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
387 ret <16 x float> %res
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 12-15 and 8-11
; of %vec1, then 8-11 and 4-7 of the loaded vector); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff32x4 with a memory operand into
; zmm1, then a move to zmm0.
389 define <16 x float> @test_16xfloat_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
390 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask0:
392 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
393 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
394 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
395 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
397 %vec2 = load <16 x float>, <16 x float>* %vec2p
398 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
399 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
400 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
401 ret <16 x float> %res
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 12-15 and 8-11
; of %vec1, then 8-11 and 4-7 of the loaded vector); other lanes are zero.
; The expected assembly is a single vshuff32x4 with a memory operand and
; {%k1} {z}.
404 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask0(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
405 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask0:
407 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
408 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
409 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[12,13,14,15,8,9,10,11],mem[8,9,10,11,4,5,6,7]
411 %vec2 = load <16 x float>, <16 x float>* %vec2p
412 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 12, i32 13, i32 14, i32 15, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
413 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
414 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
415 ret <16 x float> %res
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 8-11 and 4-7
; of %vec1, then 8-11 and 4-7 of the loaded vector); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff32x4 with a memory operand into
; zmm1, then a move to zmm0.
418 define <16 x float> @test_16xfloat_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
419 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask1:
421 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
422 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
423 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
424 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
426 %vec2 = load <16 x float>, <16 x float>* %vec2p
427 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
428 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
429 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
430 ret <16 x float> %res
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 8-11 and 4-7
; of %vec1, then 8-11 and 4-7 of the loaded vector); other lanes are zero.
; The expected assembly is a single vshuff32x4 with a memory operand and
; {%k1} {z}.
433 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask1(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
434 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask1:
436 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
437 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
438 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,4,5,6,7]
440 %vec2 = load <16 x float>, <16 x float>* %vec2p
441 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
442 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
443 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
444 ret <16 x float> %res
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 0-3 of %vec1
; twice, then elements 8-11 of the loaded vector twice); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff32x4 with a memory operand into
; zmm1, then a move to zmm0.
447 define <16 x float> @test_16xfloat_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
448 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask2:
450 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
451 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
452 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
453 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
455 %vec2 = load <16 x float>, <16 x float>* %vec2p
456 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
457 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
458 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
459 ret <16 x float> %res
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 0-3 of %vec1
; twice, then elements 8-11 of the loaded vector twice); other lanes are zero.
; The expected assembly is a single vshuff32x4 with a memory operand and
; {%k1} {z}.
462 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask2(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
463 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask2:
465 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
466 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
467 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,0,1,2,3],mem[8,9,10,11,8,9,10,11]
469 %vec2 = load <16 x float>, <16 x float>* %vec2p
470 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 24, i32 25, i32 26, i32 27>
471 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
472 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
473 ret <16 x float> %res
; Unmasked shuffle with the second source loaded from %vec2p: returns elements
; 4-7 and 0-3 of %vec1 followed by elements 12-15 of the loaded vector twice.
; The expected assembly is a single vshuff64x2 with a memory operand (the
; operand list is printed in 64-bit element indices).
476 define <16 x float> @test_16xfloat_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p) {
477 ; CHECK-LABEL: test_16xfloat_shuff_mem_mask3:
479 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,6,7]
481 %vec2 = load <16 x float>, <16 x float>* %vec2p
482 %res = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
483 ret <16 x float> %res
; Merge-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 and 0-3 of
; %vec1, then elements 12-15 of the loaded vector twice); other lanes take
; %vec3. The expected assembly is a {%k1}-masked vshuff32x4 with a memory
; operand into zmm1, then a move to zmm0.
485 define <16 x float> @test_16xfloat_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %vec3, <16 x float> %mask) {
486 ; CHECK-LABEL: test_16xfloat_masked_shuff_mem_mask3:
488 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
489 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm2, %k1
490 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
491 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
493 %vec2 = load <16 x float>, <16 x float>* %vec2p
494 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
495 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
496 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec3
497 ret <16 x float> %res
; Zero-masked form with the second source loaded from %vec2p: lanes where %mask
; is ordered-equal (fcmp oeq) to zero take the shuffle (elements 4-7 and 0-3 of
; %vec1, then elements 12-15 of the loaded vector twice); other lanes are zero.
; The expected assembly is a single vshuff32x4 with a memory operand and
; {%k1} {z}.
500 define <16 x float> @test_16xfloat_zero_masked_shuff_mem_mask3(<16 x float> %vec1, <16 x float>* %vec2p, <16 x float> %mask) {
501 ; CHECK-LABEL: test_16xfloat_zero_masked_shuff_mem_mask3:
503 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
504 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm1, %k1
505 ; CHECK-NEXT: vshuff32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],mem[12,13,14,15,12,13,14,15]
507 %vec2 = load <16 x float>, <16 x float>* %vec2p
508 %shuf = shufflevector <16 x float> %vec1, <16 x float> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
509 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
510 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
511 ret <16 x float> %res
; Unmasked shuffle: returns elements 2-3 of %vec1 followed by elements 0-1 of
; %vec2. The expected assembly is a single vperm2f128.
514 define <4 x double> @test_4xdouble_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2) {
515 ; CHECK-LABEL: test_4xdouble_shuff_mask0:
517 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
519 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
520 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 0-1 of %vec2); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff64x2 into ymm2, then a move to ymm0.
522 define <4 x double> @test_4xdouble_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
523 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask0:
525 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
526 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
527 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
528 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
530 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
531 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
532 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
533 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 0-1 of %vec2); other lanes are zero.
; The expected assembly is a single vshuff64x2 with {%k1} {z}.
536 define <4 x double> @test_4xdouble_zero_masked_shuff_mask0(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
537 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask0:
539 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
540 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
541 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
543 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
544 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
545 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
546 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 0-1 of %vec2); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_4xdouble_masked_shuff_mask0 - confirm the duplication is intentional.
548 define <4 x double> @test_4xdouble_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
549 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask1:
551 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
552 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
553 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
554 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
556 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
557 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
558 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
559 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 0-1 of %vec2); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_4xdouble_zero_masked_shuff_mask0 - confirm the duplication is intentional.
562 define <4 x double> @test_4xdouble_zero_masked_shuff_mask1(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
563 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask1:
565 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
566 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
567 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
569 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
570 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
571 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
572 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 2-3 of %vec2); other lanes take %vec3.
; The expected assembly is a {%k1}-masked vshuff64x2 into ymm2, then a move to ymm0.
574 define <4 x double> @test_4xdouble_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
575 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask2:
577 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
578 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
579 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
580 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
582 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
583 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
584 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
585 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 2-3 of %vec2); other lanes are zero.
; The expected assembly is a single vshuff64x2 with {%k1} {z}.
588 define <4 x double> @test_4xdouble_zero_masked_shuff_mask2(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
589 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask2:
591 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
592 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
593 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
595 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
596 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
597 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
598 ret <4 x double> %res
; Unmasked shuffle: returns elements 2-3 of %vec1 followed by elements 2-3 of
; %vec2. The expected assembly is a single vperm2f128.
600 define <4 x double> @test_4xdouble_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2) {
601 ; CHECK-LABEL: test_4xdouble_shuff_mask3:
603 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
605 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
606 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 2-3 of %vec2); other lanes take %vec3.
; NOTE(review): the shuffle mask is identical to the one in
; @test_4xdouble_masked_shuff_mask2 - confirm the duplication is intentional.
608 define <4 x double> @test_4xdouble_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %vec3, <4 x double> %mask) {
609 ; CHECK-LABEL: test_4xdouble_masked_shuff_mask3:
611 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
612 ; CHECK-NEXT: vcmpeqpd %ymm4, %ymm3, %k1
613 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
614 ; CHECK-NEXT: vmovapd %ymm2, %ymm0
616 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
617 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
618 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
619 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal (fcmp oeq) to zero take
; the shuffle (elements 2-3 of %vec1, then 2-3 of %vec2); other lanes are zero.
; NOTE(review): the shuffle mask is identical to the one in
; @test_4xdouble_zero_masked_shuff_mask2 - confirm the duplication is intentional.
622 define <4 x double> @test_4xdouble_zero_masked_shuff_mask3(<4 x double> %vec1, <4 x double> %vec2, <4 x double> %mask) {
623 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mask3:
625 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
626 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
627 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
629 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
630 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
631 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
632 ret <4 x double> %res
; Unmasked shuffle whose second operand is loaded from %vec2p: upper half of
; %vec1 followed by the upper half of the loaded vector (indices <2,3,6,7>).
; The load is expected to fold into the memory operand of vperm2f128.
634 define <4 x double> @test_4xdouble_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p) {
635 ; CHECK-LABEL: test_4xdouble_shuff_mem_mask0:
637 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
639 %vec2 = load <4 x double>, <4 x double>* %vec2p
640 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
641 ret <4 x double> %res
; Merge-masked shuffle with a memory second operand: lanes where %mask
; compares equal to zero take the shuffle of %vec1 and the vector loaded from
; %vec2p (indices <2,3,6,7>); the remaining lanes keep %vec3. Both the load and
; the select are expected to fold into one {%k1} vshuff64x2.
643 define <4 x double> @test_4xdouble_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
644 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask0:
646 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
647 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
648 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
649 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
651 %vec2 = load <4 x double>, <4 x double>* %vec2p
652 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
653 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
654 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
655 ret <4 x double> %res
; Zero-masked shuffle with a memory second operand: lanes where %mask compares
; equal to zero take the shuffle of %vec1 and the vector loaded from %vec2p
; (indices <2,3,6,7>); all other lanes are zero. Expected to fold into one
; {%k1} {z} vshuff64x2 with a memory operand.
658 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask0(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
659 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask0:
661 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
662 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
663 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
665 %vec2 = load <4 x double>, <4 x double>* %vec2p
666 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
667 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
668 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
669 ret <4 x double> %res
; Merge-masked shuffle with a memory second operand: upper half of %vec1
; followed by the lower half of the vector loaded from %vec2p (indices
; <2,3,4,5>). Lanes where %mask compares equal to zero take the shuffle result;
; the remaining lanes keep %vec3.
672 define <4 x double> @test_4xdouble_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
673 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask1:
675 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
676 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
677 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
678 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
680 %vec2 = load <4 x double>, <4 x double>* %vec2p
681 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
682 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
683 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
684 ret <4 x double> %res
; Zero-masked shuffle with a memory second operand: upper half of %vec1
; followed by the lower half of the vector loaded from %vec2p (indices
; <2,3,4,5>). Lanes where %mask compares equal to zero take the shuffle result;
; all other lanes are zero.
687 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask1(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
688 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask1:
690 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
691 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
692 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
694 %vec2 = load <4 x double>, <4 x double>* %vec2p
695 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
696 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
697 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
698 ret <4 x double> %res
; Merge-masked shuffle with a memory second operand (indices <2,3,4,5>): lanes
; where %mask compares equal to zero take the shuffle result; the rest keep
; %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_4xdouble_masked_shuff_mem_mask1, so this looks like duplicate coverage.
701 define <4 x double> @test_4xdouble_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
702 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask2:
704 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
705 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
706 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
707 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
709 %vec2 = load <4 x double>, <4 x double>* %vec2p
710 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
711 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
712 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
713 ret <4 x double> %res
; Zero-masked shuffle with a memory second operand (indices <2,3,4,5>): lanes
; where %mask compares equal to zero take the shuffle result; all other lanes
; are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_4xdouble_zero_masked_shuff_mem_mask1, so this looks like duplicate
; coverage.
716 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask2(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
717 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask2:
719 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
720 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
721 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
723 %vec2 = load <4 x double>, <4 x double>* %vec2p
724 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
725 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
726 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
727 ret <4 x double> %res
; Unmasked shuffle with a memory second operand (indices <2,3,6,7>); the load
; is expected to fold into vperm2f128.
; NOTE(review): the shuffle indices are identical to those in
; test_4xdouble_shuff_mem_mask0, so this looks like duplicate coverage.
730 define <4 x double> @test_4xdouble_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p) {
731 ; CHECK-LABEL: test_4xdouble_shuff_mem_mask3:
733 ; CHECK-NEXT: vperm2f128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
735 %vec2 = load <4 x double>, <4 x double>* %vec2p
736 %res = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
737 ret <4 x double> %res
; Merge-masked shuffle with a memory second operand (indices <2,3,6,7>): lanes
; where %mask compares equal to zero take the shuffle result; the rest keep
; %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_4xdouble_masked_shuff_mem_mask0, so this looks like duplicate coverage.
739 define <4 x double> @test_4xdouble_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %vec3, <4 x double> %mask) {
740 ; CHECK-LABEL: test_4xdouble_masked_shuff_mem_mask3:
742 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
743 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
744 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
745 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
747 %vec2 = load <4 x double>, <4 x double>* %vec2p
748 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
749 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
750 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec3
751 ret <4 x double> %res
; Zero-masked shuffle with a memory second operand (indices <2,3,6,7>): lanes
; where %mask compares equal to zero take the shuffle result; all other lanes
; are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_4xdouble_zero_masked_shuff_mem_mask0, so this looks like duplicate
; coverage.
754 define <4 x double> @test_4xdouble_zero_masked_shuff_mem_mask3(<4 x double> %vec1, <4 x double>* %vec2p, <4 x double> %mask) {
755 ; CHECK-LABEL: test_4xdouble_zero_masked_shuff_mem_mask3:
757 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
758 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
759 ; CHECK-NEXT: vshuff64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
761 %vec2 = load <4 x double>, <4 x double>* %vec2p
762 %shuf = shufflevector <4 x double> %vec1, <4 x double> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
763 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
764 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
765 ret <4 x double> %res
; Unmasked 512-bit shuffle in 128-bit lanes: lanes 3,1 of %vec1 followed by
; lanes 3,0 of %vec2 (indices <6,7,2,3,14,15,8,9>). Expected to lower to a
; single vshuff64x2.
768 define <8 x double> @test_8xdouble_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2) {
769 ; CHECK-LABEL: test_8xdouble_shuff_mask0:
771 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,2,3],zmm1[6,7,0,1]
773 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
774 ret <8 x double> %res
; Merge-masked 512-bit shuffle (lanes 3,1 of %vec1, lanes 3,0 of %vec2): lanes
; where %mask compares equal to zero take the shuffle result; the rest keep
; %vec3. Expected to fold into a {%k1} merge-masked vshuff64x2.
776 define <8 x double> @test_8xdouble_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
777 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask0:
779 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
780 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
781 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,2,3],zmm1[6,7,0,1]
782 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
784 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
785 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
786 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
787 ret <8 x double> %res
; Zero-masked 512-bit shuffle (lanes 3,1 of %vec1, lanes 3,0 of %vec2): lanes
; where %mask compares equal to zero take the shuffle result; all other lanes
; are zero. Expected to fold into a {%k1} {z} vshuff64x2.
790 define <8 x double> @test_8xdouble_zero_masked_shuff_mask0(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
791 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask0:
793 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
794 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
795 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,2,3],zmm1[6,7,0,1]
797 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 2, i32 3, i32 14, i32 15, i32 8, i32 9>
798 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
799 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
800 ret <8 x double> %res
; Merge-masked 512-bit shuffle: 128-bit lanes 0,2 of %vec1 followed by lanes
; 0,2 of %vec2 (indices <0,1,4,5,8,9,12,13>). Lanes where %mask compares equal
; to zero take the shuffle result; the rest keep %vec3.
802 define <8 x double> @test_8xdouble_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
803 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask1:
805 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
806 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
807 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,4,5]
808 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
810 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
811 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
812 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
813 ret <8 x double> %res
; Zero-masked 512-bit shuffle: 128-bit lanes 0,2 of %vec1 followed by lanes 0,2
; of %vec2 (indices <0,1,4,5,8,9,12,13>). Lanes where %mask compares equal to
; zero take the shuffle result; all other lanes are zero.
816 define <8 x double> @test_8xdouble_zero_masked_shuff_mask1(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
817 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask1:
819 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
820 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
821 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,4,5]
823 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 12, i32 13>
824 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
825 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
826 ret <8 x double> %res
; Merge-masked 512-bit shuffle: 128-bit lanes 3,2 of %vec1 followed by lanes
; 2,0 of %vec2 (indices <6,7,4,5,12,13,8,9>). Lanes where %mask compares equal
; to zero take the shuffle result; the rest keep %vec3.
828 define <8 x double> @test_8xdouble_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
829 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask2:
831 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
832 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
833 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[4,5,0,1]
834 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
836 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
837 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
838 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
839 ret <8 x double> %res
; Zero-masked 512-bit shuffle: 128-bit lanes 3,2 of %vec1 followed by lanes 2,0
; of %vec2 (indices <6,7,4,5,12,13,8,9>). Lanes where %mask compares equal to
; zero take the shuffle result; all other lanes are zero.
842 define <8 x double> @test_8xdouble_zero_masked_shuff_mask2(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
843 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask2:
845 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
846 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
847 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[4,5,0,1]
849 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 12, i32 13, i32 8, i32 9>
850 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
851 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
852 ret <8 x double> %res
; Unmasked 512-bit shuffle: 128-bit lane 2 of %vec1 twice, followed by lanes
; 2,1 of %vec2 (indices <4,5,4,5,12,13,10,11>). Expected to lower to a single
; vshuff64x2.
854 define <8 x double> @test_8xdouble_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2) {
855 ; CHECK-LABEL: test_8xdouble_shuff_mask3:
857 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,2,3]
859 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
860 ret <8 x double> %res
; Merge-masked 512-bit shuffle (lane 2 of %vec1 twice, lanes 2,1 of %vec2):
; lanes where %mask compares equal to zero take the shuffle result; the rest
; keep %vec3.
862 define <8 x double> @test_8xdouble_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %vec3, <8 x double> %mask) {
863 ; CHECK-LABEL: test_8xdouble_masked_shuff_mask3:
865 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
866 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm3, %k1
867 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,2,3]
868 ; CHECK-NEXT: vmovapd %zmm2, %zmm0
870 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
871 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
872 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
873 ret <8 x double> %res
; Zero-masked 512-bit shuffle (lane 2 of %vec1 twice, lanes 2,1 of %vec2):
; lanes where %mask compares equal to zero take the shuffle result; all other
; lanes are zero.
876 define <8 x double> @test_8xdouble_zero_masked_shuff_mask3(<8 x double> %vec1, <8 x double> %vec2, <8 x double> %mask) {
877 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mask3:
879 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
880 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
881 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,2,3]
883 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 10, i32 11>
884 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
885 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
886 ret <8 x double> %res
; Unmasked 512-bit shuffle with a memory second operand: 128-bit lanes 3,0 of
; %vec1 followed by lane 0 of the loaded vector twice (indices
; <6,7,0,1,8,9,8,9>). The load is expected to fold into vshuff64x2.
888 define <8 x double> @test_8xdouble_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p) {
889 ; CHECK-LABEL: test_8xdouble_shuff_mem_mask0:
891 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[6,7,0,1],mem[0,1,0,1]
893 %vec2 = load <8 x double>, <8 x double>* %vec2p
894 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
895 ret <8 x double> %res
; Merge-masked 512-bit shuffle with a memory second operand (indices
; <6,7,0,1,8,9,8,9>): lanes where %mask compares equal to zero take the shuffle
; result; the rest keep %vec3. Load and select are expected to fold into one
; {%k1} vshuff64x2.
897 define <8 x double> @test_8xdouble_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
898 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask0:
900 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
901 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
902 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,0,1],mem[0,1,0,1]
903 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
905 %vec2 = load <8 x double>, <8 x double>* %vec2p
906 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
907 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
908 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
909 ret <8 x double> %res
; Zero-masked 512-bit shuffle with a memory second operand (indices
; <6,7,0,1,8,9,8,9>): lanes where %mask compares equal to zero take the shuffle
; result; all other lanes are zero.
912 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask0(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
913 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask0:
915 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
916 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
917 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,0,1],mem[0,1,0,1]
919 %vec2 = load <8 x double>, <8 x double>* %vec2p
920 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
921 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
922 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
923 ret <8 x double> %res
; Merge-masked 512-bit shuffle with a memory second operand: 128-bit lane 3 of
; %vec1 twice, followed by lanes 0,1 of the loaded vector (indices
; <6,7,6,7,8,9,10,11>). Lanes where %mask compares equal to zero take the
; shuffle result; the rest keep %vec3.
926 define <8 x double> @test_8xdouble_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
927 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask1:
929 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
930 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
931 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[6,7,6,7],mem[0,1,2,3]
932 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
934 %vec2 = load <8 x double>, <8 x double>* %vec2p
935 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
936 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
937 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
938 ret <8 x double> %res
; Zero-masked 512-bit shuffle with a memory second operand: 128-bit lane 3 of
; %vec1 twice, followed by lanes 0,1 of the loaded vector (indices
; <6,7,6,7,8,9,10,11>). Lanes where %mask compares equal to zero take the
; shuffle result; all other lanes are zero.
941 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask1(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
942 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask1:
944 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
945 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
946 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,6,7],mem[0,1,2,3]
948 %vec2 = load <8 x double>, <8 x double>* %vec2p
949 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 6, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
950 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
951 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
952 ret <8 x double> %res
; Merge-masked 512-bit shuffle with a memory second operand: 128-bit lanes 0,1
; of %vec1 followed by lanes 0,2 of the loaded vector (indices
; <0,1,2,3,8,9,12,13>). Lanes where %mask compares equal to zero take the
; shuffle result; the rest keep %vec3.
955 define <8 x double> @test_8xdouble_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
956 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask2:
958 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
959 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
960 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3],mem[0,1,4,5]
961 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
963 %vec2 = load <8 x double>, <8 x double>* %vec2p
964 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
965 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
966 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
967 ret <8 x double> %res
; Zero-masked 512-bit shuffle with a memory second operand: 128-bit lanes 0,1
; of %vec1 followed by lanes 0,2 of the loaded vector (indices
; <0,1,2,3,8,9,12,13>). Lanes where %mask compares equal to zero take the
; shuffle result; all other lanes are zero.
970 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask2(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
971 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask2:
973 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
974 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
975 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3],mem[0,1,4,5]
977 %vec2 = load <8 x double>, <8 x double>* %vec2p
978 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 8, i32 9, i32 12, i32 13>
979 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
980 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
981 ret <8 x double> %res
; Unmasked 512-bit shuffle with a memory second operand: 128-bit lanes 1,0 of
; %vec1 followed by lanes 2,0 of the loaded vector (indices
; <2,3,0,1,12,13,8,9>). The load is expected to fold into vshuff64x2.
984 define <8 x double> @test_8xdouble_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p) {
985 ; CHECK-LABEL: test_8xdouble_shuff_mem_mask3:
987 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[4,5,0,1]
989 %vec2 = load <8 x double>, <8 x double>* %vec2p
990 %res = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
991 ret <8 x double> %res
; Merge-masked 512-bit shuffle with a memory second operand (indices
; <2,3,0,1,12,13,8,9>): lanes where %mask compares equal to zero take the
; shuffle result; the rest keep %vec3.
993 define <8 x double> @test_8xdouble_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %vec3, <8 x double> %mask) {
994 ; CHECK-LABEL: test_8xdouble_masked_shuff_mem_mask3:
996 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
997 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
998 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[4,5,0,1]
999 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
1001 %vec2 = load <8 x double>, <8 x double>* %vec2p
1002 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
1003 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1004 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec3
1005 ret <8 x double> %res
; Zero-masked 512-bit shuffle with a memory second operand (indices
; <2,3,0,1,12,13,8,9>): lanes where %mask compares equal to zero take the
; shuffle result; all other lanes are zero.
1008 define <8 x double> @test_8xdouble_zero_masked_shuff_mem_mask3(<8 x double> %vec1, <8 x double>* %vec2p, <8 x double> %mask) {
1009 ; CHECK-LABEL: test_8xdouble_zero_masked_shuff_mem_mask3:
1011 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
1012 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
1013 ; CHECK-NEXT: vshuff64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[4,5,0,1]
1015 %vec2 = load <8 x double>, <8 x double>* %vec2p
1016 %shuf = shufflevector <8 x double> %vec1, <8 x double> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 12, i32 13, i32 8, i32 9>
1017 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
1018 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
1019 ret <8 x double> %res
; Unmasked integer shuffle of the upper half of %vec1 with the upper half of
; %vec2 (indices <4..7,12..15>). With no mask, the expected lowering is
; vperm2i128 rather than vshufi32x4.
1022 define <8 x i32> @test_8xi32_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2) {
1023 ; CHECK-LABEL: test_8xi32_shuff_mask0:
1025 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1027 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Merge-masked integer shuffle (indices <4..7,12..15>): lanes where %mask is
; zero (icmp eq) take the shuffle result; the rest keep %vec3. The mask is
; expected to come from vptestnmd and the select to fold into a {%k1}
; vshufi32x4.
1030 define <8 x i32> @test_8xi32_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
1031 ; CHECK-LABEL: test_8xi32_masked_shuff_mask0:
1033 ; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
1034 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
1035 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1037 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1038 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1039 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle (indices <4..7,12..15>): lanes where %mask is
; zero (icmp eq) take the shuffle result; all other lanes are zero. Expected to
; fold into a {%k1} {z} vshufi32x4.
1043 define <8 x i32> @test_8xi32_zero_masked_shuff_mask0(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
1044 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask0:
1046 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1047 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
1049 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1050 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1051 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked integer shuffle: upper half of %vec1 followed by the lower half
; of %vec2 (indices <4..7,8..11>). Lanes where %mask is zero take the shuffle
; result; the rest keep %vec3.
1054 define <8 x i32> @test_8xi32_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
1055 ; CHECK-LABEL: test_8xi32_masked_shuff_mask1:
1057 ; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
1058 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
1059 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1061 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1062 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1063 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle: upper half of %vec1 followed by the lower half
; of %vec2 (indices <4..7,8..11>). Lanes where %mask is zero take the shuffle
; result; all other lanes are zero.
1067 define <8 x i32> @test_8xi32_zero_masked_shuff_mask1(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
1068 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask1:
1070 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1071 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
1073 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1074 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1075 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked integer shuffle (indices <4..7,12..15>): lanes where %mask is
; zero take the shuffle result; the rest keep %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_masked_shuff_mask0, so this looks like duplicate coverage.
1078 define <8 x i32> @test_8xi32_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
1079 ; CHECK-LABEL: test_8xi32_masked_shuff_mask2:
1081 ; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
1082 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[4,5,6,7]
1083 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1085 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1086 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1087 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle (indices <4..7,12..15>): lanes where %mask is
; zero take the shuffle result; all other lanes are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_zero_masked_shuff_mask0, so this looks like duplicate coverage.
1091 define <8 x i32> @test_8xi32_zero_masked_shuff_mask2(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
1092 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask2:
1094 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1095 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[4,5,6,7]
1097 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1098 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1099 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked integer shuffle: upper half of %vec1 followed by the lower half of
; %vec2 (indices <4..7,8..11>). Expected to lower to vperm2i128.
1102 define <8 x i32> @test_8xi32_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2) {
1103 ; CHECK-LABEL: test_8xi32_shuff_mask3:
1105 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1107 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Merge-masked integer shuffle (indices <4..7,8..11>): lanes where %mask is
; zero take the shuffle result; the rest keep %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_masked_shuff_mask1, so this looks like duplicate coverage.
1110 define <8 x i32> @test_8xi32_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %vec3, <8 x i32> %mask) {
1111 ; CHECK-LABEL: test_8xi32_masked_shuff_mask3:
1113 ; CHECK-NEXT: vptestnmd %ymm3, %ymm3, %k1
1114 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm2 {%k1} = ymm0[4,5,6,7],ymm1[0,1,2,3]
1115 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1117 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1118 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1119 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle (indices <4..7,8..11>): lanes where %mask is zero
; take the shuffle result; all other lanes are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_zero_masked_shuff_mask1, so this looks like duplicate coverage.
1123 define <8 x i32> @test_8xi32_zero_masked_shuff_mask3(<8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> %mask) {
1124 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mask3:
1126 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1127 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],ymm1[0,1,2,3]
1129 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1130 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1131 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked integer shuffle with a memory second operand: upper half of %vec1
; followed by the upper half of the vector loaded from %vec2p (indices
; <4..7,12..15>). The load is expected to fold into vperm2i128.
1134 define <8 x i32> @test_8xi32_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p) {
1135 ; CHECK-LABEL: test_8xi32_shuff_mem_mask0:
1137 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
1139 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1140 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
; Merge-masked integer shuffle with a memory second operand (indices
; <4..7,12..15>): lanes where %mask is zero take the shuffle result; the rest
; keep %vec3. Load and select are expected to fold into one {%k1} vshufi32x4.
1143 define <8 x i32> @test_8xi32_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
1144 ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask0:
1146 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1147 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[4,5,6,7]
1148 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1150 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1151 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1152 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1153 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle with a memory second operand (indices
; <4..7,12..15>): lanes where %mask is zero take the shuffle result; all other
; lanes are zero.
1157 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask0(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
1158 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask0:
1160 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1161 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[4,5,6,7]
1163 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1164 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15>
1165 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1166 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked integer shuffle with a memory second operand: upper half of
; %vec1 followed by the lower half of the loaded vector (indices
; <4..7,8..11>). Lanes where %mask is zero take the shuffle result; the rest
; keep %vec3.
1170 define <8 x i32> @test_8xi32_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
1171 ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask1:
1173 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1174 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
1175 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1177 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1178 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1179 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1180 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle with a memory second operand: upper half of %vec1
; followed by the lower half of the loaded vector (indices <4..7,8..11>).
; Lanes where %mask is zero take the shuffle result; all other lanes are zero.
1184 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask1(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
1185 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask1:
1187 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1188 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
1190 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1191 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1192 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1193 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked integer shuffle with a memory second operand (indices
; <4..7,8..11>): lanes where %mask is zero take the shuffle result; the rest
; keep %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_masked_shuff_mem_mask1, so this looks like duplicate coverage.
1197 define <8 x i32> @test_8xi32_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
1198 ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask2:
1200 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1201 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
1202 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1204 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1205 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1206 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1207 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle with a memory second operand (indices
; <4..7,8..11>): lanes where %mask is zero take the shuffle result; all other
; lanes are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_zero_masked_shuff_mem_mask1, so this looks like duplicate
; coverage.
1211 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask2(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
1212 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask2:
1214 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1215 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
1217 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1218 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1219 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1220 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked integer shuffle with a memory second operand: upper half of %vec1
; followed by the lower half of the vector loaded from %vec2p (indices
; <4..7,8..11>). The load is expected to fold into vperm2i128.
1224 define <8 x i32> @test_8xi32_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p) {
1225 ; CHECK-LABEL: test_8xi32_shuff_mem_mask3:
1227 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[0,1]
1229 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1230 %res = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
; Merge-masked integer shuffle with a memory second operand (indices
; <4..7,8..11>): lanes where %mask is zero take the shuffle result; the rest
; keep %vec3.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_masked_shuff_mem_mask1, so this looks like duplicate coverage.
1233 define <8 x i32> @test_8xi32_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %vec3, <8 x i32> %mask) {
1234 ; CHECK-LABEL: test_8xi32_masked_shuff_mem_mask3:
1236 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
1237 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm1 {%k1} = ymm0[4,5,6,7],mem[0,1,2,3]
1238 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1240 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1241 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1242 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1243 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec3
; Zero-masked integer shuffle with a memory second operand (indices
; <4..7,8..11>): lanes where %mask is zero take the shuffle result; all other
; lanes are zero.
; NOTE(review): the shuffle indices are identical to those in
; test_8xi32_zero_masked_shuff_mem_mask1, so this looks like duplicate
; coverage.
1247 define <8 x i32> @test_8xi32_zero_masked_shuff_mem_mask3(<8 x i32> %vec1, <8 x i32>* %vec2p, <8 x i32> %mask) {
1248 ; CHECK-LABEL: test_8xi32_zero_masked_shuff_mem_mask3:
1250 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
1251 ; CHECK-NEXT: vshufi32x4 {{.*#+}} ymm0 {%k1} {z} = ymm0[4,5,6,7],mem[0,1,2,3]
1253 %vec2 = load <8 x i32>, <8 x i32>* %vec2p
1254 %shuf = shufflevector <8 x i32> %vec1, <8 x i32> %vec2, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11>
1255 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
1256 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked 512-bit integer shuffle in 128-bit lanes: lane 1 of %vec1 twice,
; followed by lanes 1,3 of %vec2. The unmasked form is expected as vshufi64x2,
; whereas the masked variants of the same shuffle expect vshufi32x4.
1260 define <16 x i32> @test_16xi32_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2) {
1261 ; CHECK-LABEL: test_16xi32_shuff_mask0:
1263 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],zmm1[2,3,6,7]
1265 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
; Merge-masked 512-bit integer shuffle (lane 1 of %vec1 twice, lanes 1,3 of
; %vec2): lanes where %mask is zero (icmp eq) take the shuffle result; the rest
; keep %vec3. Expected to fold into a {%k1} merge-masked vshufi32x4.
1268 define <16 x i32> @test_16xi32_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
1269 ; CHECK-LABEL: test_16xi32_masked_shuff_mask0:
1271 ; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1
1272 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
1273 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1275 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
1276 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1277 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked 512-bit integer shuffle (lane 1 of %vec1 twice, lanes 1,3 of
; %vec2): lanes where %mask is zero take the shuffle result; all other lanes
; are zero. Expected to fold into a {%k1} {z} vshufi32x4.
1281 define <16 x i32> @test_16xi32_zero_masked_shuff_mask0(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
1282 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask0:
1284 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1285 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],zmm1[4,5,6,7,12,13,14,15]
1287 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
1288 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1289 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> shuffle: elements 8-11, 8-11 of %vec1 then 8-11, 4-7
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmd,
; a {%k1}-masked vshufi32x4 into zmm2, and a move of zmm2 to zmm0.
1292 define <16 x i32> @test_16xi32_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
1293 ; CHECK-LABEL: test_16xi32_masked_shuff_mask1:
1295 ; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1
1296 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
1297 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1299 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
1300 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1301 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle: elements 8-11, 8-11 of %vec1 then 8-11, 4-7
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmd and a {%k1} {z} vshufi32x4.
1305 define <16 x i32> @test_16xi32_zero_masked_shuff_mask1(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
1306 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask1:
1308 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1309 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,8,9,10,11],zmm1[8,9,10,11,4,5,6,7]
1311 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 8, i32 9, i32 10, i32 11, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
1312 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1313 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> shuffle: elements 4-7, 8-11 of %vec1 then 0-3, 0-3
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmd,
; a {%k1}-masked vshufi32x4 into zmm2, and a move of zmm2 to zmm0.
1316 define <16 x i32> @test_16xi32_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
1317 ; CHECK-LABEL: test_16xi32_masked_shuff_mask2:
1319 ; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1
1320 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
1321 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1323 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
1324 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1325 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle: elements 4-7, 8-11 of %vec1 then 0-3, 0-3
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmd and a {%k1} {z} vshufi32x4.
1329 define <16 x i32> @test_16xi32_zero_masked_shuff_mask2(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
1330 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask2:
1332 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1333 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],zmm1[0,1,2,3,0,1,2,3]
1335 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 16, i32 17, i32 18, i32 19, i32 16, i32 17, i32 18, i32 19>
1336 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1337 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> shuffle: elements 4-7 and 0-3 of %vec1, then elements
; 8-11 and 4-7 of %vec2 (mask indices 24-27 and 20-23). The CHECK line expects
; a single vshufi64x2, i.e. the same selection expressed in 64-bit elements.
1340 define <16 x i32> @test_16xi32_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2) {
1341 ; CHECK-LABEL: test_16xi32_shuff_mask3:
1343 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],zmm1[4,5,2,3]
1345 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
; Merge-masked <16 x i32> shuffle: elements 4-7, 0-3 of %vec1 then 8-11, 4-7
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmd,
; a {%k1}-masked vshufi32x4 into zmm2, and a move of zmm2 to zmm0.
1348 define <16 x i32> @test_16xi32_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %vec3, <16 x i32> %mask) {
1349 ; CHECK-LABEL: test_16xi32_masked_shuff_mask3:
1351 ; CHECK-NEXT: vptestnmd %zmm3, %zmm3, %k1
1352 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm2 {%k1} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
1353 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1355 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
1356 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1357 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle: elements 4-7, 0-3 of %vec1 then 8-11, 4-7
; of %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmd and a {%k1} {z} vshufi32x4.
1361 define <16 x i32> @test_16xi32_zero_masked_shuff_mask3(<16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> %mask) {
1362 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mask3:
1364 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1365 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,0,1,2,3],zmm1[8,9,10,11,4,5,6,7]
1367 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 24, i32 25, i32 26, i32 27, i32 20, i32 21, i32 22, i32 23>
1368 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1369 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 8-11 and
; 4-7 of %vec1, then elements 8-11 and 0-3 of the loaded vector. The CHECK line
; expects a single vshufi64x2 with the load folded in as a mem operand.
1372 define <16 x i32> @test_16xi32_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p) {
1373 ; CHECK-LABEL: test_16xi32_shuff_mem_mask0:
1375 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,2,3],mem[4,5,0,1]
1377 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1378 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
; Merge-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 8-11,
; 4-7 of %vec1 then 8-11, 0-3 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise the matching element of
; %vec3. The CHECK lines expect vptestnmd, a {%k1}-masked vshufi32x4 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1381 define <16 x i32> @test_16xi32_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
1382 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask0:
1384 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1385 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
1386 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1388 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1389 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
1390 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1391 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 8-11,
; 4-7 of %vec1 then 8-11, 0-3 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise zero. The CHECK lines
; expect vptestnmd and a {%k1} {z} vshufi32x4 with a mem operand.
1395 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask0(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
1396 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask0:
1398 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1399 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[8,9,10,11,4,5,6,7],mem[8,9,10,11,0,1,2,3]
1401 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1402 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 8, i32 9, i32 10, i32 11, i32 4, i32 5, i32 6, i32 7, i32 24, i32 25, i32 26, i32 27, i32 16, i32 17, i32 18, i32 19>
1403 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1404 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 4-7 of %vec1 then 0-3, 8-11 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise the matching element of
; %vec3. The CHECK lines expect vptestnmd, a {%k1}-masked vshufi32x4 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1408 define <16 x i32> @test_16xi32_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
1409 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask1:
1411 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1412 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
1413 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1415 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1416 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
1417 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1418 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 4-7 of %vec1 then 0-3, 8-11 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise zero. The CHECK lines
; expect vptestnmd and a {%k1} {z} vshufi32x4 with a mem operand.
1422 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask1(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
1423 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask1:
1425 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1426 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[0,1,2,3,8,9,10,11]
1428 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1429 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 16, i32 17, i32 18, i32 19, i32 24, i32 25, i32 26, i32 27>
1430 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1431 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 8-11 of %vec1 then 12-15, 12-15 of the loaded vector. Where an element of
; %mask is zero the shuffle result is selected, otherwise the matching element
; of %vec3. The CHECK lines expect vptestnmd, a {%k1}-masked vshufi32x4 with a
; mem operand writing zmm1, and a move of zmm1 to zmm0.
1435 define <16 x i32> @test_16xi32_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
1436 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask2:
1438 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1439 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
1440 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1442 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1443 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
1444 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1445 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 8-11 of %vec1 then 12-15, 12-15 of the loaded vector. Where an element of
; %mask is zero the shuffle result is selected, otherwise zero. The CHECK lines
; expect vptestnmd and a {%k1} {z} vshufi32x4 with a mem operand.
1449 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask2(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
1450 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask2:
1452 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1453 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,8,9,10,11],mem[12,13,14,15,12,13,14,15]
1455 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1456 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 28, i32 29, i32 30, i32 31, i32 28, i32 29, i32 30, i32 31>
1457 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1458 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7 and
; 4-7 of %vec1, then elements 4-7 and 12-15 of the loaded vector. The CHECK
; line expects a single vshufi64x2 with the load folded in as a mem operand.
1462 define <16 x i32> @test_16xi32_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p) {
1463 ; CHECK-LABEL: test_16xi32_shuff_mem_mask3:
1465 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[2,3,6,7]
1467 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1468 %res = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
; Merge-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 4-7 of %vec1 then 4-7, 12-15 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise the matching element of
; %vec3. The CHECK lines expect vptestnmd, a {%k1}-masked vshufi32x4 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1471 define <16 x i32> @test_16xi32_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %vec3, <16 x i32> %mask) {
1472 ; CHECK-LABEL: test_16xi32_masked_shuff_mem_mask3:
1474 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
1475 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm1 {%k1} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
1476 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1478 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1479 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
1480 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1481 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec3
; Zero-masked <16 x i32> shuffle with %vec2 loaded from %vec2p: elements 4-7,
; 4-7 of %vec1 then 4-7, 12-15 of the loaded vector. Where an element of %mask
; is zero the shuffle result is selected, otherwise zero. The CHECK lines
; expect vptestnmd and a {%k1} {z} vshufi32x4 with a mem operand.
1485 define <16 x i32> @test_16xi32_zero_masked_shuff_mem_mask3(<16 x i32> %vec1, <16 x i32>* %vec2p, <16 x i32> %mask) {
1486 ; CHECK-LABEL: test_16xi32_zero_masked_shuff_mem_mask3:
1488 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1489 ; CHECK-NEXT: vshufi32x4 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,6,7,4,5,6,7],mem[4,5,6,7,12,13,14,15]
1491 %vec2 = load <16 x i32>, <16 x i32>* %vec2p
1492 %shuf = shufflevector <16 x i32> %vec1, <16 x i32> %vec2, <16 x i32> <i32 4, i32 5, i32 6, i32 7, i32 4, i32 5, i32 6, i32 7, i32 20, i32 21, i32 22, i32 23, i32 28, i32 29, i32 30, i32 31>
1493 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1494 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <4 x i64> shuffle: elements 2-3 of %vec1 followed by elements 0-1
; of %vec2 (mask indices 4-5). The CHECK line expects a single vperm2i128
; rather than an AVX-512 vshufi64x2.
1498 define <4 x i64> @test_4xi64_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2) {
1499 ; CHECK-LABEL: test_4xi64_shuff_mask0:
1501 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[0,1]
1503 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
; Merge-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 0-1 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; the matching element of %vec3. The CHECK lines expect vptestnmq to build %k1,
; a {%k1}-masked vshufi64x2 into ymm2, and a move of ymm2 to ymm0.
1506 define <4 x i64> @test_4xi64_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
1507 ; CHECK-LABEL: test_4xi64_masked_shuff_mask0:
1509 ; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1
1510 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
1511 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1513 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1514 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1515 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 0-1 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1519 define <4 x i64> @test_4xi64_zero_masked_shuff_mask0(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
1520 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask0:
1522 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1523 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
1525 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1526 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1527 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 2-3 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; the matching element of %vec3. The CHECK lines expect vptestnmq, a
; {%k1}-masked vshufi64x2 into ymm2, and a move of ymm2 to ymm0.
1530 define <4 x i64> @test_4xi64_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
1531 ; CHECK-LABEL: test_4xi64_masked_shuff_mask1:
1533 ; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1
1534 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
1535 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1537 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1538 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1539 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 2-3 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1543 define <4 x i64> @test_4xi64_zero_masked_shuff_mask1(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
1544 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask1:
1546 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1547 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
1549 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1550 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1551 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 0-1 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; the matching element of %vec3. The CHECK lines expect vptestnmq, a
; {%k1}-masked vshufi64x2 into ymm2, and a move of ymm2 to ymm0.
1554 define <4 x i64> @test_4xi64_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
1555 ; CHECK-LABEL: test_4xi64_masked_shuff_mask2:
1557 ; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1
1558 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[0,1]
1559 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1561 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1562 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1563 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 0-1 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1567 define <4 x i64> @test_4xi64_zero_masked_shuff_mask2(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
1568 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask2:
1570 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1571 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[0,1]
1573 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1574 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1575 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked <4 x i64> shuffle: elements 2-3 of %vec1 followed by elements 2-3
; of %vec2 (mask indices 6-7). The CHECK line expects a single vperm2i128
; rather than an AVX-512 vshufi64x2.
1578 define <4 x i64> @test_4xi64_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2) {
1579 ; CHECK-LABEL: test_4xi64_shuff_mask3:
1581 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],ymm1[2,3]
1583 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Merge-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 2-3 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; the matching element of %vec3. The CHECK lines expect vptestnmq, a
; {%k1}-masked vshufi64x2 into ymm2, and a move of ymm2 to ymm0.
1586 define <4 x i64> @test_4xi64_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %vec3, <4 x i64> %mask) {
1587 ; CHECK-LABEL: test_4xi64_masked_shuff_mask3:
1589 ; CHECK-NEXT: vptestnmq %ymm3, %ymm3, %k1
1590 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm2 {%k1} = ymm0[2,3],ymm1[2,3]
1591 ; CHECK-NEXT: vmovdqa %ymm2, %ymm0
1593 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1594 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1595 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle: elements 2-3 of %vec1 then 2-3 of %vec2.
; Where an element of %mask is zero the shuffle result is selected, otherwise
; zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1599 define <4 x i64> @test_4xi64_zero_masked_shuff_mask3(<4 x i64> %vec1, <4 x i64> %vec2, <4 x i64> %mask) {
1600 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mask3:
1602 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1603 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],ymm1[2,3]
1605 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1606 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1607 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 followed by elements 2-3 of the loaded vector. The CHECK line expects
; a single vperm2i128 with the load folded in as a mem operand.
1610 define <4 x i64> @test_4xi64_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p) {
1611 ; CHECK-LABEL: test_4xi64_shuff_mem_mask0:
1613 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
1615 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1616 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Merge-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3
; of %vec1 then 2-3 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise the matching element of %vec3. The
; CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem operand
; writing ymm1, and a move of ymm1 to ymm0.
1619 define <4 x i64> @test_4xi64_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
1620 ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask0:
1622 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1623 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
1624 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1626 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1627 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1628 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1629 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 then 2-3 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise zero. The CHECK lines expect vptestnmq
; and a {%k1} {z} vshufi64x2 with a mem operand.
1633 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask0(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
1634 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask0:
1636 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1637 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
1639 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1640 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1641 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1642 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3
; of %vec1 then 0-1 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise the matching element of %vec3. The
; CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem operand
; writing ymm1, and a move of ymm1 to ymm0.
1646 define <4 x i64> @test_4xi64_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
1647 ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask1:
1649 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1650 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
1651 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1653 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1654 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1655 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1656 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 then 0-1 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise zero. The CHECK lines expect vptestnmq
; and a {%k1} {z} vshufi64x2 with a mem operand.
1660 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask1(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
1661 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask1:
1663 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1664 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
1666 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1667 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1668 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1669 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3
; of %vec1 then 0-1 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise the matching element of %vec3. The
; CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem operand
; writing ymm1, and a move of ymm1 to ymm0.
1673 define <4 x i64> @test_4xi64_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
1674 ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask2:
1676 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1677 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[0,1]
1678 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1680 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1681 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1682 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1683 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 then 0-1 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise zero. The CHECK lines expect vptestnmq
; and a {%k1} {z} vshufi64x2 with a mem operand.
1687 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask2(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
1688 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask2:
1690 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1691 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[0,1]
1693 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1694 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 4, i32 5>
1695 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1696 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 followed by elements 2-3 of the loaded vector. The CHECK line expects
; a single vperm2i128 with the load folded in as a mem operand.
1700 define <4 x i64> @test_4xi64_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p) {
1701 ; CHECK-LABEL: test_4xi64_shuff_mem_mask3:
1703 ; CHECK-NEXT: vperm2i128 {{.*#+}} ymm0 = ymm0[2,3],mem[2,3]
1705 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1706 %res = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
; Merge-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3
; of %vec1 then 2-3 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise the matching element of %vec3. The
; CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem operand
; writing ymm1, and a move of ymm1 to ymm0.
1709 define <4 x i64> @test_4xi64_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %vec3, <4 x i64> %mask) {
1710 ; CHECK-LABEL: test_4xi64_masked_shuff_mem_mask3:
1712 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1713 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm1 {%k1} = ymm0[2,3],mem[2,3]
1714 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1716 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1717 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1718 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1719 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec3
; Zero-masked <4 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 of
; %vec1 then 2-3 of the loaded vector. Where an element of %mask is zero the
; shuffle result is selected, otherwise zero. The CHECK lines expect vptestnmq
; and a {%k1} {z} vshufi64x2 with a mem operand.
1723 define <4 x i64> @test_4xi64_zero_masked_shuff_mem_mask3(<4 x i64> %vec1, <4 x i64>* %vec2p, <4 x i64> %mask) {
1724 ; CHECK-LABEL: test_4xi64_zero_masked_shuff_mem_mask3:
1726 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1727 ; CHECK-NEXT: vshufi64x2 {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3],mem[2,3]
1729 %vec2 = load <4 x i64>, <4 x i64>* %vec2p
1730 %shuf = shufflevector <4 x i64> %vec1, <4 x i64> %vec2, <4 x i32> <i32 2, i32 3, i32 6, i32 7>
1731 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1732 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked <8 x i64> shuffle: elements 4-5 and 4-5 of %vec1, then elements 4-5
; and 4-5 of %vec2 (mask indices 12-13). The CHECK line expects a single
; vshufi64x2.
1736 define <8 x i64> @test_8xi64_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2) {
1737 ; CHECK-LABEL: test_8xi64_shuff_mask0:
1739 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[4,5,4,5],zmm1[4,5,4,5]
1741 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
; Merge-masked <8 x i64> shuffle: elements 4-5, 4-5 of %vec1 then 4-5, 4-5 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmq to
; build %k1, a {%k1}-masked vshufi64x2 into zmm2, and a move of zmm2 to zmm0.
1744 define <8 x i64> @test_8xi64_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
1745 ; CHECK-LABEL: test_8xi64_masked_shuff_mask0:
1747 ; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
1748 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[4,5,4,5],zmm1[4,5,4,5]
1749 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1751 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
1752 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1753 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle: elements 4-5, 4-5 of %vec1 then 4-5, 4-5 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1757 define <8 x i64> @test_8xi64_zero_masked_shuff_mask0(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
1758 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask0:
1760 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1761 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,4,5],zmm1[4,5,4,5]
1763 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 4, i32 5, i32 12, i32 13, i32 12, i32 13>
1764 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1765 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> shuffle: elements 6-7, 4-5 of %vec1 then 2-3, 4-5 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmq,
; a {%k1}-masked vshufi64x2 into zmm2, and a move of zmm2 to zmm0.
1768 define <8 x i64> @test_8xi64_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
1769 ; CHECK-LABEL: test_8xi64_masked_shuff_mask1:
1771 ; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
1772 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[6,7,4,5],zmm1[2,3,4,5]
1773 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1775 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13>
1776 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1777 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle: elements 6-7, 4-5 of %vec1 then 2-3, 4-5 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1781 define <8 x i64> @test_8xi64_zero_masked_shuff_mask1(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
1782 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask1:
1784 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1785 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[6,7,4,5],zmm1[2,3,4,5]
1787 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 6, i32 7, i32 4, i32 5, i32 10, i32 11, i32 12, i32 13>
1788 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1789 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> shuffle: elements 0-1, 4-5 of %vec1 then 0-1, 0-1 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmq,
; a {%k1}-masked vshufi64x2 into zmm2, and a move of zmm2 to zmm0.
1792 define <8 x i64> @test_8xi64_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
1793 ; CHECK-LABEL: test_8xi64_masked_shuff_mask2:
1795 ; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
1796 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[0,1,4,5],zmm1[0,1,0,1]
1797 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1799 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
1800 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1801 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle: elements 0-1, 4-5 of %vec1 then 0-1, 0-1 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1805 define <8 x i64> @test_8xi64_zero_masked_shuff_mask2(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
1806 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask2:
1808 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1809 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,4,5],zmm1[0,1,0,1]
1811 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 0, i32 1, i32 4, i32 5, i32 8, i32 9, i32 8, i32 9>
1812 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1813 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> shuffle: elements 2-3 and 6-7 of %vec1, then elements 4-5
; and 2-3 of %vec2 (mask indices 12-13 and 10-11). The CHECK line expects a
; single vshufi64x2.
1816 define <8 x i64> @test_8xi64_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2) {
1817 ; CHECK-LABEL: test_8xi64_shuff_mask3:
1819 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,6,7],zmm1[4,5,2,3]
1821 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
; Merge-masked <8 x i64> shuffle: elements 2-3, 6-7 of %vec1 then 4-5, 2-3 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise the matching element of %vec3. The CHECK lines expect vptestnmq,
; a {%k1}-masked vshufi64x2 into zmm2, and a move of zmm2 to zmm0.
1824 define <8 x i64> @test_8xi64_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %vec3, <8 x i64> %mask) {
1825 ; CHECK-LABEL: test_8xi64_masked_shuff_mask3:
1827 ; CHECK-NEXT: vptestnmq %zmm3, %zmm3, %k1
1828 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm2 {%k1} = zmm0[2,3,6,7],zmm1[4,5,2,3]
1829 ; CHECK-NEXT: vmovdqa64 %zmm2, %zmm0
1831 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
1832 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1833 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle: elements 2-3, 6-7 of %vec1 then 4-5, 2-3 of
; %vec2. Where an element of %mask is zero the shuffle result is selected,
; otherwise zero. The CHECK lines expect vptestnmq and a {%k1} {z} vshufi64x2.
1837 define <8 x i64> @test_8xi64_zero_masked_shuff_mask3(<8 x i64> %vec1, <8 x i64> %vec2, <8 x i64> %mask) {
1838 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mask3:
1840 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1841 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,6,7],zmm1[4,5,2,3]
1843 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 6, i32 7, i32 12, i32 13, i32 10, i32 11>
1844 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1845 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 and
; 2-3 of %vec1, then elements 4-5 and 2-3 of the loaded vector. The CHECK line
; expects a single vshufi64x2 with the load folded in as a mem operand.
1848 define <8 x i64> @test_8xi64_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p) {
1849 ; CHECK-LABEL: test_8xi64_shuff_mem_mask0:
1851 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,2,3],mem[4,5,2,3]
1853 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1854 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
; Merge-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 2-3 of %vec1 then 4-5, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise the matching element of %vec3.
; The CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1857 define <8 x i64> @test_8xi64_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
1858 ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask0:
1860 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1861 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,2,3],mem[4,5,2,3]
1862 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1864 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1865 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
1866 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1867 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 2-3 of %vec1 then 4-5, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise zero. The CHECK lines expect
; vptestnmq and a {%k1} {z} vshufi64x2 with a mem operand.
1871 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask0(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
1872 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask0:
1874 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1875 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,2,3],mem[4,5,2,3]
1877 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1878 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 2, i32 3, i32 12, i32 13, i32 10, i32 11>
1879 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1880 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 0-1 of %vec1 then 0-1, 0-1 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise the matching element of %vec3.
; The CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1884 define <8 x i64> @test_8xi64_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
1885 ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask1:
1887 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1888 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[0,1,0,1]
1889 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1891 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1892 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
1893 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1894 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 0-1 of %vec1 then 0-1, 0-1 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise zero. The CHECK lines expect
; vptestnmq and a {%k1} {z} vshufi64x2 with a mem operand.
1898 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask1(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
1899 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask1:
1901 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1902 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[0,1,0,1]
1904 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1905 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 8, i32 9, i32 8, i32 9>
1906 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1907 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 4-5,
; 0-1 of %vec1 then 2-3, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise the matching element of %vec3.
; The CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1911 define <8 x i64> @test_8xi64_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
1912 ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask2:
1914 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1915 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[4,5,0,1],mem[2,3,2,3]
1916 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1918 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1919 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
1920 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1921 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 4-5,
; 0-1 of %vec1 then 2-3, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise zero. The CHECK lines expect
; vptestnmq and a {%k1} {z} vshufi64x2 with a mem operand.
1925 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask2(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
1926 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask2:
1928 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1929 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[4,5,0,1],mem[2,3,2,3]
1931 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1932 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 4, i32 5, i32 0, i32 1, i32 10, i32 11, i32 10, i32 11>
1933 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1934 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3 and
; 0-1 of %vec1, then elements 6-7 and 2-3 of the loaded vector. The CHECK line
; expects a single vshufi64x2 with the load folded in as a mem operand.
1938 define <8 x i64> @test_8xi64_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p) {
1939 ; CHECK-LABEL: test_8xi64_shuff_mem_mask3:
1941 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 = zmm0[2,3,0,1],mem[6,7,2,3]
1943 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1944 %res = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
; Merge-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 0-1 of %vec1 then 6-7, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise the matching element of %vec3.
; The CHECK lines expect vptestnmq, a {%k1}-masked vshufi64x2 with a mem
; operand writing zmm1, and a move of zmm1 to zmm0.
1947 define <8 x i64> @test_8xi64_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %vec3, <8 x i64> %mask) {
1948 ; CHECK-LABEL: test_8xi64_masked_shuff_mem_mask3:
1950 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1951 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm1 {%k1} = zmm0[2,3,0,1],mem[6,7,2,3]
1952 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1954 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1955 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
1956 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1957 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec3
; Zero-masked <8 x i64> shuffle with %vec2 loaded from %vec2p: elements 2-3,
; 0-1 of %vec1 then 6-7, 2-3 of the loaded vector. Where an element of %mask is
; zero the shuffle result is selected, otherwise zero. The CHECK lines expect
; vptestnmq and a {%k1} {z} vshufi64x2 with a mem operand.
1961 define <8 x i64> @test_8xi64_zero_masked_shuff_mem_mask3(<8 x i64> %vec1, <8 x i64>* %vec2p, <8 x i64> %mask) {
1962 ; CHECK-LABEL: test_8xi64_zero_masked_shuff_mem_mask3:
1964 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1965 ; CHECK-NEXT: vshufi64x2 {{.*#+}} zmm0 {%k1} {z} = zmm0[2,3,0,1],mem[6,7,2,3]
1967 %vec2 = load <8 x i64>, <8 x i64>* %vec2p
1968 %shuf = shufflevector <8 x i64> %vec1, <8 x i64> %vec2, <8 x i32> <i32 2, i32 3, i32 0, i32 1, i32 14, i32 15, i32 10, i32 11>
1969 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1970 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer