1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-crosslane-shuffle,+fast-variable-perlane-shuffle %s -o - | FileCheck %s
3 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512dq,+fast-variable-perlane-shuffle %s -o - | FileCheck %s
; Unmasked case: repeats elements 0 and 1 of %vec across the <4 x i32> result
; (shuffle mask 0,1,0,1). The expected assembly is a single vmovddup.
5 define <4 x i32> @test_2xi32_to_4xi32(<4 x i32> %vec) {
6 ; CHECK-LABEL: test_2xi32_to_4xi32:
8 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = xmm0[0,0]
10 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Merge-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane keeps %default. The expected assembly builds the
; predicate with vptestnmd, applies it to vbroadcasti32x2, then copies with vmovdqa.
13 define <4 x i32> @test_masked_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
14 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask0:
16 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
17 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
18 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
20 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
21 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
22 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane is zero. The expected assembly is vbroadcasti32x2 with
; a {z} predicate produced by vptestnmd.
26 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask0(<4 x i32> %vec, <4 x i32> %mask) {
27 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask0:
29 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
30 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
32 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
33 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
34 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
37 define <4 x i32> @test_masked_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
38 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask1:
40 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
41 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
42 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
44 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
45 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
46 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
50 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask1(<4 x i32> %vec, <4 x i32> %mask) {
51 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask1:
53 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
54 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
56 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
57 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
58 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
61 define <4 x i32> @test_masked_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
62 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask2:
64 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
65 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
66 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
68 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
69 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
70 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
74 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask2(<4 x i32> %vec, <4 x i32> %mask) {
75 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask2:
77 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
78 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
80 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
81 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
82 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
85 define <4 x i32> @test_masked_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %default, <4 x i32> %mask) {
86 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mask3:
88 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
89 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm1 {%k1} = xmm0[0,1,0,1]
90 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
92 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
93 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
94 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the 0,1,0,1 shuffle of
; %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
98 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mask3(<4 x i32> %vec, <4 x i32> %mask) {
99 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mask3:
101 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
102 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,0,1]
104 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
105 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
106 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked case: repeats elements 0 and 1 of %vec four times to fill the
; <8 x i32> result. The expected assembly is a single vbroadcastsd (xmm0 -> ymm0).
109 define <8 x i32> @test_2xi32_to_8xi32(<8 x i32> %vec) {
110 ; CHECK-LABEL: test_2xi32_to_8xi32:
112 ; CHECK-NEXT: vbroadcastsd %xmm0, %ymm0
114 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default. The expected assembly builds
; the predicate with vptestnmd, applies it to vbroadcasti32x2, then copies with vmovdqa.
117 define <8 x i32> @test_masked_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
118 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask0:
120 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
121 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
122 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
124 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
125 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
126 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero. The expected assembly is
; vbroadcasti32x2 with a {z} predicate produced by vptestnmd.
130 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask0(<8 x i32> %vec, <8 x i32> %mask) {
131 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask0:
133 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
134 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
136 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
137 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
138 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
141 define <8 x i32> @test_masked_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
142 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask1:
144 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
145 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
146 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
148 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
149 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
150 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
154 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask1(<8 x i32> %vec, <8 x i32> %mask) {
155 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask1:
157 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
158 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
160 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
161 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
162 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
165 define <8 x i32> @test_masked_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
166 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask2:
168 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
169 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
170 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
172 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
173 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
174 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
178 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask2(<8 x i32> %vec, <8 x i32> %mask) {
179 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask2:
181 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
182 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
184 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
185 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
186 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
189 define <8 x i32> @test_masked_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %default, <8 x i32> %mask) {
190 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mask3:
192 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
193 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm1 {%k1} = xmm0[0,1,0,1,0,1,0,1]
194 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
196 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
197 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
198 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
202 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mask3(<8 x i32> %vec, <8 x i32> %mask) {
203 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mask3:
205 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
206 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1]
208 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
209 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
210 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked case: repeats elements 0 and 1 of %vec eight times to fill the
; <16 x i32> result. The expected assembly is a single vbroadcastsd (xmm0 -> zmm0).
213 define <16 x i32> @test_2xi32_to_16xi32(<16 x i32> %vec) {
214 ; CHECK-LABEL: test_2xi32_to_16xi32:
216 ; CHECK-NEXT: vbroadcastsd %xmm0, %zmm0
218 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default. The expected assembly builds
; the predicate with vptestnmd, applies it to vbroadcasti32x2, then copies with vmovdqa64.
221 define <16 x i32> @test_masked_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
222 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask0:
224 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
225 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
226 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
228 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
229 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
230 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero. The expected assembly is
; vbroadcasti32x2 with a {z} predicate produced by vptestnmd.
234 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask0(<16 x i32> %vec, <16 x i32> %mask) {
235 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask0:
237 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
238 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
240 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
241 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
242 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
245 define <16 x i32> @test_masked_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
246 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask1:
248 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
249 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
250 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
252 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
253 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
254 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
258 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask1(<16 x i32> %vec, <16 x i32> %mask) {
259 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask1:
261 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
262 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
264 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
265 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
266 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
269 define <16 x i32> @test_masked_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
270 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask2:
272 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
273 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
274 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
276 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
277 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
278 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
282 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask2(<16 x i32> %vec, <16 x i32> %mask) {
283 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask2:
285 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
286 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
288 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
289 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
290 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
293 define <16 x i32> @test_masked_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %default, <16 x i32> %mask) {
294 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mask3:
296 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
297 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm1 {%k1} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
298 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
300 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
301 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
302 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked case: lanes where %mask equals zero take the repeating 0,1
; shuffle of %vec; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
306 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mask3(<16 x i32> %vec, <16 x i32> %mask) {
307 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mask3:
309 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
310 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = xmm0[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
312 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
313 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
314 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; to fill the <4 x i32> result. The expected assembly is a vmovddup with a
; memory source operand.
317 define <4 x i32> @test_2xi32_to_4xi32_mem(ptr %vp) {
318 ; CHECK-LABEL: test_2xi32_to_4xi32_mem:
320 ; CHECK-NEXT: vmovddup {{.*#+}} xmm0 = mem[0,0]
322 %vec = load <2 x i32>, ptr %vp
323 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Merge-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; The expected assembly is a predicated vbroadcasti32x2 with a memory source.
326 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
327 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask0:
329 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
330 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
332 %vec = load <2 x i32>, ptr %vp
333 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
334 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
335 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; The expected assembly is a {z}-predicated vbroadcasti32x2 with a memory source.
339 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask0(ptr %vp, <4 x i32> %mask) {
340 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask0:
342 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
343 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
345 %vec = load <2 x i32>, ptr %vp
346 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
347 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
348 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
351 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
352 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask1:
354 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
355 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
357 %vec = load <2 x i32>, ptr %vp
358 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
359 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
360 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
364 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask1(ptr %vp, <4 x i32> %mask) {
365 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask1:
367 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
368 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
370 %vec = load <2 x i32>, ptr %vp
371 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
372 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
373 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
376 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
377 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask2:
379 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
380 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
382 %vec = load <2 x i32>, ptr %vp
383 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
384 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
385 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
389 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask2(ptr %vp, <4 x i32> %mask) {
390 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask2:
392 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
393 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
395 %vec = load <2 x i32>, ptr %vp
396 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
397 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
398 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
401 define <4 x i32> @test_masked_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %default, <4 x i32> %mask) {
402 ; CHECK-LABEL: test_masked_2xi32_to_4xi32_mem_mask3:
404 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
405 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} = mem[0,1,0,1]
407 %vec = load <2 x i32>, ptr %vp
408 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
409 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
410 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the 0,1,0,1
; shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
414 define <4 x i32> @test_masked_z_2xi32_to_4xi32_mem_mask3(ptr %vp, <4 x i32> %mask) {
415 ; CHECK-LABEL: test_masked_z_2xi32_to_4xi32_mem_mask3:
417 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
418 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} xmm0 {%k1} {z} = mem[0,1,0,1]
420 %vec = load <2 x i32>, ptr %vp
421 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
422 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
423 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; to fill the <8 x i32> result. The expected assembly is a vbroadcastsd that
; reads directly from (%rdi).
426 define <8 x i32> @test_2xi32_to_8xi32_mem(ptr %vp) {
427 ; CHECK-LABEL: test_2xi32_to_8xi32_mem:
429 ; CHECK-NEXT: vbroadcastsd (%rdi), %ymm0
431 %vec = load <2 x i32>, ptr %vp
432 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; The expected assembly is a predicated vbroadcasti32x2 with a memory source.
435 define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
436 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask0:
438 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
439 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
441 %vec = load <2 x i32>, ptr %vp
442 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
443 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
444 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; The expected assembly is a {z}-predicated vbroadcasti32x2 with a memory source.
448 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
449 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask0:
451 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
452 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
454 %vec = load <2 x i32>, ptr %vp
455 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
456 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
457 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
460 define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
461 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask1:
463 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
464 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
466 %vec = load <2 x i32>, ptr %vp
467 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
468 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
469 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
473 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
474 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask1:
476 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
477 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
479 %vec = load <2 x i32>, ptr %vp
480 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
481 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
482 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
485 define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
486 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask2:
488 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
489 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
491 %vec = load <2 x i32>, ptr %vp
492 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
493 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
494 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
498 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) {
499 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask2:
501 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
502 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
504 %vec = load <2 x i32>, ptr %vp
505 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
506 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
507 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
510 define <8 x i32> @test_masked_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
511 ; CHECK-LABEL: test_masked_2xi32_to_8xi32_mem_mask3:
513 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
514 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1,0,1,0,1]
516 %vec = load <2 x i32>, ptr %vp
517 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
518 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
519 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
523 define <8 x i32> @test_masked_z_2xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) {
524 ; CHECK-LABEL: test_masked_z_2xi32_to_8xi32_mem_mask3:
526 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
527 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
529 %vec = load <2 x i32>, ptr %vp
530 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
531 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
532 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked memory case: loads a <2 x i32> from %vp and repeats its two elements
; to fill the <16 x i32> result. The expected assembly is a vbroadcastsd that
; reads directly from (%rdi).
535 define <16 x i32> @test_2xi32_to_16xi32_mem(ptr %vp) {
536 ; CHECK-LABEL: test_2xi32_to_16xi32_mem:
538 ; CHECK-NEXT: vbroadcastsd (%rdi), %zmm0
540 %vec = load <2 x i32>, ptr %vp
541 %res = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; The expected assembly is a predicated vbroadcasti32x2 with a memory source.
544 define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
545 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask0:
547 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
548 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
550 %vec = load <2 x i32>, ptr %vp
551 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
552 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
553 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; The expected assembly is a {z}-predicated vbroadcasti32x2 with a memory source.
557 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
558 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask0:
560 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
561 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
563 %vec = load <2 x i32>, ptr %vp
564 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
565 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
566 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
569 define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
570 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask1:
572 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
573 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
575 %vec = load <2 x i32>, ptr %vp
576 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
577 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
578 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
582 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
583 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask1:
585 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
586 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
588 %vec = load <2 x i32>, ptr %vp
589 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
590 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
591 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
594 define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
595 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask2:
597 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
598 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
600 %vec = load <2 x i32>, ptr %vp
601 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
602 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
603 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
607 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
608 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask2:
610 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
611 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
613 %vec = load <2 x i32>, ptr %vp
614 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
615 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
616 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane keeps %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
619 define <16 x i32> @test_masked_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
620 ; CHECK-LABEL: test_masked_2xi32_to_16xi32_mem_mask3:
622 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
623 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
625 %vec = load <2 x i32>, ptr %vp
626 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
627 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
628 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the repeating
; 0,1 shuffle of the <2 x i32> loaded from %vp; every other lane is zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
632 define <16 x i32> @test_masked_z_2xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
633 ; CHECK-LABEL: test_masked_z_2xi32_to_16xi32_mem_mask3:
635 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
636 ; CHECK-NEXT: vbroadcasti32x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1,0,1,0,1,0,1,0,1]
638 %vec = load <2 x i32>, ptr %vp
639 %shuf = shufflevector <2 x i32> %vec, <2 x i32> undef, <16 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
640 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
641 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked memory case: loads a <4 x i32> from %vp and repeats all four
; elements twice (shuffle mask 0,1,2,3,0,1,2,3) to fill the <8 x i32> result.
; The expected assembly is a vbroadcasti128 with a memory source.
644 define <8 x i32> @test_4xi32_to_8xi32_mem(ptr %vp) {
645 ; CHECK-LABEL: test_4xi32_to_8xi32_mem:
647 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
649 %vec = load <4 x i32>, ptr %vp
650 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Merge-masked memory case: lanes where %mask equals zero take the
; 0,1,2,3,0,1,2,3 shuffle of the <4 x i32> loaded from %vp; every other lane
; keeps %default. The expected assembly is a predicated vbroadcasti32x4 from memory.
653 define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
654 ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask0:
656 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
657 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
659 %vec = load <4 x i32>, ptr %vp
660 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
661 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
662 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the
; 0,1,2,3,0,1,2,3 shuffle of the <4 x i32> loaded from %vp; every other lane is
; zero. The expected assembly is a {z}-predicated vbroadcasti32x4 from memory.
666 define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask0(ptr %vp, <8 x i32> %mask) {
667 ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask0:
669 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
670 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
672 %vec = load <4 x i32>, ptr %vp
673 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
674 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
675 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked memory case: lanes where %mask equals zero take the
; 0,1,2,3,0,1,2,3 shuffle of the <4 x i32> loaded from %vp; others keep %default.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
678 define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
679 ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask1:
681 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
682 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
684 %vec = load <4 x i32>, ptr %vp
685 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
686 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
687 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked memory case: lanes where %mask equals zero take the
; 0,1,2,3,0,1,2,3 shuffle of the <4 x i32> loaded from %vp; others are zero.
; NOTE(review): the visible IR is identical to the _mask0 variant; only the name differs.
691 define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask1(ptr %vp, <8 x i32> %mask) {
692 ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask1:
694 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
695 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
697 %vec = load <4 x i32>, ptr %vp
698 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
699 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
700 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; twice to form <8 x i32>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmd -> %k1 and a masked vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
703 define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
704 ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask2:
706 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
707 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
709 %vec = load <4 x i32>, ptr %vp
710 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
711 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
712 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; twice to form <8 x i32>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmd -> %k1 and a {z} vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
716 define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask2(ptr %vp, <8 x i32> %mask) {
717 ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask2:
719 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
720 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
722 %vec = load <4 x i32>, ptr %vp
723 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
724 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
725 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; twice to form <8 x i32>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmd -> %k1 and a masked vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
728 define <8 x i32> @test_masked_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %default, <8 x i32> %mask) {
729 ; CHECK-LABEL: test_masked_4xi32_to_8xi32_mem_mask3:
731 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
732 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,0,1,2,3]
734 %vec = load <4 x i32>, ptr %vp
735 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
736 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
737 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; twice to form <8 x i32>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmd -> %k1 and a {z} vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
741 define <8 x i32> @test_masked_z_4xi32_to_8xi32_mem_mask3(ptr %vp, <8 x i32> %mask) {
742 ; CHECK-LABEL: test_masked_z_4xi32_to_8xi32_mem_mask3:
744 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
745 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
747 %vec = load <4 x i32>, ptr %vp
748 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
749 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
750 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked broadcast from memory: the <4 x i32> loaded from %vp is repeated four
; times to fill <16 x i32>. Expects a single vbroadcasti32x4 with a memory
; operand writing zmm0.
753 define <16 x i32> @test_4xi32_to_16xi32_mem(ptr %vp) {
754 ; CHECK-LABEL: test_4xi32_to_16xi32_mem:
756 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
758 %vec = load <4 x i32>, ptr %vp
759 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x4.
762 define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
763 ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask0:
765 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
766 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
768 %vec = load <4 x i32>, ptr %vp
769 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
770 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
771 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x4.
775 define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
776 ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask0:
778 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
779 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
781 %vec = load <4 x i32>, ptr %vp
782 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
783 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
784 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
787 define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
788 ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask1:
790 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
791 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
793 %vec = load <4 x i32>, ptr %vp
794 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
795 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
796 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
800 define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
801 ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask1:
803 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
804 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
806 %vec = load <4 x i32>, ptr %vp
807 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
808 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
809 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
812 define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
813 ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask2:
815 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
816 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
818 %vec = load <4 x i32>, ptr %vp
819 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
820 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
821 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
825 define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
826 ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask2:
828 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
829 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
831 %vec = load <4 x i32>, ptr %vp
832 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
833 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
834 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
837 define <16 x i32> @test_masked_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
838 ; CHECK-LABEL: test_masked_4xi32_to_16xi32_mem_mask3:
840 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
841 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
843 %vec = load <4 x i32>, ptr %vp
844 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
845 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
846 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <4 x i32> loaded from %vp is repeated
; four times to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
850 define <16 x i32> @test_masked_z_4xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
851 ; CHECK-LABEL: test_masked_z_4xi32_to_16xi32_mem_mask3:
853 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
854 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
856 %vec = load <4 x i32>, ptr %vp
857 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
858 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
859 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to fill <4 x i64>. Expects vbroadcasti128 with a memory operand here,
; whereas the masked variants of this test expect vbroadcasti64x2.
862 define <4 x i64> @test_2xi64_to_4xi64_mem(ptr %vp) {
863 ; CHECK-LABEL: test_2xi64_to_4xi64_mem:
865 ; CHECK-NEXT: vbroadcasti128 {{.*#+}} ymm0 = mem[0,1,0,1]
867 %vec = load <2 x i64>, ptr %vp
868 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x2.
871 define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
872 ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask0:
874 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
875 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
877 %vec = load <2 x i64>, ptr %vp
878 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
879 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
880 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x2.
884 define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask0(ptr %vp, <4 x i64> %mask) {
885 ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask0:
887 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
888 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
890 %vec = load <2 x i64>, ptr %vp
891 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
892 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
893 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
896 define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
897 ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask1:
899 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
900 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
902 %vec = load <2 x i64>, ptr %vp
903 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
904 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
905 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
909 define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask1(ptr %vp, <4 x i64> %mask) {
910 ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask1:
912 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
913 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
915 %vec = load <2 x i64>, ptr %vp
916 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
917 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
918 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
921 define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
922 ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask2:
924 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
925 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
927 %vec = load <2 x i64>, ptr %vp
928 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
929 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
930 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
934 define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask2(ptr %vp, <4 x i64> %mask) {
935 ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask2:
937 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
938 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
940 %vec = load <2 x i64>, ptr %vp
941 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
942 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
943 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
946 define <4 x i64> @test_masked_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %default, <4 x i64> %mask) {
947 ; CHECK-LABEL: test_masked_2xi64_to_4xi64_mem_mask3:
949 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
950 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} = mem[0,1,0,1]
952 %vec = load <2 x i64>, ptr %vp
953 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
954 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
955 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; twice to form <4 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
959 define <4 x i64> @test_masked_z_2xi64_to_4xi64_mem_mask3(ptr %vp, <4 x i64> %mask) {
960 ; CHECK-LABEL: test_masked_z_2xi64_to_4xi64_mem_mask3:
962 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
963 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} ymm0 {%k1} {z} = mem[0,1,0,1]
965 %vec = load <2 x i64>, ptr %vp
966 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <4 x i32> <i32 0, i32 1, i32 0, i32 1>
967 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
968 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked broadcast from memory: the <2 x i64> loaded from %vp is repeated four
; times to fill <8 x i64>. Expects vbroadcasti32x4 here (its shuffle comment
; lists 32-bit elements), whereas the masked variants expect vbroadcasti64x2.
971 define <8 x i64> @test_2xi64_to_8xi64_mem(ptr %vp) {
972 ; CHECK-LABEL: test_2xi64_to_8xi64_mem:
974 ; CHECK-NEXT: vbroadcasti32x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3,0,1,2,3,0,1,2,3]
976 %vec = load <2 x i64>, ptr %vp
977 %res = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmq -> %k1 and a masked
; vbroadcasti64x2.
980 define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
981 ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask0:
983 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
984 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
986 %vec = load <2 x i64>, ptr %vp
987 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
988 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
989 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmq -> %k1 and a {z}
; vbroadcasti64x2.
993 define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) {
994 ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask0:
996 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
997 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
999 %vec = load <2 x i64>, ptr %vp
1000 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1001 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1002 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmq -> %k1 and a masked
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1005 define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1006 ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask1:
1008 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1009 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1011 %vec = load <2 x i64>, ptr %vp
1012 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1013 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1014 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmq -> %k1 and a {z}
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1018 define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) {
1019 ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask1:
1021 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1022 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1024 %vec = load <2 x i64>, ptr %vp
1025 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1026 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1027 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmq -> %k1 and a masked
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1030 define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1031 ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask2:
1033 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1034 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1036 %vec = load <2 x i64>, ptr %vp
1037 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1038 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1039 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmq -> %k1 and a {z}
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1043 define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) {
1044 ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask2:
1046 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1047 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1049 %vec = load <2 x i64>, ptr %vp
1050 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1051 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1052 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmq -> %k1 and a masked
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1055 define <8 x i64> @test_masked_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1056 ; CHECK-LABEL: test_masked_2xi64_to_8xi64_mem_mask3:
1058 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1059 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} = mem[0,1,0,1,0,1,0,1]
1061 %vec = load <2 x i64>, ptr %vp
1062 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1063 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1064 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <2 x i64> loaded from %vp is repeated
; four times to form <8 x i64>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmq -> %k1 and a {z}
; vbroadcasti64x2.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1068 define <8 x i64> @test_masked_z_2xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) {
1069 ; CHECK-LABEL: test_masked_z_2xi64_to_8xi64_mem_mask3:
1071 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1072 ; CHECK-NEXT: vbroadcasti64x2 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,0,1,0,1,0,1]
1074 %vec = load <2 x i64>, ptr %vp
1075 %shuf = shufflevector <2 x i64> %vec, <2 x i64> undef, <8 x i32> <i32 0, i32 1, i32 0, i32 1, i32 0, i32 1, i32 0, i32 1>
1076 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1077 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to fill <16 x i32>. Expects vbroadcasti64x4 here (its shuffle comment
; lists 64-bit elements), whereas the masked variants expect vbroadcasti32x8.
1080 define <16 x i32> @test_8xi32_to_16xi32_mem(ptr %vp) {
1081 ; CHECK-LABEL: test_8xi32_to_16xi32_mem:
1083 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
1085 %vec = load <8 x i32>, ptr %vp
1086 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Merge-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x8.
1089 define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1090 ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask0:
1092 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1093 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1095 %vec = load <8 x i32>, ptr %vp
1096 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1097 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1098 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x8.
1102 define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask0(ptr %vp, <16 x i32> %mask) {
1103 ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask0:
1105 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1106 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1108 %vec = load <8 x i32>, ptr %vp
1109 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1110 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1111 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1114 define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1115 ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask1:
1117 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1118 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1120 %vec = load <8 x i32>, ptr %vp
1121 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1122 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1123 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1127 define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask1(ptr %vp, <16 x i32> %mask) {
1128 ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask1:
1130 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1131 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1133 %vec = load <8 x i32>, ptr %vp
1134 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1135 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1136 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1139 define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1140 ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask2:
1142 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1143 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1145 %vec = load <8 x i32>, ptr %vp
1146 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1147 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1148 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1152 define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask2(ptr %vp, <16 x i32> %mask) {
1153 ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask2:
1155 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1156 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1158 %vec = load <8 x i32>, ptr %vp
1159 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1160 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1161 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest keep %default. Expects vptestnmd -> %k1 and a masked
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1164 define <16 x i32> @test_masked_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %default, <16 x i32> %mask) {
1165 ; CHECK-LABEL: test_masked_8xi32_to_16xi32_mem_mask3:
1167 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
1168 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1170 %vec = load <8 x i32>, ptr %vp
1171 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1172 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1173 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %default
; Zero-masked broadcast from memory: the <8 x i32> loaded from %vp is repeated
; twice to form <16 x i32>; lanes whose %mask element is zero take the
; broadcast, the rest become zero. Expects vptestnmd -> %k1 and a {z}
; vbroadcasti32x8.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1177 define <16 x i32> @test_masked_z_8xi32_to_16xi32_mem_mask3(ptr %vp, <16 x i32> %mask) {
1178 ; CHECK-LABEL: test_masked_z_8xi32_to_16xi32_mem_mask3:
1180 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1181 ; CHECK-NEXT: vbroadcasti32x8 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,5,6,7,0,1,2,3,4,5,6,7]
1183 %vec = load <8 x i32>, ptr %vp
1184 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
1185 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1186 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to fill <8 x i64>. Expects a single vbroadcasti64x4 with a memory
; operand writing zmm0.
1189 define <8 x i64> @test_4xi64_to_8xi64_mem(ptr %vp) {
1190 ; CHECK-LABEL: test_4xi64_to_8xi64_mem:
1192 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 = mem[0,1,2,3,0,1,2,3]
1194 %vec = load <4 x i64>, ptr %vp
1195 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
; Merge-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x4.
1198 define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1199 ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask0:
1201 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1202 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1204 %vec = load <4 x i64>, ptr %vp
1205 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1206 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1207 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x4.
1211 define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask0(ptr %vp, <8 x i64> %mask) {
1212 ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask0:
1214 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1215 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1217 %vec = load <4 x i64>, ptr %vp
1218 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1219 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1220 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1223 define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1224 ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask1:
1226 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1227 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1229 %vec = load <4 x i64>, ptr %vp
1230 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1231 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1232 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1236 define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask1(ptr %vp, <8 x i64> %mask) {
1237 ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask1:
1239 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1240 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1242 %vec = load <4 x i64>, ptr %vp
1243 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1244 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1245 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1248 define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1249 ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask2:
1251 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1252 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1254 %vec = load <4 x i64>, ptr %vp
1255 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1256 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1257 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1261 define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask2(ptr %vp, <8 x i64> %mask) {
1262 ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask2:
1264 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1265 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1267 %vec = load <4 x i64>, ptr %vp
1268 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1269 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1270 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest keep %default. Expects vptestnmq -> %k1 and a masked vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1273 define <8 x i64> @test_masked_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %default, <8 x i64> %mask) {
1274 ; CHECK-LABEL: test_masked_4xi64_to_8xi64_mem_mask3:
1276 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1277 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,0,1,2,3]
1279 %vec = load <4 x i64>, ptr %vp
1280 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1281 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1282 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %default
; Zero-masked broadcast from memory: the <4 x i64> loaded from %vp is repeated
; twice to form <8 x i64>; lanes whose %mask element is zero take the broadcast,
; the rest become zero. Expects vptestnmq -> %k1 and a {z} vbroadcasti64x4.
; NOTE(review): the visible IR matches the _mask0 variant apart from the name.
1286 define <8 x i64> @test_masked_z_4xi64_to_8xi64_mem_mask3(ptr %vp, <8 x i64> %mask) {
1287 ; CHECK-LABEL: test_masked_z_4xi64_to_8xi64_mem_mask3:
1289 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1290 ; CHECK-NEXT: vbroadcasti64x4 {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,0,1,2,3]
1292 %vec = load <4 x i64>, ptr %vp
1293 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 0, i32 1, i32 2, i32 3>
1294 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1295 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer