1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
; Baseline (unmasked) case: a single-source <16 x i8> shuffle of the register
; argument %vec. The expected asm is a vpshufb on xmm0 with this index list.
4 define <16 x i8> @test_16xi8_perm_mask0(<16 x i8> %vec) {
5 ; CHECK-LABEL: test_16xi8_perm_mask0:
7 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
9 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
; Merge-masked case: lanes where %mask is zero take the shuffle of %vec and the
; remaining lanes take %vec2. Expected asm: vptestnmb sets %k1, vpshufb writes
; xmm1 under %k1, and the result is then moved into xmm0.
12 define <16 x i8> @test_masked_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
13 ; CHECK-LABEL: test_masked_16xi8_perm_mask0:
15 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1
16 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
17 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
19 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
20 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
21 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked case: lanes where %mask is zero take the shuffle of %vec and the
; remaining lanes are zero. Expected asm: vptestnmb sets %k1, then vpshufb
; writes xmm0 with {%k1} {z}.
25 define <16 x i8> @test_masked_z_16xi8_perm_mask0(<16 x i8> %vec, <16 x i8> %mask) {
26 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask0:
28 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
29 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
31 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
32 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
33 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Merge-masked case with a second index list: lanes where %mask is zero take
; the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb sets %k1,
; vpshufb writes xmm1 under %k1, then xmm1 is moved into xmm0.
36 define <16 x i8> @test_masked_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
37 ; CHECK-LABEL: test_masked_16xi8_perm_mask1:
39 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1
40 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
41 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
43 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
44 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
45 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked case with a second index list: lanes where %mask is zero take
; the shuffle of %vec, the rest are zero. Expected asm: vptestnmb sets %k1,
; then vpshufb writes xmm0 with {%k1} {z}.
49 define <16 x i8> @test_masked_z_16xi8_perm_mask1(<16 x i8> %vec, <16 x i8> %mask) {
50 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask1:
52 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
53 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
55 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
56 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
57 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Merge-masked case with a third index list: lanes where %mask is zero take
; the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb sets %k1,
; vpshufb writes xmm1 under %k1, then xmm1 is moved into xmm0.
60 define <16 x i8> @test_masked_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
61 ; CHECK-LABEL: test_masked_16xi8_perm_mask2:
63 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1
64 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
65 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
67 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
68 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
69 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked case with a third index list: lanes where %mask is zero take
; the shuffle of %vec, the rest are zero. Expected asm: vptestnmb sets %k1,
; then vpshufb writes xmm0 with {%k1} {z}.
73 define <16 x i8> @test_masked_z_16xi8_perm_mask2(<16 x i8> %vec, <16 x i8> %mask) {
74 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask2:
76 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
77 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
79 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
80 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
81 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Unmasked case with a fourth index list: a single-source <16 x i8> shuffle of
; %vec. The expected asm is a vpshufb on xmm0 with the same indices.
84 define <16 x i8> @test_16xi8_perm_mask3(<16 x i8> %vec) {
85 ; CHECK-LABEL: test_16xi8_perm_mask3:
87 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
89 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
; Merge-masked case with a fourth index list: lanes where %mask is zero take
; the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb sets %k1,
; vpshufb writes xmm1 under %k1, then xmm1 is moved into xmm0.
92 define <16 x i8> @test_masked_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %vec2, <16 x i8> %mask) {
93 ; CHECK-LABEL: test_masked_16xi8_perm_mask3:
95 ; CHECK-NEXT: vptestnmb %xmm2, %xmm2, %k1
96 ; CHECK-NEXT: vpshufb {{.*#+}} xmm1 {%k1} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
97 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
99 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
100 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
101 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked case with a fourth index list: lanes where %mask is zero take
; the shuffle of %vec, the rest are zero. Expected asm: vptestnmb sets %k1,
; then vpshufb writes xmm0 with {%k1} {z}.
105 define <16 x i8> @test_masked_z_16xi8_perm_mask3(<16 x i8> %vec, <16 x i8> %mask) {
106 ; CHECK-LABEL: test_masked_z_16xi8_perm_mask3:
108 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
109 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm0[1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
111 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
112 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
113 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Unmasked case with the source vector loaded from %vp. Expected asm: a vmovdqa
; load from (%rdi) into xmm0 followed by vpshufb on that register.
116 define <16 x i8> @test_16xi8_perm_mem_mask0(ptr %vp) {
117 ; CHECK-LABEL: test_16xi8_perm_mem_mask0:
119 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
120 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
122 %vec = load <16 x i8>, ptr %vp
123 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
; Merge-masked case with the source loaded from %vp: lanes where %mask is zero
; take the shuffle, the rest take %vec2. Expected asm: vmovdqa load into xmm2,
; vptestnmb sets %k1, then vpshufb writes xmm0 from xmm2 under %k1.
126 define <16 x i8> @test_masked_16xi8_perm_mem_mask0(ptr %vp, <16 x i8> %vec2, <16 x i8> %mask) {
127 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask0:
129 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2
130 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
131 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
133 %vec = load <16 x i8>, ptr %vp
134 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
135 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
136 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked case with the source loaded from %vp: lanes where %mask is zero
; take the shuffle, the rest are zero. Expected asm: vmovdqa load into xmm1,
; vptestnmb sets %k1, then vpshufb writes xmm0 from xmm1 with {%k1} {z}.
140 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask0(ptr %vp, <16 x i8> %mask) {
141 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask0:
143 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1
144 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1
145 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
147 %vec = load <16 x i8>, ptr %vp
148 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
149 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
150 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Merge-masked, memory-source case with a second index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest take %vec2. Expected
; asm: vmovdqa load into xmm2, vptestnmb sets %k1, masked vpshufb into xmm0.
154 define <16 x i8> @test_masked_16xi8_perm_mem_mask1(ptr %vp, <16 x i8> %vec2, <16 x i8> %mask) {
155 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask1:
157 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2
158 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
159 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
161 %vec = load <16 x i8>, ptr %vp
162 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
163 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
164 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked, memory-source case with a second index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest are zero. Expected
; asm: vmovdqa load into xmm1, vptestnmb sets %k1, vpshufb with {%k1} {z}.
168 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask1(ptr %vp, <16 x i8> %mask) {
169 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask1:
171 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1
172 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1
173 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
175 %vec = load <16 x i8>, ptr %vp
176 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
177 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
178 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Merge-masked, memory-source case with a third index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest take %vec2. Expected
; asm: vmovdqa load into xmm2, vptestnmb sets %k1, masked vpshufb into xmm0.
182 define <16 x i8> @test_masked_16xi8_perm_mem_mask2(ptr %vp, <16 x i8> %vec2, <16 x i8> %mask) {
183 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask2:
185 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2
186 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
187 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
189 %vec = load <16 x i8>, ptr %vp
190 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
191 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
192 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked, memory-source case with a third index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest are zero. Expected
; asm: vmovdqa load into xmm1, vptestnmb sets %k1, vpshufb with {%k1} {z}.
196 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask2(ptr %vp, <16 x i8> %mask) {
197 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask2:
199 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1
200 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1
201 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
203 %vec = load <16 x i8>, ptr %vp
204 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
205 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
206 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Unmasked, memory-source case with a fourth index list. Expected asm: a
; vmovdqa load from (%rdi) into xmm0 followed by vpshufb on that register.
210 define <16 x i8> @test_16xi8_perm_mem_mask3(ptr %vp) {
211 ; CHECK-LABEL: test_16xi8_perm_mem_mask3:
213 ; CHECK-NEXT: vmovdqa (%rdi), %xmm0
214 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 = xmm0[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
216 %vec = load <16 x i8>, ptr %vp
217 %res = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
; Merge-masked, memory-source case with a fourth index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest take %vec2. Expected
; asm: vmovdqa load into xmm2, vptestnmb sets %k1, masked vpshufb into xmm0.
220 define <16 x i8> @test_masked_16xi8_perm_mem_mask3(ptr %vp, <16 x i8> %vec2, <16 x i8> %mask) {
221 ; CHECK-LABEL: test_masked_16xi8_perm_mem_mask3:
223 ; CHECK-NEXT: vmovdqa (%rdi), %xmm2
224 ; CHECK-NEXT: vptestnmb %xmm1, %xmm1, %k1
225 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} = xmm2[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
227 %vec = load <16 x i8>, ptr %vp
228 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
229 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
230 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> %vec2
; Zero-masked, memory-source case with a fourth index list: lanes where %mask
; is zero take the shuffle of the loaded vector, the rest are zero. Expected
; asm: vmovdqa load into xmm1, vptestnmb sets %k1, vpshufb with {%k1} {z}.
234 define <16 x i8> @test_masked_z_16xi8_perm_mem_mask3(ptr %vp, <16 x i8> %mask) {
235 ; CHECK-LABEL: test_masked_z_16xi8_perm_mem_mask3:
237 ; CHECK-NEXT: vmovdqa (%rdi), %xmm1
238 ; CHECK-NEXT: vptestnmb %xmm0, %xmm0, %k1
239 ; CHECK-NEXT: vpshufb {{.*#+}} xmm0 {%k1} {z} = xmm1[9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
241 %vec = load <16 x i8>, ptr %vp
242 %shuf = shufflevector <16 x i8> %vec, <16 x i8> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
243 %cmp = icmp eq <16 x i8> %mask, zeroinitializer
244 %res = select <16 x i1> %cmp, <16 x i8> %shuf, <16 x i8> zeroinitializer
; Baseline (unmasked) case for <32 x i8>: a single-source shuffle of the
; register argument %vec. The expected asm is a vpshufb on ymm0 with this
; index list.
248 define <32 x i8> @test_32xi8_perm_mask0(<32 x i8> %vec) {
249 ; CHECK-LABEL: test_32xi8_perm_mask0:
251 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
253 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
; Merge-masked <32 x i8> case: lanes where %mask is zero take the shuffle of
; %vec and the remaining lanes take %vec2. Expected asm: vptestnmb sets %k1,
; vpshufb writes ymm1 under %k1, and the result is then moved into ymm0.
256 define <32 x i8> @test_masked_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
257 ; CHECK-LABEL: test_masked_32xi8_perm_mask0:
259 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1
260 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
261 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
263 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
264 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
265 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked <32 x i8> case: lanes where %mask is zero take the shuffle of
; %vec and the remaining lanes are zero. Expected asm: vptestnmb sets %k1,
; then vpshufb writes ymm0 with {%k1} {z}.
269 define <32 x i8> @test_masked_z_32xi8_perm_mask0(<32 x i8> %vec, <32 x i8> %mask) {
270 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask0:
272 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
273 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[8,0,1,15,3,5,11,13,14,2,10,15,0,10,13,5,20,25,23,18,23,22,25,24,20,21,29,20,24,16,27,21]
275 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 8, i32 0, i32 1, i32 15, i32 3, i32 5, i32 11, i32 13, i32 14, i32 2, i32 10, i32 15, i32 0, i32 10, i32 13, i32 5, i32 20, i32 25, i32 23, i32 18, i32 23, i32 22, i32 25, i32 24, i32 20, i32 21, i32 29, i32 20, i32 24, i32 16, i32 27, i32 21>
276 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
277 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Merge-masked <32 x i8> case with a second index list: lanes where %mask is
; zero take the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb
; sets %k1, vpshufb writes ymm1 under %k1, then ymm1 is moved into ymm0.
280 define <32 x i8> @test_masked_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
281 ; CHECK-LABEL: test_masked_32xi8_perm_mask1:
283 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1
284 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24]
285 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
287 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
288 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
289 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked <32 x i8> case with a second index list: lanes where %mask is
; zero take the shuffle of %vec, the rest are zero. Expected asm: vptestnmb
; sets %k1, then vpshufb writes ymm0 with {%k1} {z}.
293 define <32 x i8> @test_masked_z_32xi8_perm_mask1(<32 x i8> %vec, <32 x i8> %mask) {
294 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask1:
296 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
297 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[0,4,3,15,5,4,5,15,10,9,11,6,6,10,0,3,21,19,26,22,30,25,22,22,27,22,26,16,23,20,18,24]
299 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 0, i32 4, i32 3, i32 15, i32 5, i32 4, i32 5, i32 15, i32 10, i32 9, i32 11, i32 6, i32 6, i32 10, i32 0, i32 3, i32 21, i32 19, i32 26, i32 22, i32 30, i32 25, i32 22, i32 22, i32 27, i32 22, i32 26, i32 16, i32 23, i32 20, i32 18, i32 24>
300 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
301 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Merge-masked <32 x i8> case with a third index list: lanes where %mask is
; zero take the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb
; sets %k1, vpshufb writes ymm1 under %k1, then ymm1 is moved into ymm0.
304 define <32 x i8> @test_masked_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
305 ; CHECK-LABEL: test_masked_32xi8_perm_mask2:
307 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1
308 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29]
309 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
311 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
312 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
313 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked <32 x i8> case with a third index list: lanes where %mask is
; zero take the shuffle of %vec, the rest are zero. Expected asm: vptestnmb
; sets %k1, then vpshufb writes ymm0 with {%k1} {z}.
317 define <32 x i8> @test_masked_z_32xi8_perm_mask2(<32 x i8> %vec, <32 x i8> %mask) {
318 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask2:
320 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
321 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[7,8,12,14,7,4,7,12,14,12,3,15,10,1,11,15,22,26,21,19,27,16,29,24,17,17,26,29,20,31,17,29]
323 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 7, i32 8, i32 12, i32 14, i32 7, i32 4, i32 7, i32 12, i32 14, i32 12, i32 3, i32 15, i32 10, i32 1, i32 11, i32 15, i32 22, i32 26, i32 21, i32 19, i32 27, i32 16, i32 29, i32 24, i32 17, i32 17, i32 26, i32 29, i32 20, i32 31, i32 17, i32 29>
324 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
325 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Unmasked <32 x i8> case with a fourth index list: a single-source shuffle of
; %vec. The expected asm is a vpshufb on ymm0 with the same indices.
328 define <32 x i8> @test_32xi8_perm_mask3(<32 x i8> %vec) {
329 ; CHECK-LABEL: test_32xi8_perm_mask3:
331 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
333 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
; Merge-masked <32 x i8> case with a fourth index list: lanes where %mask is
; zero take the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb
; sets %k1, vpshufb writes ymm1 under %k1, then ymm1 is moved into ymm0.
336 define <32 x i8> @test_masked_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %vec2, <32 x i8> %mask) {
337 ; CHECK-LABEL: test_masked_32xi8_perm_mask3:
339 ; CHECK-NEXT: vptestnmb %ymm2, %ymm2, %k1
340 ; CHECK-NEXT: vpshufb {{.*#+}} ymm1 {%k1} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
341 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
343 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
344 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
345 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked <32 x i8> case with a fourth index list: lanes where %mask is
; zero take the shuffle of %vec, the rest are zero. Expected asm: vptestnmb
; sets %k1, then vpshufb writes ymm0 with {%k1} {z}.
349 define <32 x i8> @test_masked_z_32xi8_perm_mask3(<32 x i8> %vec, <32 x i8> %mask) {
350 ; CHECK-LABEL: test_masked_z_32xi8_perm_mask3:
352 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
353 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm0[6,1,4,7,12,13,2,8,10,5,13,4,0,0,10,8,31,31,30,16,27,27,26,27,30,26,21,24,19,25,16,18]
355 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 6, i32 1, i32 4, i32 7, i32 12, i32 13, i32 2, i32 8, i32 10, i32 5, i32 13, i32 4, i32 0, i32 0, i32 10, i32 8, i32 31, i32 31, i32 30, i32 16, i32 27, i32 27, i32 26, i32 27, i32 30, i32 26, i32 21, i32 24, i32 19, i32 25, i32 16, i32 18>
356 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
357 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Unmasked <32 x i8> case with the source vector loaded from %vp. Expected asm:
; a vmovdqa load from (%rdi) into ymm0 followed by vpshufb on that register.
360 define <32 x i8> @test_32xi8_perm_mem_mask0(ptr %vp) {
361 ; CHECK-LABEL: test_32xi8_perm_mem_mask0:
363 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0
364 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
366 %vec = load <32 x i8>, ptr %vp
367 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
; Merge-masked <32 x i8> case with the source loaded from %vp: lanes where
; %mask is zero take the shuffle, the rest take %vec2. Expected asm: vmovdqa
; load into ymm2, vptestnmb sets %k1, vpshufb writes ymm0 from ymm2 under %k1.
370 define <32 x i8> @test_masked_32xi8_perm_mem_mask0(ptr %vp, <32 x i8> %vec2, <32 x i8> %mask) {
371 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask0:
373 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2
374 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
375 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
377 %vec = load <32 x i8>, ptr %vp
378 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
379 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
380 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked <32 x i8> case with the source loaded from %vp: lanes where
; %mask is zero take the shuffle, the rest are zero. Expected asm: vmovdqa load
; into ymm1, vptestnmb sets %k1, vpshufb writes ymm0 from ymm1 with {%k1} {z}.
384 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask0(ptr %vp, <32 x i8> %mask) {
385 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask0:
387 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1
388 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1
389 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[9,0,2,15,4,6,8,4,7,3,0,2,8,1,6,5,22,17,30,23,29,31,21,23,27,22,20,27,30,30,26,22]
391 %vec = load <32 x i8>, ptr %vp
392 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 9, i32 0, i32 2, i32 15, i32 4, i32 6, i32 8, i32 4, i32 7, i32 3, i32 0, i32 2, i32 8, i32 1, i32 6, i32 5, i32 22, i32 17, i32 30, i32 23, i32 29, i32 31, i32 21, i32 23, i32 27, i32 22, i32 20, i32 27, i32 30, i32 30, i32 26, i32 22>
393 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
394 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Merge-masked, memory-source <32 x i8> case with a second index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest take
; %vec2. Expected asm: vmovdqa load into ymm2, vptestnmb sets %k1, masked
; vpshufb into ymm0.
398 define <32 x i8> @test_masked_32xi8_perm_mem_mask1(ptr %vp, <32 x i8> %vec2, <32 x i8> %mask) {
399 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask1:
401 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2
402 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
403 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19]
405 %vec = load <32 x i8>, ptr %vp
406 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
407 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
408 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked, memory-source <32 x i8> case with a second index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest are zero.
; Expected asm: vmovdqa load into ymm1, vptestnmb sets %k1, vpshufb with
; {%k1} {z}.
412 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask1(ptr %vp, <32 x i8> %mask) {
413 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask1:
415 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1
416 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1
417 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[15,10,1,1,11,0,0,6,8,7,7,9,10,6,5,15,20,28,22,21,17,29,27,30,23,26,17,22,19,16,31,19]
419 %vec = load <32 x i8>, ptr %vp
420 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 15, i32 10, i32 1, i32 1, i32 11, i32 0, i32 0, i32 6, i32 8, i32 7, i32 7, i32 9, i32 10, i32 6, i32 5, i32 15, i32 20, i32 28, i32 22, i32 21, i32 17, i32 29, i32 27, i32 30, i32 23, i32 26, i32 17, i32 22, i32 19, i32 16, i32 31, i32 19>
421 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
422 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Merge-masked, memory-source <32 x i8> case with a third index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest take
; %vec2. Expected asm: vmovdqa load into ymm2, vptestnmb sets %k1, masked
; vpshufb into ymm0.
426 define <32 x i8> @test_masked_32xi8_perm_mem_mask2(ptr %vp, <32 x i8> %vec2, <32 x i8> %mask) {
427 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask2:
429 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2
430 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
431 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28]
433 %vec = load <32 x i8>, ptr %vp
434 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
435 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
436 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked, memory-source <32 x i8> case with a third index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest are zero.
; Expected asm: vmovdqa load into ymm1, vptestnmb sets %k1, vpshufb with
; {%k1} {z}.
440 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask2(ptr %vp, <32 x i8> %mask) {
441 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask2:
443 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1
444 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1
445 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[2,3,6,8,2,15,15,2,6,10,14,7,14,5,7,7,26,19,25,19,21,31,30,29,16,18,20,28,29,25,27,28]
447 %vec = load <32 x i8>, ptr %vp
448 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 2, i32 3, i32 6, i32 8, i32 2, i32 15, i32 15, i32 2, i32 6, i32 10, i32 14, i32 7, i32 14, i32 5, i32 7, i32 7, i32 26, i32 19, i32 25, i32 19, i32 21, i32 31, i32 30, i32 29, i32 16, i32 18, i32 20, i32 28, i32 29, i32 25, i32 27, i32 28>
449 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
450 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Unmasked, memory-source <32 x i8> case with a fourth index list. Expected
; asm: a vmovdqa load from (%rdi) into ymm0 followed by vpshufb on that
; register.
454 define <32 x i8> @test_32xi8_perm_mem_mask3(ptr %vp) {
455 ; CHECK-LABEL: test_32xi8_perm_mem_mask3:
457 ; CHECK-NEXT: vmovdqa (%rdi), %ymm0
458 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 = ymm0[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
460 %vec = load <32 x i8>, ptr %vp
461 %res = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
; Merge-masked, memory-source <32 x i8> case with a fourth index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest take
; %vec2. Expected asm: vmovdqa load into ymm2, vptestnmb sets %k1, masked
; vpshufb into ymm0.
464 define <32 x i8> @test_masked_32xi8_perm_mem_mask3(ptr %vp, <32 x i8> %vec2, <32 x i8> %mask) {
465 ; CHECK-LABEL: test_masked_32xi8_perm_mem_mask3:
467 ; CHECK-NEXT: vmovdqa (%rdi), %ymm2
468 ; CHECK-NEXT: vptestnmb %ymm1, %ymm1, %k1
469 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} = ymm2[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
471 %vec = load <32 x i8>, ptr %vp
472 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
473 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
474 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> %vec2
; Zero-masked, memory-source <32 x i8> case with a fourth index list: lanes
; where %mask is zero take the shuffle of the loaded vector, the rest are zero.
; Expected asm: vmovdqa load into ymm1, vptestnmb sets %k1, vpshufb with
; {%k1} {z}.
478 define <32 x i8> @test_masked_z_32xi8_perm_mem_mask3(ptr %vp, <32 x i8> %mask) {
479 ; CHECK-LABEL: test_masked_z_32xi8_perm_mem_mask3:
481 ; CHECK-NEXT: vmovdqa (%rdi), %ymm1
482 ; CHECK-NEXT: vptestnmb %ymm0, %ymm0, %k1
483 ; CHECK-NEXT: vpshufb {{.*#+}} ymm0 {%k1} {z} = ymm1[1,1,13,0,3,0,0,13,5,2,2,10,15,8,14,8,25,26,28,28,31,27,30,19,24,25,29,23,28,22,25,29]
485 %vec = load <32 x i8>, ptr %vp
486 %shuf = shufflevector <32 x i8> %vec, <32 x i8> undef, <32 x i32> <i32 1, i32 1, i32 13, i32 0, i32 3, i32 0, i32 0, i32 13, i32 5, i32 2, i32 2, i32 10, i32 15, i32 8, i32 14, i32 8, i32 25, i32 26, i32 28, i32 28, i32 31, i32 27, i32 30, i32 19, i32 24, i32 25, i32 29, i32 23, i32 28, i32 22, i32 25, i32 29>
487 %cmp = icmp eq <32 x i8> %mask, zeroinitializer
488 %res = select <32 x i1> %cmp, <32 x i8> %shuf, <32 x i8> zeroinitializer
; Baseline (unmasked) case for <64 x i8>: a single-source shuffle of the
; register argument %vec. The expected asm is a vpshufb on zmm0 with this
; index list.
492 define <64 x i8> @test_64xi8_perm_mask0(<64 x i8> %vec) {
493 ; CHECK-LABEL: test_64xi8_perm_mask0:
495 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
497 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
; Merge-masked <64 x i8> case: lanes where %mask is zero take the shuffle of
; %vec and the remaining lanes take %vec2. Expected asm: vptestnmb sets %k1,
; vpshufb writes zmm1 under %k1, and vmovdqa64 then moves zmm1 into zmm0.
500 define <64 x i8> @test_masked_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
501 ; CHECK-LABEL: test_masked_64xi8_perm_mask0:
503 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1
504 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
505 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
507 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
508 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
509 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked <64 x i8> case: lanes where %mask is zero take the shuffle of
; %vec and the remaining lanes are zero. Expected asm: vptestnmb sets %k1,
; then vpshufb writes zmm0 with {%k1} {z}.
513 define <64 x i8> @test_masked_z_64xi8_perm_mask0(<64 x i8> %vec, <64 x i8> %mask) {
514 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask0:
516 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
517 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[8,4,1,13,15,4,6,12,0,10,2,4,13,0,0,6,23,29,27,26,18,31,22,25,22,16,23,18,16,25,26,17,40,37,38,44,39,46,41,39,42,37,33,42,41,44,34,46,60,62,61,58,60,56,60,51,60,55,60,55,60,49,48,62]
519 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 8, i32 4, i32 1, i32 13, i32 15, i32 4, i32 6, i32 12, i32 0, i32 10, i32 2, i32 4, i32 13, i32 0, i32 0, i32 6, i32 23, i32 29, i32 27, i32 26, i32 18, i32 31, i32 22, i32 25, i32 22, i32 16, i32 23, i32 18, i32 16, i32 25, i32 26, i32 17, i32 40, i32 37, i32 38, i32 44, i32 39, i32 46, i32 41, i32 39, i32 42, i32 37, i32 33, i32 42, i32 41, i32 44, i32 34, i32 46, i32 60, i32 62, i32 61, i32 58, i32 60, i32 56, i32 60, i32 51, i32 60, i32 55, i32 60, i32 55, i32 60, i32 49, i32 48, i32 62>
520 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
521 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Merge-masked <64 x i8> case with a second index list: lanes where %mask is
; zero take the shuffle of %vec, the rest take %vec2. Expected asm: vptestnmb
; sets %k1, vpshufb writes zmm1 under %k1, then vmovdqa64 moves zmm1 into zmm0.
524 define <64 x i8> @test_masked_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
525 ; CHECK-LABEL: test_masked_64xi8_perm_mask1:
527 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1
528 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49]
529 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
531 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
532 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
533 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked byte shuffle of <64 x i8> %vec: lanes where the %mask byte equals
; zero receive the shuffled byte, the remaining lanes become zero. Expected code:
; vptestnmb builds %k1, then vpshufb with {%k1} {z} writes zmm0.
537 define <64 x i8> @test_masked_z_64xi8_perm_mask1(<64 x i8> %vec, <64 x i8> %mask) {
538 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask1:
540 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
541 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[7,14,15,10,9,3,1,13,14,12,11,6,4,1,6,9,30,30,22,17,28,27,16,23,26,16,30,31,27,17,17,21,32,37,32,47,45,33,46,35,35,42,47,33,32,37,32,41,61,50,49,53,63,50,63,53,55,52,62,63,58,50,63,49]
543 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 7, i32 14, i32 15, i32 10, i32 9, i32 3, i32 1, i32 13, i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 30, i32 30, i32 22, i32 17, i32 28, i32 27, i32 16, i32 23, i32 26, i32 16, i32 30, i32 31, i32 27, i32 17, i32 17, i32 21, i32 32, i32 37, i32 32, i32 47, i32 45, i32 33, i32 46, i32 35, i32 35, i32 42, i32 47, i32 33, i32 32, i32 37, i32 32, i32 41, i32 61, i32 50, i32 49, i32 53, i32 63, i32 50, i32 63, i32 53, i32 55, i32 52, i32 62, i32 63, i32 58, i32 50, i32 63, i32 49>
544 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
545 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Merge-masked byte shuffle of <64 x i8> %vec with a third constant index list:
; lanes where the %mask byte equals zero receive the shuffled byte, the others
; keep %vec2. Expected code: vptestnmb, masked vpshufb into zmm1, vmovdqa64 to zmm0.
548 define <64 x i8> @test_masked_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
549 ; CHECK-LABEL: test_masked_64xi8_perm_mask2:
551 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1
552 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60]
553 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
555 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
556 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
557 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked variant of the same byte shuffle: lanes where the %mask byte equals
; zero receive the shuffled %vec byte, all other lanes become zero. Expected code:
; vptestnmb builds %k1, then vpshufb with {%k1} {z} writes zmm0.
561 define <64 x i8> @test_masked_z_64xi8_perm_mask2(<64 x i8> %vec, <64 x i8> %mask) {
562 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask2:
564 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
565 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[9,2,14,15,12,5,3,12,4,6,0,2,0,1,1,6,24,27,18,22,26,17,23,21,31,16,22,22,27,21,19,20,39,47,44,36,40,43,44,39,38,44,38,35,39,46,34,39,58,55,51,48,59,57,48,52,60,58,56,50,59,55,58,60]
567 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12, i32 4, i32 6, i32 0, i32 2, i32 0, i32 1, i32 1, i32 6, i32 24, i32 27, i32 18, i32 22, i32 26, i32 17, i32 23, i32 21, i32 31, i32 16, i32 22, i32 22, i32 27, i32 21, i32 19, i32 20, i32 39, i32 47, i32 44, i32 36, i32 40, i32 43, i32 44, i32 39, i32 38, i32 44, i32 38, i32 35, i32 39, i32 46, i32 34, i32 39, i32 58, i32 55, i32 51, i32 48, i32 59, i32 57, i32 48, i32 52, i32 60, i32 58, i32 56, i32 50, i32 59, i32 55, i32 58, i32 60>
568 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
569 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Unmasked byte shuffle of a <64 x i8> register operand using a constant index
; list; the expected output is a single vpshufb on zmm0.
572 define <64 x i8> @test_64xi8_perm_mask3(<64 x i8> %vec) {
573 ; CHECK-LABEL: test_64xi8_perm_mask3:
575 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
577 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
; Merge-masked form of the mask3 byte shuffle: lanes where the %mask byte equals
; zero receive the shuffled %vec byte, the others keep %vec2. Expected code:
; vptestnmb builds %k1, vpshufb writes zmm1 under %k1, vmovdqa64 copies to zmm0.
580 define <64 x i8> @test_masked_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %vec2, <64 x i8> %mask) {
581 ; CHECK-LABEL: test_masked_64xi8_perm_mask3:
583 ; CHECK-NEXT: vptestnmb %zmm2, %zmm2, %k1
584 ; CHECK-NEXT: vpshufb {{.*#+}} zmm1 {%k1} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
585 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
587 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
588 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
589 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked form of the mask3 byte shuffle: lanes where the %mask byte equals
; zero receive the shuffled %vec byte, the others become zero. Expected code:
; vptestnmb builds %k1, then vpshufb with {%k1} {z} writes zmm0.
593 define <64 x i8> @test_masked_z_64xi8_perm_mask3(<64 x i8> %vec, <64 x i8> %mask) {
594 ; CHECK-LABEL: test_masked_z_64xi8_perm_mask3:
596 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
597 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm0[3,12,4,15,1,14,0,4,8,9,6,1,4,4,12,14,25,16,28,20,21,24,19,30,18,22,20,24,25,26,24,22,42,38,44,44,36,37,42,34,43,38,41,34,42,37,39,38,55,59,53,58,48,52,59,48,57,48,55,62,48,56,49,61]
599 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1, i32 4, i32 4, i32 12, i32 14, i32 25, i32 16, i32 28, i32 20, i32 21, i32 24, i32 19, i32 30, i32 18, i32 22, i32 20, i32 24, i32 25, i32 26, i32 24, i32 22, i32 42, i32 38, i32 44, i32 44, i32 36, i32 37, i32 42, i32 34, i32 43, i32 38, i32 41, i32 34, i32 42, i32 37, i32 39, i32 38, i32 55, i32 59, i32 53, i32 58, i32 48, i32 52, i32 59, i32 48, i32 57, i32 48, i32 55, i32 62, i32 48, i32 56, i32 49, i32 61>
600 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
601 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Unmasked byte shuffle of a <64 x i8> value loaded from %vp. The expected code
; loads with vmovdqa64 (%rdi) into zmm0 and then applies one vpshufb.
604 define <64 x i8> @test_64xi8_perm_mem_mask0(ptr %vp) {
605 ; CHECK-LABEL: test_64xi8_perm_mem_mask0:
607 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
608 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
610 %vec = load <64 x i8>, ptr %vp
611 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
; Merge-masked byte shuffle of a <64 x i8> value loaded from %vp: lanes where the
; %mask byte equals zero receive the shuffled byte, the others keep %vec2.
; Expected code: vmovdqa64 load into zmm2, vptestnmb on zmm1, masked vpshufb into zmm0.
614 define <64 x i8> @test_masked_64xi8_perm_mem_mask0(ptr %vp, <64 x i8> %vec2, <64 x i8> %mask) {
615 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask0:
617 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
618 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
619 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
621 %vec = load <64 x i8>, ptr %vp
622 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
623 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
624 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked byte shuffle of a <64 x i8> value loaded from %vp: lanes where the
; %mask byte equals zero receive the shuffled byte, the others become zero.
; Expected code: vmovdqa64 load into zmm1, vptestnmb on zmm0, vpshufb {%k1} {z} into zmm0.
628 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask0(ptr %vp, <64 x i8> %mask) {
629 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask0:
631 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
632 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1
633 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[0,9,15,13,11,11,3,12,4,1,7,5,2,6,14,6,23,27,24,18,30,23,28,22,28,22,19,19,31,25,16,22,35,33,34,32,42,34,41,41,43,40,36,46,37,39,42,40,63,63,62,62,57,55,59,51,52,48,50,48,58,50,60,58]
635 %vec = load <64 x i8>, ptr %vp
636 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 0, i32 9, i32 15, i32 13, i32 11, i32 11, i32 3, i32 12, i32 4, i32 1, i32 7, i32 5, i32 2, i32 6, i32 14, i32 6, i32 23, i32 27, i32 24, i32 18, i32 30, i32 23, i32 28, i32 22, i32 28, i32 22, i32 19, i32 19, i32 31, i32 25, i32 16, i32 22, i32 35, i32 33, i32 34, i32 32, i32 42, i32 34, i32 41, i32 41, i32 43, i32 40, i32 36, i32 46, i32 37, i32 39, i32 42, i32 40, i32 63, i32 63, i32 62, i32 62, i32 57, i32 55, i32 59, i32 51, i32 52, i32 48, i32 50, i32 48, i32 58, i32 50, i32 60, i32 58>
637 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
638 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Merge-masked byte shuffle of a <64 x i8> value loaded from %vp with a second
; constant index list; lanes whose %mask byte is non-zero keep %vec2.
; Expected code: vmovdqa64 load into zmm2, vptestnmb on zmm1, masked vpshufb into zmm0.
642 define <64 x i8> @test_masked_64xi8_perm_mem_mask1(ptr %vp, <64 x i8> %vec2, <64 x i8> %mask) {
643 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask1:
645 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
646 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
647 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
649 %vec = load <64 x i8>, ptr %vp
650 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
651 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
652 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked byte shuffle of a <64 x i8> value loaded from %vp with a second
; constant index list; lanes whose %mask byte is non-zero become zero.
; Expected code: vmovdqa64 load into zmm1, vptestnmb on zmm0, vpshufb {%k1} {z} into zmm0.
656 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask1(ptr %vp, <64 x i8> %mask) {
657 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask1:
659 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
660 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1
661 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[15,6,14,7,5,1,14,12,5,7,5,0,0,5,3,8,19,19,26,27,20,29,20,21,27,16,30,17,23,27,16,28,47,39,33,33,33,44,38,46,39,33,38,44,45,32,34,39,50,61,62,53,54,56,52,56,51,52,55,57,56,52,51,49]
663 %vec = load <64 x i8>, ptr %vp
664 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 15, i32 6, i32 14, i32 7, i32 5, i32 1, i32 14, i32 12, i32 5, i32 7, i32 5, i32 0, i32 0, i32 5, i32 3, i32 8, i32 19, i32 19, i32 26, i32 27, i32 20, i32 29, i32 20, i32 21, i32 27, i32 16, i32 30, i32 17, i32 23, i32 27, i32 16, i32 28, i32 47, i32 39, i32 33, i32 33, i32 33, i32 44, i32 38, i32 46, i32 39, i32 33, i32 38, i32 44, i32 45, i32 32, i32 34, i32 39, i32 50, i32 61, i32 62, i32 53, i32 54, i32 56, i32 52, i32 56, i32 51, i32 52, i32 55, i32 57, i32 56, i32 52, i32 51, i32 49>
665 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
666 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Merge-masked byte shuffle of a <64 x i8> value loaded from %vp with a third
; constant index list; lanes whose %mask byte is non-zero keep %vec2.
; Expected code: vmovdqa64 load into zmm2, vptestnmb on zmm1, masked vpshufb into zmm0.
670 define <64 x i8> @test_masked_64xi8_perm_mem_mask2(ptr %vp, <64 x i8> %vec2, <64 x i8> %mask) {
671 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask2:
673 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
674 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
675 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
677 %vec = load <64 x i8>, ptr %vp
678 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
679 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
680 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked byte shuffle of a <64 x i8> value loaded from %vp with a third
; constant index list; lanes whose %mask byte is non-zero become zero.
; Expected code: vmovdqa64 load into zmm1, vptestnmb on zmm0, vpshufb {%k1} {z} into zmm0.
684 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask2(ptr %vp, <64 x i8> %mask) {
685 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask2:
687 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
688 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1
689 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[12,1,11,3,4,11,10,11,8,13,1,10,1,11,5,10,27,26,19,29,19,24,26,19,26,20,18,28,24,21,25,16,34,38,47,40,33,44,44,44,41,43,35,43,45,44,37,41,58,62,49,61,56,53,55,48,51,58,58,55,63,55,53,61]
691 %vec = load <64 x i8>, ptr %vp
692 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 12, i32 1, i32 11, i32 3, i32 4, i32 11, i32 10, i32 11, i32 8, i32 13, i32 1, i32 10, i32 1, i32 11, i32 5, i32 10, i32 27, i32 26, i32 19, i32 29, i32 19, i32 24, i32 26, i32 19, i32 26, i32 20, i32 18, i32 28, i32 24, i32 21, i32 25, i32 16, i32 34, i32 38, i32 47, i32 40, i32 33, i32 44, i32 44, i32 44, i32 41, i32 43, i32 35, i32 43, i32 45, i32 44, i32 37, i32 41, i32 58, i32 62, i32 49, i32 61, i32 56, i32 53, i32 55, i32 48, i32 51, i32 58, i32 58, i32 55, i32 63, i32 55, i32 53, i32 61>
693 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
694 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Unmasked byte shuffle of a <64 x i8> value loaded from %vp with a fourth
; constant index list. The expected code loads with vmovdqa64 (%rdi) into zmm0
; and then applies one vpshufb.
698 define <64 x i8> @test_64xi8_perm_mem_mask3(ptr %vp) {
699 ; CHECK-LABEL: test_64xi8_perm_mem_mask3:
701 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm0
702 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 = zmm0[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
704 %vec = load <64 x i8>, ptr %vp
705 %res = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
; Merge-masked form of the mem_mask3 byte shuffle: the <64 x i8> value is loaded
; from %vp, and lanes whose %mask byte is non-zero keep %vec2.
; Expected code: vmovdqa64 load into zmm2, vptestnmb on zmm1, masked vpshufb into zmm0.
708 define <64 x i8> @test_masked_64xi8_perm_mem_mask3(ptr %vp, <64 x i8> %vec2, <64 x i8> %mask) {
709 ; CHECK-LABEL: test_masked_64xi8_perm_mem_mask3:
711 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm2
712 ; CHECK-NEXT: vptestnmb %zmm1, %zmm1, %k1
713 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} = zmm2[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
715 %vec = load <64 x i8>, ptr %vp
716 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
717 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
718 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> %vec2
; Zero-masked form of the mem_mask3 byte shuffle: the <64 x i8> value is loaded
; from %vp, and lanes whose %mask byte is non-zero become zero.
; Expected code: vmovdqa64 load into zmm1, vptestnmb on zmm0, vpshufb {%k1} {z} into zmm0.
722 define <64 x i8> @test_masked_z_64xi8_perm_mem_mask3(ptr %vp, <64 x i8> %mask) {
723 ; CHECK-LABEL: test_masked_z_64xi8_perm_mem_mask3:
725 ; CHECK-NEXT: vmovdqa64 (%rdi), %zmm1
726 ; CHECK-NEXT: vptestnmb %zmm0, %zmm0, %k1
727 ; CHECK-NEXT: vpshufb {{.*#+}} zmm0 {%k1} {z} = zmm1[4,9,11,13,12,6,0,0,11,15,5,7,11,10,4,10,20,21,24,27,18,16,26,16,16,19,26,17,16,31,22,30,35,38,37,34,37,47,43,38,38,36,40,43,42,39,32,46,54,54,48,50,61,56,59,50,53,61,61,51,48,60,50,60]
729 %vec = load <64 x i8>, ptr %vp
730 %shuf = shufflevector <64 x i8> %vec, <64 x i8> undef, <64 x i32> <i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7, i32 11, i32 10, i32 4, i32 10, i32 20, i32 21, i32 24, i32 27, i32 18, i32 16, i32 26, i32 16, i32 16, i32 19, i32 26, i32 17, i32 16, i32 31, i32 22, i32 30, i32 35, i32 38, i32 37, i32 34, i32 37, i32 47, i32 43, i32 38, i32 38, i32 36, i32 40, i32 43, i32 42, i32 39, i32 32, i32 46, i32 54, i32 54, i32 48, i32 50, i32 61, i32 56, i32 59, i32 50, i32 53, i32 61, i32 61, i32 51, i32 48, i32 60, i32 50, i32 60>
731 %cmp = icmp eq <64 x i8> %mask, zeroinitializer
732 %res = select <64 x i1> %cmp, <64 x i8> %shuf, <64 x i8> zeroinitializer
; Unmasked <8 x i16> shuffle that keeps elements 0-3 in place and reorders
; elements 4-7 as 6,5,7,6; the expected output is a single vpshufhw.
736 define <8 x i16> @test_8xi16_perm_high_mask0(<8 x i16> %vec) {
737 ; CHECK-LABEL: test_8xi16_perm_high_mask0:
739 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,7,6]
741 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
; Merge-masked high-half word shuffle (elements 4-7 become 6,5,7,6): lanes where
; the %mask word equals zero receive the shuffled word, the others keep %vec2.
; Expected code: vptestnmw builds %k1, masked vpshufhw into xmm1, vmovdqa to xmm0.
744 define <8 x i16> @test_masked_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
745 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask0:
747 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
748 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,7,6]
749 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
751 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
752 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
753 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 6,5,7,6): lanes where
; the %mask word equals zero receive the shuffled word, the others become zero.
; Expected code: vptestnmw builds %k1, then vpshufhw with {%k1} {z} writes xmm0.
757 define <8 x i16> @test_masked_z_8xi16_perm_high_mask0(<8 x i16> %vec, <8 x i16> %mask) {
758 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask0:
760 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
761 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,7,6]
763 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 7, i32 6>
764 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
765 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked low-half word shuffle (elements 0-3 become 0,3,0,0; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; keep %vec2. Expected code: vptestnmw, masked vpshuflw into xmm1, vmovdqa to xmm0.
768 define <8 x i16> @test_masked_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
769 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask1:
771 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
772 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[0,3,0,0,4,5,6,7]
773 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
775 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
776 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
777 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked low-half word shuffle (elements 0-3 become 0,3,0,0; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; become zero. Expected code: vptestnmw, then vpshuflw with {%k1} {z} into xmm0.
781 define <8 x i16> @test_masked_z_8xi16_perm_low_mask1(<8 x i16> %vec, <8 x i16> %mask) {
782 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask1:
784 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
785 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,3,0,0,4,5,6,7]
787 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7>
788 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
789 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked high-half word shuffle (elements 4-7 become 5,4,4,5): lanes where
; the %mask word equals zero receive the shuffled word, the others keep %vec2.
; Expected code: vptestnmw builds %k1, masked vpshufhw into xmm1, vmovdqa to xmm0.
792 define <8 x i16> @test_masked_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
793 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask2:
795 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
796 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,4,4,5]
797 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
799 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5>
800 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
801 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 5,4,4,5): lanes where
; the %mask word equals zero receive the shuffled word, the others become zero.
; Expected code: vptestnmw builds %k1, then vpshufhw with {%k1} {z} writes xmm0.
805 define <8 x i16> @test_masked_z_8xi16_perm_high_mask2(<8 x i16> %vec, <8 x i16> %mask) {
806 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask2:
808 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
809 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,4,4,5]
811 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 4, i32 4, i32 5>
812 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
813 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked <8 x i16> shuffle that reorders elements 0-3 as 2,1,1,1 and keeps
; elements 4-7 in place; the expected output is a single vpshuflw.
816 define <8 x i16> @test_8xi16_perm_low_mask3(<8 x i16> %vec) {
817 ; CHECK-LABEL: test_8xi16_perm_low_mask3:
819 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = xmm0[2,1,1,1,4,5,6,7]
821 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
; Merge-masked low-half word shuffle (elements 0-3 become 2,1,1,1; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; keep %vec2. Expected code: vptestnmw, masked vpshuflw into xmm1, vmovdqa to xmm0.
824 define <8 x i16> @test_masked_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
825 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask3:
827 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
828 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[2,1,1,1,4,5,6,7]
829 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
831 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
832 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
833 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked low-half word shuffle (elements 0-3 become 2,1,1,1; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; become zero. Expected code: vptestnmw, then vpshuflw with {%k1} {z} into xmm0.
837 define <8 x i16> @test_masked_z_8xi16_perm_low_mask3(<8 x i16> %vec, <8 x i16> %mask) {
838 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask3:
840 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
841 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[2,1,1,1,4,5,6,7]
843 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 1, i32 4, i32 5, i32 6, i32 7>
844 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
845 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked high-half word shuffle (elements 4-7 become 5,5,7,6): lanes where
; the %mask word equals zero receive the shuffled word, the others keep %vec2.
; Expected code: vptestnmw builds %k1, masked vpshufhw into xmm1, vmovdqa to xmm0.
848 define <8 x i16> @test_masked_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
849 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask4:
851 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
852 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,5,5,7,6]
853 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
855 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6>
856 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
857 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 5,5,7,6): lanes where
; the %mask word equals zero receive the shuffled word, the others become zero.
; Expected code: vptestnmw builds %k1, then vpshufhw with {%k1} {z} writes xmm0.
861 define <8 x i16> @test_masked_z_8xi16_perm_high_mask4(<8 x i16> %vec, <8 x i16> %mask) {
862 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask4:
864 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
865 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,5,5,7,6]
867 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 5, i32 7, i32 6>
868 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
869 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked low-half word shuffle (elements 0-3 become 3,3,2,1; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; keep %vec2. Expected code: vptestnmw, masked vpshuflw into xmm1, vmovdqa to xmm0.
872 define <8 x i16> @test_masked_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
873 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask5:
875 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
876 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[3,3,2,1,4,5,6,7]
877 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
879 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7>
880 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
881 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked low-half word shuffle (elements 0-3 become 3,3,2,1; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; become zero. Expected code: vptestnmw, then vpshuflw with {%k1} {z} into xmm0.
885 define <8 x i16> @test_masked_z_8xi16_perm_low_mask5(<8 x i16> %vec, <8 x i16> %mask) {
886 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask5:
888 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
889 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[3,3,2,1,4,5,6,7]
891 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 1, i32 4, i32 5, i32 6, i32 7>
892 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
893 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked <8 x i16> shuffle that keeps elements 0-3 in place and reorders
; elements 4-7 as 6,5,6,5; the expected output is a single vpshufhw.
896 define <8 x i16> @test_8xi16_perm_high_mask6(<8 x i16> %vec) {
897 ; CHECK-LABEL: test_8xi16_perm_high_mask6:
899 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = xmm0[0,1,2,3,6,5,6,5]
901 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
; Merge-masked high-half word shuffle (elements 4-7 become 6,5,6,5): lanes where
; the %mask word equals zero receive the shuffled word, the others keep %vec2.
; Expected code: vptestnmw builds %k1, masked vpshufhw into xmm1, vmovdqa to xmm0.
904 define <8 x i16> @test_masked_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
905 ; CHECK-LABEL: test_masked_8xi16_perm_high_mask6:
907 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
908 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm1 {%k1} = xmm0[0,1,2,3,6,5,6,5]
909 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
911 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
912 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
913 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 6,5,6,5): lanes where
; the %mask word equals zero receive the shuffled word, the others become zero.
; Expected code: vptestnmw builds %k1, then vpshufhw with {%k1} {z} writes xmm0.
917 define <8 x i16> @test_masked_z_8xi16_perm_high_mask6(<8 x i16> %vec, <8 x i16> %mask) {
918 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mask6:
920 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
921 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = xmm0[0,1,2,3,6,5,6,5]
923 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 5>
924 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
925 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked low-half word shuffle (elements 0-3 become 1,0,2,0; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; keep %vec2. Expected code: vptestnmw, masked vpshuflw into xmm1, vmovdqa to xmm0.
928 define <8 x i16> @test_masked_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %vec2, <8 x i16> %mask) {
929 ; CHECK-LABEL: test_masked_8xi16_perm_low_mask7:
931 ; CHECK-NEXT: vptestnmw %xmm2, %xmm2, %k1
932 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0,4,5,6,7]
933 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
935 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
936 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
937 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked low-half word shuffle (elements 0-3 become 1,0,2,0; 4-7 unchanged):
; lanes where the %mask word equals zero receive the shuffled word, the others
; become zero. Expected code: vptestnmw, then vpshuflw with {%k1} {z} into xmm0.
941 define <8 x i16> @test_masked_z_8xi16_perm_low_mask7(<8 x i16> %vec, <8 x i16> %mask) {
942 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mask7:
944 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
945 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0,4,5,6,7]
947 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
948 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
949 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked high-half word shuffle (elements 4-7 become 7,7,4,6) of an <8 x i16>
; value loaded from %vp; the expected vpshufhw takes its source as a mem operand.
952 define <8 x i16> @test_8xi16_perm_high_mem_mask0(ptr %vp) {
953 ; CHECK-LABEL: test_8xi16_perm_high_mem_mask0:
955 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,7,4,6]
957 %vec = load <8 x i16>, ptr %vp
958 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
; Merge-masked high-half word shuffle (elements 4-7 become 7,7,4,6) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others keep %vec2. Expected code: vptestnmw, then masked vpshufhw
; from a mem operand into xmm0.
961 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask0(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
962 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask0:
964 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
965 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,7,4,6]
967 %vec = load <8 x i16>, ptr %vp
968 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
969 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
970 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 7,7,4,6) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others become zero. Expected code: vptestnmw, then vpshufhw with
; {%k1} {z} from a mem operand into xmm0.
974 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask0(ptr %vp, <8 x i16> %mask) {
975 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask0:
977 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
978 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,7,4,6]
980 %vec = load <8 x i16>, ptr %vp
981 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 4, i32 6>
982 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
983 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked low-half word shuffle (elements 0-3 become 1,3,3,2) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others keep %vec2. Expected code: vptestnmw, then masked vpshuflw
; from a mem operand into xmm0.
987 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask1(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
988 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask1:
990 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
991 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[1,3,3,2,4,5,6,7]
993 %vec = load <8 x i16>, ptr %vp
994 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
995 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
996 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked low-half word shuffle (elements 0-3 become 1,3,3,2) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others become zero. Expected code: vptestnmw, then vpshuflw with
; {%k1} {z} from a mem operand into xmm0.
1000 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask1(ptr %vp, <8 x i16> %mask) {
1001 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask1:
1003 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1004 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7]
1006 %vec = load <8 x i16>, ptr %vp
1007 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1008 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1009 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked high-half word shuffle (elements 4-7 become 6,6,5,7) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others keep %vec2. Expected code: vptestnmw, then masked vpshufhw
; from a mem operand into xmm0.
1013 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask2(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1014 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask2:
1016 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1017 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,6,6,5,7]
1019 %vec = load <8 x i16>, ptr %vp
1020 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7>
1021 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1022 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked high-half word shuffle (elements 4-7 become 6,6,5,7) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others become zero. Expected code: vptestnmw, then vpshufhw with
; {%k1} {z} from a mem operand into xmm0.
1026 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask2(ptr %vp, <8 x i16> %mask) {
1027 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask2:
1029 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1030 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,6,6,5,7]
1032 %vec = load <8 x i16>, ptr %vp
1033 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 6, i32 5, i32 7>
1034 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1035 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked low-half word shuffle (elements 0-3 become 3,1,2,0) of an <8 x i16>
; value loaded from %vp; the expected vpshuflw takes its source as a mem operand.
1039 define <8 x i16> @test_8xi16_perm_low_mem_mask3(ptr %vp) {
1040 ; CHECK-LABEL: test_8xi16_perm_low_mem_mask3:
1042 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 = mem[3,1,2,0,4,5,6,7]
1044 %vec = load <8 x i16>, ptr %vp
1045 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
; Merge-masked low-half word shuffle (elements 0-3 become 3,1,2,0) of a value
; loaded from %vp: lanes where the %mask word equals zero receive the shuffled
; word, the others keep %vec2. Expected code: vptestnmw, then masked vpshuflw
; from a mem operand into xmm0.
1048 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask3(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1049 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask3:
1051 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1052 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[3,1,2,0,4,5,6,7]
1054 %vec = load <8 x i16>, ptr %vp
1055 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
1056 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1057 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked permute of elements 0-3 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1061 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask3(ptr %vp, <8 x i16> %mask) {
1062 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask3:
1064 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1065 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[3,1,2,0,4,5,6,7]
1067 %vec = load <8 x i16>, ptr %vp
1068 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 3, i32 1, i32 2, i32 0, i32 4, i32 5, i32 6, i32 7>
1069 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1070 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked permute of elements 4-7 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1074 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask4(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1075 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask4:
1077 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1078 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,6,7,5]
1080 %vec = load <8 x i16>, ptr %vp
1081 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5>
1082 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1083 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked permute of elements 4-7 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1087 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask4(ptr %vp, <8 x i16> %mask) {
1088 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask4:
1090 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1091 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,6,7,5]
1093 %vec = load <8 x i16>, ptr %vp
1094 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 6, i32 7, i32 5>
1095 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1096 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1100 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask5(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1101 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask5:
1103 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1104 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[2,1,3,2,4,5,6,7]
1106 %vec = load <8 x i16>, ptr %vp
1107 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1108 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1109 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked permute of elements 0-3 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1113 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask5(ptr %vp, <8 x i16> %mask) {
1114 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask5:
1116 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1117 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[2,1,3,2,4,5,6,7]
1119 %vec = load <8 x i16>, ptr %vp
1120 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 2, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7>
1121 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1122 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked permute of elements 4-7 of an <8 x i16> loaded from %vp; the
; remaining elements keep their positions. The assertion expects the load to be
; folded into a single vpshufhw.
1126 define <8 x i16> @test_8xi16_perm_high_mem_mask6(ptr %vp) {
1127 ; CHECK-LABEL: test_8xi16_perm_high_mem_mask6:
1129 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 = mem[0,1,2,3,7,4,4,4]
1131 %vec = load <8 x i16>, ptr %vp
1132 %res = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
; Merge-masked permute of elements 4-7 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1135 define <8 x i16> @test_masked_8xi16_perm_high_mem_mask6(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1136 ; CHECK-LABEL: test_masked_8xi16_perm_high_mem_mask6:
1138 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1139 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} = mem[0,1,2,3,7,4,4,4]
1141 %vec = load <8 x i16>, ptr %vp
1142 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
1143 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1144 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked permute of elements 4-7 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1148 define <8 x i16> @test_masked_z_8xi16_perm_high_mem_mask6(ptr %vp, <8 x i16> %mask) {
1149 ; CHECK-LABEL: test_masked_z_8xi16_perm_high_mem_mask6:
1151 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1152 ; CHECK-NEXT: vpshufhw {{.*#+}} xmm0 {%k1} {z} = mem[0,1,2,3,7,4,4,4]
1154 %vec = load <8 x i16>, ptr %vp
1155 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 4, i32 4>
1156 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1157 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1161 define <8 x i16> @test_masked_8xi16_perm_low_mem_mask7(ptr %vp, <8 x i16> %vec2, <8 x i16> %mask) {
1162 ; CHECK-LABEL: test_masked_8xi16_perm_low_mem_mask7:
1164 ; CHECK-NEXT: vptestnmw %xmm1, %xmm1, %k1
1165 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} = mem[0,3,3,1,4,5,6,7]
1167 %vec = load <8 x i16>, ptr %vp
1168 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7>
1169 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1170 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> %vec2
; Zero-masked permute of elements 0-3 of an <8 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1174 define <8 x i16> @test_masked_z_8xi16_perm_low_mem_mask7(ptr %vp, <8 x i16> %mask) {
1175 ; CHECK-LABEL: test_masked_z_8xi16_perm_low_mem_mask7:
1177 ; CHECK-NEXT: vptestnmw %xmm0, %xmm0, %k1
1178 ; CHECK-NEXT: vpshuflw {{.*#+}} xmm0 {%k1} {z} = mem[0,3,3,1,4,5,6,7]
1180 %vec = load <8 x i16>, ptr %vp
1181 %shuf = shufflevector <8 x i16> %vec, <8 x i16> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 1, i32 4, i32 5, i32 6, i32 7>
1182 %cmp = icmp eq <8 x i16> %mask, zeroinitializer
1183 %res = select <8 x i1> %cmp, <8 x i16> %shuf, <8 x i16> zeroinitializer
; Unmasked permute of elements 4-7 and 12-15 of the <16 x i16> %vec; the
; remaining elements keep their positions. The assertion expects a single
; vpshufhw on ymm0.
1187 define <16 x i16> @test_16xi16_perm_high_mask0(<16 x i16> %vec) {
1188 ; CHECK-LABEL: test_16xi16_perm_high_mask0:
1190 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12]
1192 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
; Merge-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshufhw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1195 define <16 x i16> @test_masked_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1196 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask0:
1198 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1199 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12]
1200 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1202 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
1203 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1204 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshufhw on ymm0.
1208 define <16 x i16> @test_masked_z_16xi16_perm_high_mask0(<16 x i16> %vec, <16 x i16> %mask) {
1209 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask0:
1211 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1212 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,4,4,6,4,8,9,10,11,12,12,14,12]
1214 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 14, i32 12>
1215 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1216 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshuflw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1219 define <16 x i16> @test_masked_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1220 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask1:
1222 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1223 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15]
1224 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1226 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1227 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1228 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshuflw on ymm0.
1232 define <16 x i16> @test_masked_z_16xi16_perm_low_mask1(<16 x i16> %vec, <16 x i16> %mask) {
1233 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask1:
1235 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1236 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,2,3,2,4,5,6,7,8,10,11,10,12,13,14,15]
1238 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 8, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1239 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1240 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshufhw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1243 define <16 x i16> @test_masked_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1244 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask2:
1246 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1247 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13]
1248 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1250 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13>
1251 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1252 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshufhw on ymm0.
1256 define <16 x i16> @test_masked_z_16xi16_perm_high_mask2(<16 x i16> %vec, <16 x i16> %mask) {
1257 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask2:
1259 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1260 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,7,5,5,5,8,9,10,11,15,13,13,13]
1262 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 5, i32 5, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 13, i32 13, i32 13>
1263 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1264 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of elements 0-3 and 8-11 of the <16 x i16> %vec; the
; remaining elements keep their positions. The assertion expects a single
; vpshuflw on ymm0.
1267 define <16 x i16> @test_16xi16_perm_low_mask3(<16 x i16> %vec) {
1268 ; CHECK-LABEL: test_16xi16_perm_low_mask3:
1270 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15]
1272 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
; Merge-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshuflw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1275 define <16 x i16> @test_masked_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1276 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask3:
1278 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1279 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15]
1280 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1282 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1283 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1284 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshuflw on ymm0.
1288 define <16 x i16> @test_masked_z_16xi16_perm_low_mask3(<16 x i16> %vec, <16 x i16> %mask) {
1289 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask3:
1291 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1292 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,3,2,4,5,6,7,11,10,11,10,12,13,14,15]
1294 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1295 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1296 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshufhw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1299 define <16 x i16> @test_masked_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1300 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask4:
1302 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1303 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15]
1304 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1306 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15>
1307 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1308 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshufhw on ymm0.
1312 define <16 x i16> @test_masked_z_16xi16_perm_high_mask4(<16 x i16> %vec, <16 x i16> %mask) {
1313 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask4:
1315 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1316 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,4,7,8,9,10,11,14,15,12,15]
1318 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 12, i32 15>
1319 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1320 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshuflw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1323 define <16 x i16> @test_masked_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1324 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask5:
1326 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1327 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15]
1328 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1330 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1331 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1332 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshuflw on ymm0.
1336 define <16 x i16> @test_masked_z_16xi16_perm_low_mask5(<16 x i16> %vec, <16 x i16> %mask) {
1337 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask5:
1339 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1340 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,0,4,5,6,7,11,11,11,8,12,13,14,15]
1342 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1343 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1344 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of elements 4-7 and 12-15 of the <16 x i16> %vec; the
; remaining elements keep their positions. The assertion expects a single
; vpshufhw on ymm0.
1347 define <16 x i16> @test_16xi16_perm_high_mask6(<16 x i16> %vec) {
1348 ; CHECK-LABEL: test_16xi16_perm_high_mask6:
1350 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13]
1352 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
; Merge-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshufhw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1355 define <16 x i16> @test_masked_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1356 ; CHECK-LABEL: test_masked_16xi16_perm_high_mask6:
1358 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1359 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm1 {%k1} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13]
1360 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1362 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
1363 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1364 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshufhw on ymm0.
1368 define <16 x i16> @test_masked_z_16xi16_perm_high_mask6(<16 x i16> %vec, <16 x i16> %mask) {
1369 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mask6:
1371 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1372 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = ymm0[0,1,2,3,6,7,6,5,8,9,10,11,14,15,14,13]
1374 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 7, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 14, i32 15, i32 14, i32 13>
1375 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1376 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect a k1-masked
; vpshuflw into ymm1 followed by a vmovdqa of ymm1 into ymm0.
1379 define <16 x i16> @test_masked_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
1380 ; CHECK-LABEL: test_masked_16xi16_perm_low_mask7:
1382 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
1383 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15]
1384 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1386 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15>
1387 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1388 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of the <16 x i16> %vec.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect a k1-masked, zeroing
; ({z}) vpshuflw on ymm0.
1392 define <16 x i16> @test_masked_z_16xi16_perm_low_mask7(<16 x i16> %vec, <16 x i16> %mask) {
1393 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mask7:
1395 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1396 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,2,4,5,6,7,11,10,9,10,12,13,14,15]
1398 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 1, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 9, i32 10, i32 12, i32 13, i32 14, i32 15>
1399 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1400 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp;
; the remaining elements keep their positions. The assertion expects the load
; to be folded into a single vpshufhw.
1403 define <16 x i16> @test_16xi16_perm_high_mem_mask0(ptr %vp) {
1404 ; CHECK-LABEL: test_16xi16_perm_high_mem_mask0:
1406 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15]
1408 %vec = load <16 x i16>, ptr %vp
1409 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
; Merge-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1412 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask0(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1413 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask0:
1415 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1416 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15]
1418 %vec = load <16 x i16>, ptr %vp
1419 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
1420 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1421 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1425 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask0(ptr %vp, <16 x i16> %mask) {
1426 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask0:
1428 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1429 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,4,7,8,9,10,11,13,14,12,15]
1431 %vec = load <16 x i16>, ptr %vp
1432 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 12, i32 15>
1433 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1434 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1438 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask1(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1439 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask1:
1441 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1442 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15]
1444 %vec = load <16 x i16>, ptr %vp
1445 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1446 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1447 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1451 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask1(ptr %vp, <16 x i16> %mask) {
1452 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask1:
1454 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1455 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,0,4,5,6,7,9,11,11,8,12,13,14,15]
1457 %vec = load <16 x i16>, ptr %vp
1458 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1459 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1460 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1464 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask2(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1465 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask2:
1467 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1468 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14]
1470 %vec = load <16 x i16>, ptr %vp
1471 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
1472 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1473 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1477 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask2(ptr %vp, <16 x i16> %mask) {
1478 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask2:
1480 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1481 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,5,6,5,6,8,9,10,11,13,14,13,14]
1483 %vec = load <16 x i16>, ptr %vp
1484 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 13, i32 14, i32 13, i32 14>
1485 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1486 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp;
; the remaining elements keep their positions. The assertion expects the load
; to be folded into a single vpshuflw.
1490 define <16 x i16> @test_16xi16_perm_low_mem_mask3(ptr %vp) {
1491 ; CHECK-LABEL: test_16xi16_perm_low_mem_mask3:
1493 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15]
1495 %vec = load <16 x i16>, ptr %vp
1496 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
; Merge-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1499 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask3(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1500 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask3:
1502 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1503 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15]
1505 %vec = load <16 x i16>, ptr %vp
1506 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1507 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1508 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1512 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask3(ptr %vp, <16 x i16> %mask) {
1513 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask3:
1515 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1516 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,0,4,5,6,7,11,10,11,8,12,13,14,15]
1518 %vec = load <16 x i16>, ptr %vp
1519 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 2, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 10, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15>
1520 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1521 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1525 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask4(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1526 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask4:
1528 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1529 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
1531 %vec = load <16 x i16>, ptr %vp
1532 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
1533 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1534 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1538 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask4(ptr %vp, <16 x i16> %mask) {
1539 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask4:
1541 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1542 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,7,7,6,7,8,9,10,11,15,15,14,15]
1544 %vec = load <16 x i16>, ptr %vp
1545 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 14, i32 15>
1546 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1547 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1551 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask5(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1552 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask5:
1554 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1555 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15]
1557 %vec = load <16 x i16>, ptr %vp
1558 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1559 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1560 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1564 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask5(ptr %vp, <16 x i16> %mask) {
1565 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask5:
1567 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1568 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[1,3,3,2,4,5,6,7,9,11,11,10,12,13,14,15]
1570 %vec = load <16 x i16>, ptr %vp
1571 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 9, i32 11, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1572 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1573 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp;
; the remaining elements keep their positions. The assertion expects the load
; to be folded into a single vpshufhw.
1577 define <16 x i16> @test_16xi16_perm_high_mem_mask6(ptr %vp) {
1578 ; CHECK-LABEL: test_16xi16_perm_high_mem_mask6:
1580 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
1582 %vec = load <16 x i16>, ptr %vp
1583 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
; Merge-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshufhw.
1586 define <16 x i16> @test_masked_16xi16_perm_high_mem_mask6(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1587 ; CHECK-LABEL: test_masked_16xi16_perm_high_mem_mask6:
1589 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1590 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
1592 %vec = load <16 x i16>, ptr %vp
1593 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
1594 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1595 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 4-7 and 12-15 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshufhw.
1599 define <16 x i16> @test_masked_z_16xi16_perm_high_mem_mask6(ptr %vp, <16 x i16> %mask) {
1600 ; CHECK-LABEL: test_masked_z_16xi16_perm_high_mem_mask6:
1602 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1603 ; CHECK-NEXT: vpshufhw {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,3,4,4,4,5,8,9,10,11,12,12,12,13]
1605 %vec = load <16 x i16>, ptr %vp
1606 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 4, i32 5, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 12, i32 13>
1607 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1608 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and the %vec2 element otherwise. The assertions expect the load to be
; folded into a k1-masked vpshuflw.
1612 define <16 x i16> @test_masked_16xi16_perm_low_mem_mask7(ptr %vp, <16 x i16> %vec2, <16 x i16> %mask) {
1613 ; CHECK-LABEL: test_masked_16xi16_perm_low_mem_mask7:
1615 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
1616 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15]
1618 %vec = load <16 x i16>, ptr %vp
1619 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1620 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1621 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked permute of elements 0-3 and 8-11 of a <16 x i16> loaded from %vp.
; Result elements take the shuffled value where the matching %mask element is
; zero and are zeroed otherwise. The assertions expect the load to be folded
; into a k1-masked, zeroing ({z}) vpshuflw.
1625 define <16 x i16> @test_masked_z_16xi16_perm_low_mem_mask7(ptr %vp, <16 x i16> %mask) {
1626 ; CHECK-LABEL: test_masked_z_16xi16_perm_low_mem_mask7:
1628 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
1629 ; CHECK-NEXT: vpshuflw {{.*#+}} ymm0 {%k1} {z} = mem[3,1,3,2,4,5,6,7,11,9,11,10,12,13,14,15]
1631 %vec = load <16 x i16>, ptr %vp
1632 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 2, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 10, i32 12, i32 13, i32 14, i32 15>
1633 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
1634 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked permute of the upper four elements of every group of eight in the
; <32 x i16> %vec; the lower four elements of each group keep their positions.
; The assertion expects a single vpshufhw on zmm0.
1638 define <32 x i16> @test_32xi16_perm_high_mask0(<32 x i16> %vec) {
1639 ; CHECK-LABEL: test_32xi16_perm_high_mask0:
1641 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
1643 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
; Merge-masked permute of the upper four elements of every group of eight in
; the <32 x i16> %vec. Result elements take the shuffled value where the
; matching %mask element is zero and the %vec2 element otherwise. The assertions
; expect a k1-masked vpshufhw into zmm1 followed by a vmovdqa64 of zmm1 into zmm0.
1646 define <32 x i16> @test_masked_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1647 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask0:
1649 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1650 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
1651 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1653 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
1654 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1655 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked permute of the upper four elements of every group of eight in
; the <32 x i16> %vec. Result elements take the shuffled value where the
; matching %mask element is zero and are zeroed otherwise. The assertions
; expect a k1-masked, zeroing ({z}) vpshufhw on zmm0.
1659 define <32 x i16> @test_masked_z_32xi16_perm_high_mask0(<32 x i16> %vec, <32 x i16> %mask) {
1660 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask0:
1662 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1663 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,5,6,4,8,9,10,11,12,13,14,12,16,17,18,19,20,21,22,20,24,25,26,27,28,29,30,28]
1665 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 28>
1666 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1667 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked permute of the lower four elements of every group of eight in
; the <32 x i16> %vec. Result elements take the shuffled value where the
; matching %mask element is zero and the %vec2 element otherwise. The assertions
; expect a k1-masked vpshuflw into zmm1 followed by a vmovdqa64 of zmm1 into zmm0.
1670 define <32 x i16> @test_masked_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1671 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask1:
1673 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1674 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31]
1675 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1677 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
1678 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1679 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshuflw.
1683 define <32 x i16> @test_masked_z_32xi16_perm_low_mask1(<32 x i16> %vec, <32 x i16> %mask) {
1684 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask1:
1686 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1687 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,0,0,4,5,6,7,10,9,8,8,12,13,14,15,18,17,16,16,20,21,22,23,26,25,24,24,28,29,30,31]
1689 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 0, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 8, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 16, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 24, i32 24, i32 28, i32 29, i32 30, i32 31>
1690 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1691 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw into zmm1 followed by a copy to zmm0.
1694 define <32 x i16> @test_masked_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1695 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask2:
1697 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1698 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31]
1699 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1701 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
1702 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1703 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshufhw.
1707 define <32 x i16> @test_masked_z_32xi16_perm_high_mask2(<32 x i16> %vec, <32 x i16> %mask) {
1708 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask2:
1710 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1711 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,6,4,7,8,9,10,11,12,14,12,15,16,17,18,19,20,22,20,23,24,25,26,27,28,30,28,31]
1713 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 6, i32 4, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 14, i32 12, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 22, i32 20, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 30, i32 28, i32 31>
1714 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1715 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; The expected output is a single vpshuflw on zmm0.
1718 define <32 x i16> @test_32xi16_perm_low_mask3(<32 x i16> %vec) {
1719 ; CHECK-LABEL: test_32xi16_perm_low_mask3:
1721 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
1723 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw into zmm1 followed by a copy to zmm0.
1726 define <32 x i16> @test_masked_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1727 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask3:
1729 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1730 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
1731 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1733 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
1734 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1735 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshuflw.
1739 define <32 x i16> @test_masked_z_32xi16_perm_low_mask3(<32 x i16> %vec, <32 x i16> %mask) {
1740 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask3:
1742 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1743 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,1,3,4,5,6,7,11,11,9,11,12,13,14,15,19,19,17,19,20,21,22,23,27,27,25,27,28,29,30,31]
1745 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 3, i32 1, i32 3, i32 4, i32 5, i32 6, i32 7, i32 11, i32 11, i32 9, i32 11, i32 12, i32 13, i32 14, i32 15, i32 19, i32 19, i32 17, i32 19, i32 20, i32 21, i32 22, i32 23, i32 27, i32 27, i32 25, i32 27, i32 28, i32 29, i32 30, i32 31>
1746 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1747 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw into zmm1 followed by a copy to zmm0.
1750 define <32 x i16> @test_masked_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1751 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask4:
1753 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1754 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30]
1755 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1757 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
1758 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1759 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshufhw.
1763 define <32 x i16> @test_masked_z_32xi16_perm_high_mask4(<32 x i16> %vec, <32 x i16> %mask) {
1764 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask4:
1766 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1767 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,7,7,5,6,8,9,10,11,15,15,13,14,16,17,18,19,23,23,21,22,24,25,26,27,31,31,29,30]
1769 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 7, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 15, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 23, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 31, i32 29, i32 30>
1770 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1771 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw into zmm1 followed by a copy to zmm0.
1774 define <32 x i16> @test_masked_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1775 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask5:
1777 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1778 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31]
1779 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1781 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
1782 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1783 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshuflw.
1787 define <32 x i16> @test_masked_z_32xi16_perm_low_mask5(<32 x i16> %vec, <32 x i16> %mask) {
1788 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask5:
1790 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1791 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[2,1,1,0,4,5,6,7,10,9,9,8,12,13,14,15,18,17,17,16,20,21,22,23,26,25,25,24,28,29,30,31]
1793 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 1, i32 1, i32 0, i32 4, i32 5, i32 6, i32 7, i32 10, i32 9, i32 9, i32 8, i32 12, i32 13, i32 14, i32 15, i32 18, i32 17, i32 17, i32 16, i32 20, i32 21, i32 22, i32 23, i32 26, i32 25, i32 25, i32 24, i32 28, i32 29, i32 30, i32 31>
1794 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1795 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; The expected output is a single vpshufhw on zmm0.
1798 define <32 x i16> @test_32xi16_perm_high_mask6(<32 x i16> %vec) {
1799 ; CHECK-LABEL: test_32xi16_perm_high_mask6:
1801 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
1803 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw into zmm1 followed by a copy to zmm0.
1806 define <32 x i16> @test_masked_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1807 ; CHECK-LABEL: test_masked_32xi16_perm_high_mask6:
1809 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1810 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm1 {%k1} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
1811 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1813 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
1814 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1815 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshufhw.
1819 define <32 x i16> @test_masked_z_32xi16_perm_high_mask6(<32 x i16> %vec, <32 x i16> %mask) {
1820 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mask6:
1822 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1823 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = zmm0[0,1,2,3,4,4,5,6,8,9,10,11,12,12,13,14,16,17,18,19,20,20,21,22,24,25,26,27,28,28,29,30]
1825 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 12, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 20, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 28, i32 28, i32 29, i32 30>
1826 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1827 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw into zmm1 followed by a copy to zmm0.
1830 define <32 x i16> @test_masked_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
1831 ; CHECK-LABEL: test_masked_32xi16_perm_low_mask7:
1833 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
1834 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm1 {%k1} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31]
1835 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1837 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
1838 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1839 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle (register source) that rearranges only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; the expected output is one {z}-masked vpshuflw.
1843 define <32 x i16> @test_masked_z_32xi16_perm_low_mask7(<32 x i16> %vec, <32 x i16> %mask) {
1844 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mask7:
1846 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1847 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,3,0,4,5,6,7,11,8,11,8,12,13,14,15,19,16,19,16,20,21,22,23,27,24,27,24,28,29,30,31]
1849 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 0, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 8, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 16, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 24, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
1850 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1851 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; The expected output is a single vpshufhw with the load folded in as its memory operand.
1854 define <32 x i16> @test_32xi16_perm_high_mem_mask0(ptr %vp) {
1855 ; CHECK-LABEL: test_32xi16_perm_high_mem_mask0:
1857 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
1859 %vec = load <32 x i16>, ptr %vp
1860 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw with a memory operand writing zmm0 directly.
1863 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask0(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
1864 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask0:
1866 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1867 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
1869 %vec = load <32 x i16>, ptr %vp
1870 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
1871 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1872 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufhw with a memory operand.
1876 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask0(ptr %vp, <32 x i16> %mask) {
1877 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask0:
1879 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
1880 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,5,6,8,9,10,11,15,12,13,14,16,17,18,19,23,20,21,22,24,25,26,27,31,28,29,30]
1882 %vec = load <32 x i16>, ptr %vp
1883 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 5, i32 6, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 13, i32 14, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 21, i32 22, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 29, i32 30>
1884 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1885 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw with a memory operand writing zmm0 directly.
1889 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask1(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
1890 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask1:
1892 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1893 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31]
1895 %vec = load <32 x i16>, ptr %vp
1896 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
1897 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1898 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshuflw with a memory operand.
1902 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask1(ptr %vp, <32 x i16> %mask) {
1903 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask1:
1905 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
1906 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[1,1,3,3,4,5,6,7,9,9,11,11,12,13,14,15,17,17,19,19,20,21,22,23,25,25,27,27,28,29,30,31]
1908 %vec = load <32 x i16>, ptr %vp
1909 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 1, i32 3, i32 3, i32 4, i32 5, i32 6, i32 7, i32 9, i32 9, i32 11, i32 11, i32 12, i32 13, i32 14, i32 15, i32 17, i32 17, i32 19, i32 19, i32 20, i32 21, i32 22, i32 23, i32 25, i32 25, i32 27, i32 27, i32 28, i32 29, i32 30, i32 31>
1910 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1911 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw with a memory operand writing zmm0 directly.
1915 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask2(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
1916 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask2:
1918 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1919 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28]
1921 %vec = load <32 x i16>, ptr %vp
1922 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
1923 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1924 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufhw with a memory operand.
1928 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask2(ptr %vp, <32 x i16> %mask) {
1929 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask2:
1931 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
1932 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,4,7,6,4,8,9,10,11,12,15,14,12,16,17,18,19,20,23,22,20,24,25,26,27,28,31,30,28]
1934 %vec = load <32 x i16>, ptr %vp
1935 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 7, i32 6, i32 4, i32 8, i32 9, i32 10, i32 11, i32 12, i32 15, i32 14, i32 12, i32 16, i32 17, i32 18, i32 19, i32 20, i32 23, i32 22, i32 20, i32 24, i32 25, i32 26, i32 27, i32 28, i32 31, i32 30, i32 28>
1936 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1937 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; The expected output is a single vpshuflw with the load folded in as its memory operand.
1941 define <32 x i16> @test_32xi16_perm_low_mem_mask3(ptr %vp) {
1942 ; CHECK-LABEL: test_32xi16_perm_low_mem_mask3:
1944 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
1946 %vec = load <32 x i16>, ptr %vp
1947 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw with a memory operand writing zmm0 directly.
1950 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask3(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
1951 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask3:
1953 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1954 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
1956 %vec = load <32 x i16>, ptr %vp
1957 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
1958 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1959 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshuflw with a memory operand.
1963 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask3(ptr %vp, <32 x i16> %mask) {
1964 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask3:
1966 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
1967 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[2,2,0,3,4,5,6,7,10,10,8,11,12,13,14,15,18,18,16,19,20,21,22,23,26,26,24,27,28,29,30,31]
1969 %vec = load <32 x i16>, ptr %vp
1970 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 0, i32 3, i32 4, i32 5, i32 6, i32 7, i32 10, i32 10, i32 8, i32 11, i32 12, i32 13, i32 14, i32 15, i32 18, i32 18, i32 16, i32 19, i32 20, i32 21, i32 22, i32 23, i32 26, i32 26, i32 24, i32 27, i32 28, i32 29, i32 30, i32 31>
1971 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1972 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw with a memory operand writing zmm0 directly.
1976 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask4(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
1977 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask4:
1979 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
1980 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29]
1982 %vec = load <32 x i16>, ptr %vp
1983 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
1984 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1985 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufhw with a memory operand.
1989 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask4(ptr %vp, <32 x i16> %mask) {
1990 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask4:
1992 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
1993 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,7,4,6,5,8,9,10,11,15,12,14,13,16,17,18,19,23,20,22,21,24,25,26,27,31,28,30,29]
1995 %vec = load <32 x i16>, ptr %vp
1996 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 7, i32 4, i32 6, i32 5, i32 8, i32 9, i32 10, i32 11, i32 15, i32 12, i32 14, i32 13, i32 16, i32 17, i32 18, i32 19, i32 23, i32 20, i32 22, i32 21, i32 24, i32 25, i32 26, i32 27, i32 31, i32 28, i32 30, i32 29>
1997 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
1998 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp; the mask copies elements 0-1 of each 8-element group into positions 2-3 and leaves the rest in place.
; Unlike the neighbouring low-half tests, the expected output is an unmasked vpshufd from memory into zmm2, with the mask applied by a separate vmovdqu16 into zmm0.
2002 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask5(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
2003 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask5:
2005 ; CHECK-NEXT: vpshufd {{.*#+}} zmm2 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15]
2006 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
2007 ; CHECK-NEXT: vmovdqu16 %zmm2, %zmm0 {%k1}
2009 %vec = load <32 x i16>, ptr %vp
2010 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
2011 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2012 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp; the mask copies elements 0-1 of each 8-element group into positions 2-3 and leaves the rest in place.
; Unlike the neighbouring low-half tests, the expected output is an unmasked vpshufd from memory into zmm1, with the zeroing mask applied by a separate vmovdqu16 into zmm0.
2016 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask5(ptr %vp, <32 x i16> %mask) {
2017 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask5:
2019 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 = mem[0,0,2,3,4,4,6,7,8,8,10,11,12,12,14,15]
2020 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
2021 ; CHECK-NEXT: vmovdqu16 %zmm1, %zmm0 {%k1} {z}
2023 %vec = load <32 x i16>, ptr %vp
2024 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 0, i32 1, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 8, i32 9, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 16, i32 17, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 24, i32 25, i32 28, i32 29, i32 30, i32 31>
2025 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2026 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; The expected output is a single vpshufhw with the load folded in as its memory operand.
2030 define <32 x i16> @test_32xi16_perm_high_mem_mask6(ptr %vp) {
2031 ; CHECK-LABEL: test_32xi16_perm_high_mem_mask6:
2033 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
2035 %vec = load <32 x i16>, ptr %vp
2036 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufhw with a memory operand writing zmm0 directly.
2039 define <32 x i16> @test_masked_32xi16_perm_high_mem_mask6(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
2040 ; CHECK-LABEL: test_masked_32xi16_perm_high_mem_mask6:
2042 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
2043 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
2045 %vec = load <32 x i16>, ptr %vp
2046 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
2047 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2048 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the upper four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufhw with a memory operand.
2052 define <32 x i16> @test_masked_z_32xi16_perm_high_mem_mask6(ptr %vp, <32 x i16> %mask) {
2053 ; CHECK-LABEL: test_masked_z_32xi16_perm_high_mem_mask6:
2055 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
2056 ; CHECK-NEXT: vpshufhw {{.*#+}} zmm0 {%k1} {z} = mem[0,1,2,3,6,5,6,6,8,9,10,11,14,13,14,14,16,17,18,19,22,21,22,22,24,25,26,27,30,29,30,30]
2058 %vec = load <32 x i16>, ptr %vp
2059 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 6, i32 5, i32 6, i32 6, i32 8, i32 9, i32 10, i32 11, i32 14, i32 13, i32 14, i32 14, i32 16, i32 17, i32 18, i32 19, i32 22, i32 21, i32 22, i32 22, i32 24, i32 25, i32 26, i32 27, i32 30, i32 29, i32 30, i32 30>
2060 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2061 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshuflw with a memory operand writing zmm0 directly.
2065 define <32 x i16> @test_masked_32xi16_perm_low_mem_mask7(ptr %vp, <32 x i16> %vec2, <32 x i16> %mask) {
2066 ; CHECK-LABEL: test_masked_32xi16_perm_low_mem_mask7:
2068 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
2069 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31]
2071 %vec = load <32 x i16>, ptr %vp
2072 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
2073 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2074 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked <32 x i16> shuffle of a vector loaded from %vp, rearranging only the lower four elements of each 8-element group.
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshuflw with a memory operand.
2078 define <32 x i16> @test_masked_z_32xi16_perm_low_mem_mask7(ptr %vp, <32 x i16> %mask) {
2079 ; CHECK-LABEL: test_masked_z_32xi16_perm_low_mem_mask7:
2081 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
2082 ; CHECK-NEXT: vpshuflw {{.*#+}} zmm0 {%k1} {z} = mem[3,1,3,0,4,5,6,7,11,9,11,8,12,13,14,15,19,17,19,16,20,21,22,23,27,25,27,24,28,29,30,31]
2084 %vec = load <32 x i16>, ptr %vp
2085 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 3, i32 1, i32 3, i32 0, i32 4, i32 5, i32 6, i32 7, i32 11, i32 9, i32 11, i32 8, i32 12, i32 13, i32 14, i32 15, i32 19, i32 17, i32 19, i32 16, i32 20, i32 21, i32 22, i32 23, i32 27, i32 25, i32 27, i32 24, i32 28, i32 29, i32 30, i32 31>
2086 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
2087 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked single-source <4 x i32> shuffle with element order 2,3,3,0 (register source).
; The expected output uses vshufps here, whereas the masked variants of the same shuffle expect vpshufd.
2091 define <4 x i32> @test_4xi32_perm_mask0(<4 x i32> %vec) {
2092 ; CHECK-LABEL: test_4xi32_perm_mask0:
2094 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[2,3,3,0]
2096 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
; Merge-masked <4 x i32> shuffle with element order 2,3,3,0 (register source).
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufd into xmm1 followed by a copy to xmm0.
2099 define <4 x i32> @test_masked_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
2100 ; CHECK-LABEL: test_masked_4xi32_perm_mask0:
2102 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2103 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[2,3,3,0]
2104 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
2106 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
2107 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2108 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> shuffle with element order 2,3,3,0 (register source).
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufd.
2112 define <4 x i32> @test_masked_z_4xi32_perm_mask0(<4 x i32> %vec, <4 x i32> %mask) {
2113 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask0:
2115 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2116 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[2,3,3,0]
2118 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 3, i32 3, i32 0>
2119 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2120 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked <4 x i32> shuffle with element order 1,0,2,0 (register source).
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufd into xmm1 followed by a copy to xmm0.
2123 define <4 x i32> @test_masked_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
2124 ; CHECK-LABEL: test_masked_4xi32_perm_mask1:
2126 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2127 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,0,2,0]
2128 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
2130 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
2131 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2132 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> shuffle with element order 1,0,2,0 (register source).
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufd.
2136 define <4 x i32> @test_masked_z_4xi32_perm_mask1(<4 x i32> %vec, <4 x i32> %mask) {
2137 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask1:
2139 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2140 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,0,2,0]
2142 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 2, i32 0>
2143 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2144 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked <4 x i32> shuffle with element order 3,0,1,0 (register source).
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufd into xmm1 followed by a copy to xmm0.
2147 define <4 x i32> @test_masked_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
2148 ; CHECK-LABEL: test_masked_4xi32_perm_mask2:
2150 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2151 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[3,0,1,0]
2152 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
2154 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
2155 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2156 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> shuffle with element order 3,0,1,0 (register source).
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufd.
2160 define <4 x i32> @test_masked_z_4xi32_perm_mask2(<4 x i32> %vec, <4 x i32> %mask) {
2161 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask2:
2163 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2164 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[3,0,1,0]
2166 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 3, i32 0, i32 1, i32 0>
2167 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2168 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked single-source <4 x i32> shuffle with element order 1,1,0,3 (register source).
; The expected output uses vshufps here, whereas the masked variants of the same shuffle expect vpshufd.
2171 define <4 x i32> @test_4xi32_perm_mask3(<4 x i32> %vec) {
2172 ; CHECK-LABEL: test_4xi32_perm_mask3:
2174 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[1,1,0,3]
2176 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
; Merge-masked <4 x i32> shuffle with element order 1,1,0,3 (register source).
; Lanes where %mask is zero take the shuffled element, other lanes keep %vec2; expected output is a masked vpshufd into xmm1 followed by a copy to xmm0.
2179 define <4 x i32> @test_masked_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %vec2, <4 x i32> %mask) {
2180 ; CHECK-LABEL: test_masked_4xi32_perm_mask3:
2182 ; CHECK-NEXT: vptestnmd %xmm2, %xmm2, %k1
2183 ; CHECK-NEXT: vpshufd {{.*#+}} xmm1 {%k1} = xmm0[1,1,0,3]
2184 ; CHECK-NEXT: vmovdqa %xmm1, %xmm0
2186 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
2187 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2188 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> shuffle with element order 1,1,0,3 (register source).
; Lanes where %mask is zero take the shuffled element, all other lanes become zero; expected output is one {z}-masked vpshufd.
2192 define <4 x i32> @test_masked_z_4xi32_perm_mask3(<4 x i32> %vec, <4 x i32> %mask) {
2193 ; CHECK-LABEL: test_masked_z_4xi32_perm_mask3:
2195 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2196 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = xmm0[1,1,0,3]
2198 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 1, i32 0, i32 3>
2199 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2200 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked <4 x i32> shuffle with element order 0,1,3,3 applied to a vector loaded from %vp.
; The expected output is a vpermilps with a memory operand, whereas the masked variants of the same shuffle expect vpshufd.
2203 define <4 x i32> @test_4xi32_perm_mem_mask0(ptr %vp) {
2204 ; CHECK-LABEL: test_4xi32_perm_mem_mask0:
2206 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[0,1,3,3]
2208 %vec = load <4 x i32>, ptr %vp
2209 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
; Merge-masked <4 x i32> permute with indices [0,1,3,3] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others keep %vec2. The expected assembly is vptestnmd into %k1 followed by a
; %k1-masked vpshufd with a memory source writing xmm0.
2212 define <4 x i32> @test_masked_4xi32_perm_mem_mask0(ptr %vp, <4 x i32> %vec2, <4 x i32> %mask) {
2213 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask0:
2215 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2216 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,1,3,3]
2218 %vec = load <4 x i32>, ptr %vp
2219 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
2220 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2221 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> permute with indices [0,1,3,3] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others become zero. The expected assembly is vptestnmd into %k1 followed by a
; {z}-masked vpshufd with a memory source.
2225 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask0(ptr %vp, <4 x i32> %mask) {
2226 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask0:
2228 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2229 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,1,3,3]
2231 %vec = load <4 x i32>, ptr %vp
2232 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 1, i32 3, i32 3>
2233 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2234 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked <4 x i32> permute with indices [2,2,3,1] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others keep %vec2. The expected assembly is vptestnmd into %k1 followed by a
; %k1-masked vpshufd with a memory source writing xmm0.
2238 define <4 x i32> @test_masked_4xi32_perm_mem_mask1(ptr %vp, <4 x i32> %vec2, <4 x i32> %mask) {
2239 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask1:
2241 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2242 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[2,2,3,1]
2244 %vec = load <4 x i32>, ptr %vp
2245 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
2246 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2247 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> permute with indices [2,2,3,1] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others become zero. The expected assembly is vptestnmd into %k1 followed by a
; {z}-masked vpshufd with a memory source.
2251 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask1(ptr %vp, <4 x i32> %mask) {
2252 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask1:
2254 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2255 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[2,2,3,1]
2257 %vec = load <4 x i32>, ptr %vp
2258 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 2, i32 2, i32 3, i32 1>
2259 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2260 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Merge-masked <4 x i32> permute with indices [0,3,0,1] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others keep %vec2. The expected assembly is vptestnmd into %k1 followed by a
; %k1-masked vpshufd with a memory source writing xmm0.
2264 define <4 x i32> @test_masked_4xi32_perm_mem_mask2(ptr %vp, <4 x i32> %vec2, <4 x i32> %mask) {
2265 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask2:
2267 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2268 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[0,3,0,1]
2270 %vec = load <4 x i32>, ptr %vp
2271 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1>
2272 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2273 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> permute with indices [0,3,0,1] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others become zero. The expected assembly is vptestnmd into %k1 followed by a
; {z}-masked vpshufd with a memory source.
2277 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask2(ptr %vp, <4 x i32> %mask) {
2278 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask2:
2280 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2281 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[0,3,0,1]
2283 %vec = load <4 x i32>, ptr %vp
2284 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 0, i32 3, i32 0, i32 1>
2285 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2286 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked <4 x i32> permute with indices [1,0,1,0] whose source vector is
; loaded from %vp. The expected assembly is a vpermilps with a memory source.
2290 define <4 x i32> @test_4xi32_perm_mem_mask3(ptr %vp) {
2291 ; CHECK-LABEL: test_4xi32_perm_mem_mask3:
2293 ; CHECK-NEXT: vpermilps {{.*#+}} xmm0 = mem[1,0,1,0]
2295 %vec = load <4 x i32>, ptr %vp
2296 %res = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
; Merge-masked <4 x i32> permute with indices [1,0,1,0] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others keep %vec2. The expected assembly is vptestnmd into %k1 followed by a
; %k1-masked vpshufd with a memory source writing xmm0.
2299 define <4 x i32> @test_masked_4xi32_perm_mem_mask3(ptr %vp, <4 x i32> %vec2, <4 x i32> %mask) {
2300 ; CHECK-LABEL: test_masked_4xi32_perm_mem_mask3:
2302 ; CHECK-NEXT: vptestnmd %xmm1, %xmm1, %k1
2303 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[1,0,1,0]
2305 %vec = load <4 x i32>, ptr %vp
2306 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
2307 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2308 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> %vec2
; Zero-masked <4 x i32> permute with indices [1,0,1,0] of a vector loaded from
; %vp: elements whose %mask element equals zero take the shuffled value, all
; others become zero. The expected assembly is vptestnmd into %k1 followed by a
; {z}-masked vpshufd with a memory source.
2312 define <4 x i32> @test_masked_z_4xi32_perm_mem_mask3(ptr %vp, <4 x i32> %mask) {
2313 ; CHECK-LABEL: test_masked_z_4xi32_perm_mem_mask3:
2315 ; CHECK-NEXT: vptestnmd %xmm0, %xmm0, %k1
2316 ; CHECK-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[1,0,1,0]
2318 %vec = load <4 x i32>, ptr %vp
2319 %shuf = shufflevector <4 x i32> %vec, <4 x i32> undef, <4 x i32> <i32 1, i32 0, i32 1, i32 0>
2320 %cmp = icmp eq <4 x i32> %mask, zeroinitializer
2321 %res = select <4 x i1> %cmp, <4 x i32> %shuf, <4 x i32> zeroinitializer
; Unmasked <8 x i32> permute of a register argument; the index list applies the
; pattern [2,3,1,0] to elements 0-3 and the same pattern offset by 4 to
; elements 4-7. The expected assembly is a ymm vshufps with those indices.
2325 define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
2326 ; CHECK-LABEL: test_8xi32_perm_mask0:
2328 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[2,3,1,0,6,7,5,4]
2330 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
; Merge-masked <8 x i32> permute with indices [2,3,1,0,6,7,5,4]: elements whose
; %mask element equals zero take the shuffled value, all others keep %vec2.
; The expected assembly is vptestnmd into %k1, a %k1-masked vpshufd writing
; ymm1, and a vmovdqa of ymm1 into ymm0.
2333 define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
2334 ; CHECK-LABEL: test_masked_8xi32_perm_mask0:
2336 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2337 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[2,3,1,0,6,7,5,4]
2338 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
2340 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
2341 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2342 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [2,3,1,0,6,7,5,4]: elements whose
; %mask element equals zero take the shuffled value, all others become zero.
; The expected assembly is a vptestnmd producing %k1 and a {z}-masked vpshufd.
2346 define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
2347 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask0:
2349 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2350 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,3,1,0,6,7,5,4]
2352 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 2, i32 3, i32 1, i32 0, i32 6, i32 7, i32 5, i32 4>
2353 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2354 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked <8 x i32> permute with indices [0,3,3,3,4,7,7,7]: elements whose
; %mask element equals zero take the shuffled value, all others keep %vec2.
; The expected assembly is vptestnmd into %k1, a %k1-masked vpshufd writing
; ymm1, and a vmovdqa of ymm1 into ymm0.
2357 define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
2358 ; CHECK-LABEL: test_masked_8xi32_perm_mask1:
2360 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2361 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,3,4,7,7,7]
2362 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
2364 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7>
2365 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2366 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [0,3,3,3,4,7,7,7]: elements whose
; %mask element equals zero take the shuffled value, all others become zero.
; The expected assembly is a vptestnmd producing %k1 and a {z}-masked vpshufd.
2370 define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
2371 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask1:
2373 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2374 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,3,4,7,7,7]
2376 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 3, i32 3, i32 4, i32 7, i32 7, i32 7>
2377 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2378 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked <8 x i32> permute with indices [1,2,0,3,5,6,4,7]: elements whose
; %mask element equals zero take the shuffled value, all others keep %vec2.
; The expected assembly is vptestnmd into %k1, a %k1-masked vpshufd writing
; ymm1, and a vmovdqa of ymm1 into ymm0.
2381 define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
2382 ; CHECK-LABEL: test_masked_8xi32_perm_mask2:
2384 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2385 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3,5,6,4,7]
2386 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
2388 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7>
2389 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2390 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [1,2,0,3,5,6,4,7]: elements whose
; %mask element equals zero take the shuffled value, all others become zero.
; The expected assembly is a vptestnmd producing %k1 and a {z}-masked vpshufd.
2394 define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
2395 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask2:
2397 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2398 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3,5,6,4,7]
2400 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 2, i32 0, i32 3, i32 5, i32 6, i32 4, i32 7>
2401 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2402 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked <8 x i32> permute of a register argument; the index list applies the
; pattern [1,3,1,0] to elements 0-3 and the same pattern offset by 4 to
; elements 4-7. The expected assembly is a ymm vshufps with those indices.
2405 define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
2406 ; CHECK-LABEL: test_8xi32_perm_mask3:
2408 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[1,3,1,0,5,7,5,4]
2410 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
; Merge-masked <8 x i32> permute with indices [1,3,1,0,5,7,5,4]: elements whose
; %mask element equals zero take the shuffled value, all others keep %vec2.
; The expected assembly is vptestnmd into %k1, a %k1-masked vpshufd writing
; ymm1, and a vmovdqa of ymm1 into ymm0.
2413 define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
2414 ; CHECK-LABEL: test_masked_8xi32_perm_mask3:
2416 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
2417 ; CHECK-NEXT: vpshufd {{.*#+}} ymm1 {%k1} = ymm0[1,3,1,0,5,7,5,4]
2418 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
2420 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
2421 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2422 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [1,3,1,0,5,7,5,4]: elements whose
; %mask element equals zero take the shuffled value, all others become zero.
; The expected assembly is a vptestnmd producing %k1 and a {z}-masked vpshufd.
2426 define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
2427 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask3:
2429 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2430 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = ymm0[1,3,1,0,5,7,5,4]
2432 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 0, i32 5, i32 7, i32 5, i32 4>
2433 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2434 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked <8 x i32> permute with indices [1,0,2,0,5,4,6,4] whose source vector
; is loaded from %vp. The expected assembly is a ymm vpermilps with a memory
; source.
2437 define <8 x i32> @test_8xi32_perm_mem_mask0(ptr %vp) {
2438 ; CHECK-LABEL: test_8xi32_perm_mem_mask0:
2440 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[1,0,2,0,5,4,6,4]
2442 %vec = load <8 x i32>, ptr %vp
2443 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
; Merge-masked <8 x i32> permute with indices [1,0,2,0,5,4,6,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others keep %vec2. The expected assembly is vptestnmd into %k1
; followed by a %k1-masked vpshufd with a memory source writing ymm0.
2446 define <8 x i32> @test_masked_8xi32_perm_mem_mask0(ptr %vp, <8 x i32> %vec2, <8 x i32> %mask) {
2447 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0:
2449 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2450 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[1,0,2,0,5,4,6,4]
2452 %vec = load <8 x i32>, ptr %vp
2453 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
2454 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2455 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [1,0,2,0,5,4,6,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others become zero. The expected assembly is vptestnmd into %k1
; followed by a {z}-masked vpshufd with a memory source.
2459 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(ptr %vp, <8 x i32> %mask) {
2460 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0:
2462 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2463 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[1,0,2,0,5,4,6,4]
2465 %vec = load <8 x i32>, ptr %vp
2466 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 1, i32 0, i32 2, i32 0, i32 5, i32 4, i32 6, i32 4>
2467 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2468 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked <8 x i32> permute with indices [0,3,2,0,4,7,6,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others keep %vec2. The expected assembly is vptestnmd into %k1
; followed by a %k1-masked vpshufd with a memory source writing ymm0.
2472 define <8 x i32> @test_masked_8xi32_perm_mem_mask1(ptr %vp, <8 x i32> %vec2, <8 x i32> %mask) {
2473 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1:
2475 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2476 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[0,3,2,0,4,7,6,4]
2478 %vec = load <8 x i32>, ptr %vp
2479 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
2480 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2481 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [0,3,2,0,4,7,6,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others become zero. The expected assembly is vptestnmd into %k1
; followed by a {z}-masked vpshufd with a memory source.
2485 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(ptr %vp, <8 x i32> %mask) {
2486 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1:
2488 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2489 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4]
2491 %vec = load <8 x i32>, ptr %vp
2492 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
2493 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2494 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked <8 x i32> permute with indices [3,2,3,1,7,6,7,5] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others keep %vec2. The expected assembly is vptestnmd into %k1
; followed by a %k1-masked vpshufd with a memory source writing ymm0.
2498 define <8 x i32> @test_masked_8xi32_perm_mem_mask2(ptr %vp, <8 x i32> %vec2, <8 x i32> %mask) {
2499 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2:
2501 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2502 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,3,1,7,6,7,5]
2504 %vec = load <8 x i32>, ptr %vp
2505 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5>
2506 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2507 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [3,2,3,1,7,6,7,5] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others become zero. The expected assembly is vptestnmd into %k1
; followed by a {z}-masked vpshufd with a memory source.
2511 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(ptr %vp, <8 x i32> %mask) {
2512 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2:
2514 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2515 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,1,7,6,7,5]
2517 %vec = load <8 x i32>, ptr %vp
2518 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 3, i32 1, i32 7, i32 6, i32 7, i32 5>
2519 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2520 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked <8 x i32> permute with indices [3,2,0,0,7,6,4,4] whose source vector
; is loaded from %vp. The expected assembly is a ymm vpermilps with a memory
; source.
2524 define <8 x i32> @test_8xi32_perm_mem_mask3(ptr %vp) {
2525 ; CHECK-LABEL: test_8xi32_perm_mem_mask3:
2527 ; CHECK-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,0,0,7,6,4,4]
2529 %vec = load <8 x i32>, ptr %vp
2530 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
; Merge-masked <8 x i32> permute with indices [3,2,0,0,7,6,4,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others keep %vec2. The expected assembly is vptestnmd into %k1
; followed by a %k1-masked vpshufd with a memory source writing ymm0.
2533 define <8 x i32> @test_masked_8xi32_perm_mem_mask3(ptr %vp, <8 x i32> %vec2, <8 x i32> %mask) {
2534 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3:
2536 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
2537 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,0,0,7,6,4,4]
2539 %vec = load <8 x i32>, ptr %vp
2540 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
2541 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2542 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked <8 x i32> permute with indices [3,2,0,0,7,6,4,4] of a vector
; loaded from %vp: elements whose %mask element equals zero take the shuffled
; value, all others become zero. The expected assembly is vptestnmd into %k1
; followed by a {z}-masked vpshufd with a memory source.
2546 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(ptr %vp, <8 x i32> %mask) {
2547 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3:
2549 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
2550 ; CHECK-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,0,0,7,6,4,4]
2552 %vec = load <8 x i32>, ptr %vp
2553 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 2, i32 0, i32 0, i32 7, i32 6, i32 4, i32 4>
2554 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
2555 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked <16 x i32> permute of a register argument; the index list applies
; the pattern [3,1,3,0] to each consecutive group of four elements (offset by
; 0, 4, 8 and 12). The expected assembly is a zmm vshufps with those indices.
2559 define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) {
2560 ; CHECK-LABEL: test_16xi32_perm_mask0:
2562 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
2564 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
; Merge-masked <16 x i32> permute applying [3,1,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others keep %vec2. The expected assembly is vptestnmd into %k1, a
; %k1-masked vpshufd writing zmm1, and a vmovdqa64 of zmm1 into zmm0.
2567 define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
2568 ; CHECK-LABEL: test_masked_16xi32_perm_mask0:
2570 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2571 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
2572 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
2574 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
2575 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2576 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [3,1,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others become zero. The expected assembly is a vptestnmd producing %k1
; and a {z}-masked vpshufd.
2580 define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
2581 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask0:
2583 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2584 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,0,7,5,7,4,11,9,11,8,15,13,15,12]
2586 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 3, i32 0, i32 7, i32 5, i32 7, i32 4, i32 11, i32 9, i32 11, i32 8, i32 15, i32 13, i32 15, i32 12>
2587 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2588 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> permute applying [2,0,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others keep %vec2. The expected assembly is vptestnmd into %k1, a
; %k1-masked vpshufd writing zmm1, and a vmovdqa64 of zmm1 into zmm0.
2591 define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
2592 ; CHECK-LABEL: test_masked_16xi32_perm_mask1:
2594 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2595 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12]
2596 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
2598 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
2599 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2600 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [2,0,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others become zero. The expected assembly is a vptestnmd producing %k1
; and a {z}-masked vpshufd.
2604 define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
2605 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask1:
2607 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2608 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[2,0,3,0,6,4,7,4,10,8,11,8,14,12,15,12]
2610 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 3, i32 0, i32 6, i32 4, i32 7, i32 4, i32 10, i32 8, i32 11, i32 8, i32 14, i32 12, i32 15, i32 12>
2611 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2612 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> permute applying [1,3,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others keep %vec2. The expected assembly is vptestnmd into %k1, a
; %k1-masked vpshufd writing zmm1, and a vmovdqa64 of zmm1 into zmm0.
2615 define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
2616 ; CHECK-LABEL: test_masked_16xi32_perm_mask2:
2618 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2619 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12]
2620 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
2622 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
2623 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2624 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [1,3,3,0] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others become zero. The expected assembly is a vptestnmd producing %k1
; and a {z}-masked vpshufd.
2628 define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
2629 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask2:
2631 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2632 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4,9,11,11,8,13,15,15,12]
2634 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4, i32 9, i32 11, i32 11, i32 8, i32 13, i32 15, i32 15, i32 12>
2635 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2636 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> permute of a register argument; the index list applies
; the pattern [3,2,0,3] to each consecutive group of four elements (offset by
; 0, 4, 8 and 12). The expected assembly is a zmm vshufps with those indices.
2639 define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
2640 ; CHECK-LABEL: test_16xi32_perm_mask3:
2642 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
2644 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
; Merge-masked <16 x i32> permute applying [3,2,0,3] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others keep %vec2. The expected assembly is vptestnmd into %k1, a
; %k1-masked vpshufd writing zmm1, and a vmovdqa64 of zmm1 into zmm0.
2647 define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
2648 ; CHECK-LABEL: test_masked_16xi32_perm_mask3:
2650 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
2651 ; CHECK-NEXT: vpshufd {{.*#+}} zmm1 {%k1} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
2652 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
2654 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
2655 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2656 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [3,2,0,3] within each group of four
; elements: elements whose %mask element equals zero take the shuffled value,
; all others become zero. The expected assembly is a vptestnmd producing %k1
; and a {z}-masked vpshufd.
2660 define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
2661 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask3:
2663 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2664 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,0,3,7,6,4,7,11,10,8,11,15,14,12,15]
2666 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 2, i32 0, i32 3, i32 7, i32 6, i32 4, i32 7, i32 11, i32 10, i32 8, i32 11, i32 15, i32 14, i32 12, i32 15>
2667 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2668 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> permute applying [1,0,1,3] within each group of four
; elements, with the source vector loaded from %vp. The expected assembly is a
; zmm vpermilps with a memory source.
2671 define <16 x i32> @test_16xi32_perm_mem_mask0(ptr %vp) {
2672 ; CHECK-LABEL: test_16xi32_perm_mem_mask0:
2674 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
2676 %vec = load <16 x i32>, ptr %vp
2677 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
; Merge-masked <16 x i32> permute applying [1,0,1,3] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others keep %vec2. The expected assembly is
; vptestnmd into %k1 and a %k1-masked vpshufd with a memory source writing zmm0.
2680 define <16 x i32> @test_masked_16xi32_perm_mem_mask0(ptr %vp, <16 x i32> %vec2, <16 x i32> %mask) {
2681 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0:
2683 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2684 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
2686 %vec = load <16 x i32>, ptr %vp
2687 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
2688 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2689 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [1,0,1,3] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others become zero. The expected assembly
; is vptestnmd into %k1 and a {z}-masked vpshufd with a memory source.
2693 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(ptr %vp, <16 x i32> %mask) {
2694 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0:
2696 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2697 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,1,3,5,4,5,7,9,8,9,11,13,12,13,15]
2699 %vec = load <16 x i32>, ptr %vp
2700 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 1, i32 3, i32 5, i32 4, i32 5, i32 7, i32 9, i32 8, i32 9, i32 11, i32 13, i32 12, i32 13, i32 15>
2701 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2702 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> permute applying [1,0,0,2] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others keep %vec2. The expected assembly is
; vptestnmd into %k1 and a %k1-masked vpshufd with a memory source writing zmm0.
2706 define <16 x i32> @test_masked_16xi32_perm_mem_mask1(ptr %vp, <16 x i32> %vec2, <16 x i32> %mask) {
2707 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1:
2709 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2710 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
2712 %vec = load <16 x i32>, ptr %vp
2713 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
2714 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2715 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [1,0,0,2] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others become zero. The expected assembly
; is vptestnmd into %k1 and a {z}-masked vpshufd with a memory source.
2719 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(ptr %vp, <16 x i32> %mask) {
2720 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1:
2722 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2723 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[1,0,0,2,5,4,4,6,9,8,8,10,13,12,12,14]
2725 %vec = load <16 x i32>, ptr %vp
2726 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 1, i32 0, i32 0, i32 2, i32 5, i32 4, i32 4, i32 6, i32 9, i32 8, i32 8, i32 10, i32 13, i32 12, i32 12, i32 14>
2727 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2728 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked <16 x i32> permute applying [2,0,1,2] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others keep %vec2. The expected assembly is
; vptestnmd into %k1 and a %k1-masked vpshufd with a memory source writing zmm0.
2732 define <16 x i32> @test_masked_16xi32_perm_mem_mask2(ptr %vp, <16 x i32> %vec2, <16 x i32> %mask) {
2733 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2:
2735 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2736 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
2738 %vec = load <16 x i32>, ptr %vp
2739 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
2740 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2741 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [2,0,1,2] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others become zero. The expected assembly
; is vptestnmd into %k1 and a {z}-masked vpshufd with a memory source.
2745 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(ptr %vp, <16 x i32> %mask) {
2746 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2:
2748 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2749 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[2,0,1,2,6,4,5,6,10,8,9,10,14,12,13,14]
2751 %vec = load <16 x i32>, ptr %vp
2752 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 2, i32 0, i32 1, i32 2, i32 6, i32 4, i32 5, i32 6, i32 10, i32 8, i32 9, i32 10, i32 14, i32 12, i32 13, i32 14>
2753 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2754 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked <16 x i32> permute applying [3,1,1,1] within each group of four
; elements, with the source vector loaded from %vp. The expected assembly is a
; zmm vpermilps with a memory source.
2758 define <16 x i32> @test_16xi32_perm_mem_mask3(ptr %vp) {
2759 ; CHECK-LABEL: test_16xi32_perm_mem_mask3:
2761 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
2763 %vec = load <16 x i32>, ptr %vp
2764 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
; Merge-masked <16 x i32> permute applying [3,1,1,1] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others keep %vec2. The expected assembly is
; vptestnmd into %k1 and a %k1-masked vpshufd with a memory source writing zmm0.
2767 define <16 x i32> @test_masked_16xi32_perm_mem_mask3(ptr %vp, <16 x i32> %vec2, <16 x i32> %mask) {
2768 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3:
2770 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
2771 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
2773 %vec = load <16 x i32>, ptr %vp
2774 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
2775 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2776 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked <16 x i32> permute applying [3,1,1,1] within each group of four
; elements of a vector loaded from %vp: elements whose %mask element equals
; zero take the shuffled value, all others become zero. The expected assembly
; is vptestnmd into %k1 and a {z}-masked vpshufd with a memory source.
2780 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(ptr %vp, <16 x i32> %mask) {
2781 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3:
2783 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
2784 ; CHECK-NEXT: vpshufd {{.*#+}} zmm0 {%k1} {z} = mem[3,1,1,1,7,5,5,5,11,9,9,9,15,13,13,13]
2786 %vec = load <16 x i32>, ptr %vp
2787 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 1, i32 1, i32 1, i32 7, i32 5, i32 5, i32 5, i32 11, i32 9, i32 9, i32 9, i32 15, i32 13, i32 13, i32 13>
2788 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
2789 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer