1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=x86_64-unknown-linux-gnu -mattr=+avx512f,+avx512vl,+avx512bw %s -o - | FileCheck %s
; Unmasked form: one-input shufflevector of %vec with a constant index list.
; The checks expect the index list loaded into ymm1 and a single vpermw.
4 define <16 x i16> @test_16xi16_perm_mask0(<16 x i16> %vec) {
5 ; CHECK-LABEL: test_16xi16_perm_mask0:
7 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
8 ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0
10 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
13 define <16 x i16> @test_masked_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
14 ; CHECK-LABEL: test_masked_16xi16_perm_mask0:
16 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
17 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
18 ; CHECK-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1}
19 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
21 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
22 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
23 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
27 define <16 x i16> @test_masked_z_16xi16_perm_mask0(<16 x i16> %vec, <16 x i16> %mask) {
28 ; CHECK-LABEL: test_masked_z_16xi16_perm_mask0:
30 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [8,6,12,4,7,9,14,8,4,12,9,4,14,15,12,14]
31 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
32 ; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
34 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 8, i32 6, i32 12, i32 4, i32 7, i32 9, i32 14, i32 8, i32 4, i32 12, i32 9, i32 4, i32 14, i32 15, i32 12, i32 14>
35 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
36 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
39 define <16 x i16> @test_masked_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
40 ; CHECK-LABEL: test_masked_16xi16_perm_mask1:
42 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
43 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
44 ; CHECK-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1}
45 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
47 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
48 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
49 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
53 define <16 x i16> @test_masked_z_16xi16_perm_mask1(<16 x i16> %vec, <16 x i16> %mask) {
54 ; CHECK-LABEL: test_masked_z_16xi16_perm_mask1:
56 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,11,14,10,7,1,6,9,14,15,7,13,4,12,8,0]
57 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
58 ; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
60 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 4, i32 11, i32 14, i32 10, i32 7, i32 1, i32 6, i32 9, i32 14, i32 15, i32 7, i32 13, i32 4, i32 12, i32 8, i32 0>
61 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
62 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
65 define <16 x i16> @test_masked_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
66 ; CHECK-LABEL: test_masked_16xi16_perm_mask2:
68 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
69 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
70 ; CHECK-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1}
71 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
73 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
74 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
75 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
79 define <16 x i16> @test_masked_z_16xi16_perm_mask2(<16 x i16> %vec, <16 x i16> %mask) {
80 ; CHECK-LABEL: test_masked_z_16xi16_perm_mask2:
82 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [11,6,13,10,0,7,13,3,5,13,3,9,3,15,12,7]
83 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
84 ; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
86 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 11, i32 6, i32 13, i32 10, i32 0, i32 7, i32 13, i32 3, i32 5, i32 13, i32 3, i32 9, i32 3, i32 15, i32 12, i32 7>
87 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
88 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked form: one-input shufflevector of %vec with a constant index list.
; The checks expect the index list loaded into ymm1 and a single vpermw.
91 define <16 x i16> @test_16xi16_perm_mask3(<16 x i16> %vec) {
92 ; CHECK-LABEL: test_16xi16_perm_mask3:
94 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
95 ; CHECK-NEXT: vpermw %ymm0, %ymm1, %ymm0
97 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
100 define <16 x i16> @test_masked_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %vec2, <16 x i16> %mask) {
101 ; CHECK-LABEL: test_masked_16xi16_perm_mask3:
103 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
104 ; CHECK-NEXT: vptestnmw %ymm2, %ymm2, %k1
105 ; CHECK-NEXT: vpermw %ymm0, %ymm3, %ymm1 {%k1}
106 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
108 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
109 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
110 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
114 define <16 x i16> @test_masked_z_16xi16_perm_mask3(<16 x i16> %vec, <16 x i16> %mask) {
115 ; CHECK-LABEL: test_masked_z_16xi16_perm_mask3:
117 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [1,5,8,14,1,8,11,8,13,8,15,9,9,7,9,6]
118 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
119 ; CHECK-NEXT: vpermw %ymm0, %ymm2, %ymm0 {%k1} {z}
121 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 5, i32 8, i32 14, i32 1, i32 8, i32 11, i32 8, i32 13, i32 8, i32 15, i32 9, i32 9, i32 7, i32 9, i32 6>
122 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
123 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked memory form: %vec is loaded from %vp before the constant-index shuffle.
; The checks expect vpermw to take its data operand from (%rdi).
126 define <16 x i16> @test_16xi16_perm_mem_mask0(<16 x i16>* %vp) {
127 ; CHECK-LABEL: test_16xi16_perm_mem_mask0:
129 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
130 ; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0
132 %vec = load <16 x i16>, <16 x i16>* %vp
133 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into ymm0 under {%k1}.
136 define <16 x i16> @test_masked_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
137 ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask0:
139 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
140 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
141 ; CHECK-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1}
143 %vec = load <16 x i16>, <16 x i16>* %vp
144 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
145 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
146 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
150 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask0(<16 x i16>* %vp, <16 x i16> %mask) {
151 ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask0:
153 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,10,7,1,12,14,14,13,14,14,8,6,11,4,12,13]
154 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
155 ; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
157 %vec = load <16 x i16>, <16 x i16>* %vp
158 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 10, i32 7, i32 1, i32 12, i32 14, i32 14, i32 13, i32 14, i32 14, i32 8, i32 6, i32 11, i32 4, i32 12, i32 13>
159 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
160 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into ymm0 under {%k1}.
164 define <16 x i16> @test_masked_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
165 ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask1:
167 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
168 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
169 ; CHECK-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1}
171 %vec = load <16 x i16>, <16 x i16>* %vp
172 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
173 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
174 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
178 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask1(<16 x i16>* %vp, <16 x i16> %mask) {
179 ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask1:
181 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [14,9,15,9,7,10,15,14,12,1,9,7,10,13,3,11]
182 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
183 ; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
185 %vec = load <16 x i16>, <16 x i16>* %vp
186 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 14, i32 9, i32 15, i32 9, i32 7, i32 10, i32 15, i32 14, i32 12, i32 1, i32 9, i32 7, i32 10, i32 13, i32 3, i32 11>
187 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
188 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into ymm0 under {%k1}.
192 define <16 x i16> @test_masked_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
193 ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask2:
195 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
196 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
197 ; CHECK-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1}
199 %vec = load <16 x i16>, <16 x i16>* %vp
200 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
201 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
202 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
206 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask2(<16 x i16>* %vp, <16 x i16> %mask) {
207 ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask2:
209 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [1,3,12,5,13,1,2,11,0,9,14,8,10,0,10,9]
210 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
211 ; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
213 %vec = load <16 x i16>, <16 x i16>* %vp
214 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 1, i32 3, i32 12, i32 5, i32 13, i32 1, i32 2, i32 11, i32 0, i32 9, i32 14, i32 8, i32 10, i32 0, i32 10, i32 9>
215 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
216 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked memory form: %vec is loaded from %vp before the constant-index shuffle.
; The checks expect vpermw to take its data operand from (%rdi).
220 define <16 x i16> @test_16xi16_perm_mem_mask3(<16 x i16>* %vp) {
221 ; CHECK-LABEL: test_16xi16_perm_mem_mask3:
223 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm0 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
224 ; CHECK-NEXT: vpermw (%rdi), %ymm0, %ymm0
226 %vec = load <16 x i16>, <16 x i16>* %vp
227 %res = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into ymm0 under {%k1}.
230 define <16 x i16> @test_masked_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %vec2, <16 x i16> %mask) {
231 ; CHECK-LABEL: test_masked_16xi16_perm_mem_mask3:
233 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
234 ; CHECK-NEXT: vptestnmw %ymm1, %ymm1, %k1
235 ; CHECK-NEXT: vpermw (%rdi), %ymm2, %ymm0 {%k1}
237 %vec = load <16 x i16>, <16 x i16>* %vp
238 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
239 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
240 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
244 define <16 x i16> @test_masked_z_16xi16_perm_mem_mask3(<16 x i16>* %vp, <16 x i16> %mask) {
245 ; CHECK-LABEL: test_masked_z_16xi16_perm_mem_mask3:
247 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [9,6,5,15,0,0,15,2,1,3,12,14,0,6,1,4]
248 ; CHECK-NEXT: vptestnmw %ymm0, %ymm0, %k1
249 ; CHECK-NEXT: vpermw (%rdi), %ymm1, %ymm0 {%k1} {z}
251 %vec = load <16 x i16>, <16 x i16>* %vp
252 %shuf = shufflevector <16 x i16> %vec, <16 x i16> undef, <16 x i32> <i32 9, i32 6, i32 5, i32 15, i32 0, i32 0, i32 15, i32 2, i32 1, i32 3, i32 12, i32 14, i32 0, i32 6, i32 1, i32 4>
253 %cmp = icmp eq <16 x i16> %mask, zeroinitializer
254 %res = select <16 x i1> %cmp, <16 x i16> %shuf, <16 x i16> zeroinitializer
; Unmasked form: one-input shufflevector of %vec with a constant index list.
; The checks expect the index list loaded into zmm1 and a single vpermw.
258 define <32 x i16> @test_32xi16_perm_mask0(<32 x i16> %vec) {
259 ; CHECK-LABEL: test_32xi16_perm_mask0:
261 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
262 ; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0
264 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into zmm1 under {%k1}, then a
; vmovdqa64 of zmm1 to zmm0.
267 define <32 x i16> @test_masked_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
268 ; CHECK-LABEL: test_masked_32xi16_perm_mask0:
270 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
271 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
272 ; CHECK-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1}
273 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
275 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
276 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
277 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
281 define <32 x i16> @test_masked_z_32xi16_perm_mask0(<32 x i16> %vec, <32 x i16> %mask) {
282 ; CHECK-LABEL: test_masked_z_32xi16_perm_mask0:
284 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [16,1,3,31,6,11,23,26,29,5,21,30,1,21,27,10,8,19,14,5,15,13,18,16,9,11,26,8,17,0,23,10]
285 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
286 ; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
288 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 16, i32 1, i32 3, i32 31, i32 6, i32 11, i32 23, i32 26, i32 29, i32 5, i32 21, i32 30, i32 1, i32 21, i32 27, i32 10, i32 8, i32 19, i32 14, i32 5, i32 15, i32 13, i32 18, i32 16, i32 9, i32 11, i32 26, i32 8, i32 17, i32 0, i32 23, i32 10>
289 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
290 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into zmm1 under {%k1}, then a
; vmovdqa64 of zmm1 to zmm0.
293 define <32 x i16> @test_masked_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
294 ; CHECK-LABEL: test_masked_32xi16_perm_mask1:
296 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16]
297 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
298 ; CHECK-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1}
299 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
301 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
302 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
303 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
307 define <32 x i16> @test_masked_z_32xi16_perm_mask1(<32 x i16> %vec, <32 x i16> %mask) {
308 ; CHECK-LABEL: test_masked_z_32xi16_perm_mask1:
310 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,8,7,30,11,9,11,30,20,19,22,12,13,20,0,6,10,7,20,12,28,18,13,12,22,13,21,1,14,8,5,16]
311 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
312 ; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
314 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 1, i32 8, i32 7, i32 30, i32 11, i32 9, i32 11, i32 30, i32 20, i32 19, i32 22, i32 12, i32 13, i32 20, i32 0, i32 6, i32 10, i32 7, i32 20, i32 12, i32 28, i32 18, i32 13, i32 12, i32 22, i32 13, i32 21, i32 1, i32 14, i32 8, i32 5, i32 16>
315 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
316 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into zmm1 under {%k1}, then a
; vmovdqa64 of zmm1 to zmm0.
319 define <32 x i16> @test_masked_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
320 ; CHECK-LABEL: test_masked_32xi16_perm_mask2:
322 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27]
323 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
324 ; CHECK-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1}
325 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
327 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
328 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
329 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
333 define <32 x i16> @test_masked_z_32xi16_perm_mask2(<32 x i16> %vec, <32 x i16> %mask) {
334 ; CHECK-LABEL: test_masked_z_32xi16_perm_mask2:
336 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [15,17,24,28,15,9,14,25,28,25,6,31,20,2,23,31,12,21,10,6,22,0,26,16,3,3,20,27,8,31,3,27]
337 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
338 ; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
340 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 15, i32 17, i32 24, i32 28, i32 15, i32 9, i32 14, i32 25, i32 28, i32 25, i32 6, i32 31, i32 20, i32 2, i32 23, i32 31, i32 12, i32 21, i32 10, i32 6, i32 22, i32 0, i32 26, i32 16, i32 3, i32 3, i32 20, i32 27, i32 8, i32 31, i32 3, i32 27>
341 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
342 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked form: one-input shufflevector of %vec with a constant index list.
; The checks expect the index list loaded into zmm1 and a single vpermw.
345 define <32 x i16> @test_32xi16_perm_mask3(<32 x i16> %vec) {
346 ; CHECK-LABEL: test_32xi16_perm_mask3:
348 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
349 ; CHECK-NEXT: vpermw %zmm0, %zmm1, %zmm0
351 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmw to set %k1, vpermw into zmm1 under {%k1}, then a
; vmovdqa64 of zmm1 to zmm0.
354 define <32 x i16> @test_masked_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %vec2, <32 x i16> %mask) {
355 ; CHECK-LABEL: test_masked_32xi16_perm_mask3:
357 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
358 ; CHECK-NEXT: vptestnmw %zmm2, %zmm2, %k1
359 ; CHECK-NEXT: vpermw %zmm0, %zmm3, %zmm1 {%k1}
360 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
362 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
363 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
364 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw under {%k1} {z}.
368 define <32 x i16> @test_masked_z_32xi16_perm_mask3(<32 x i16> %vec, <32 x i16> %mask) {
369 ; CHECK-LABEL: test_masked_z_32xi16_perm_mask3:
371 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [12,2,8,14,25,27,4,16,20,11,27,8,0,1,21,17,30,30,29,1,23,22,20,22,28,20,11,17,6,18,0,4]
372 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
373 ; CHECK-NEXT: vpermw %zmm0, %zmm2, %zmm0 {%k1} {z}
375 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 12, i32 2, i32 8, i32 14, i32 25, i32 27, i32 4, i32 16, i32 20, i32 11, i32 27, i32 8, i32 0, i32 1, i32 21, i32 17, i32 30, i32 30, i32 29, i32 1, i32 23, i32 22, i32 20, i32 22, i32 28, i32 20, i32 11, i32 17, i32 6, i32 18, i32 0, i32 4>
376 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
377 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked memory form: %vec is loaded from %vp before the constant-index shuffle.
; The checks expect vpermw to take its data operand from (%rdi).
380 define <32 x i16> @test_32xi16_perm_mem_mask0(<32 x i16>* %vp) {
381 ; CHECK-LABEL: test_32xi16_perm_mem_mask0:
383 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
384 ; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0
386 %vec = load <32 x i16>, <32 x i16>* %vp
387 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into zmm0 under {%k1}.
390 define <32 x i16> @test_masked_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
391 ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask0:
393 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
394 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
395 ; CHECK-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1}
397 %vec = load <32 x i16>, <32 x i16>* %vp
398 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
399 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
400 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
404 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask0(<32 x i16>* %vp, <32 x i16> %mask) {
405 ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask0:
407 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [19,1,5,31,9,12,17,9,15,7,1,5,16,2,12,10,13,3,29,15,26,31,10,15,22,13,9,23,28,29,20,12]
408 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
409 ; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
411 %vec = load <32 x i16>, <32 x i16>* %vp
412 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 19, i32 1, i32 5, i32 31, i32 9, i32 12, i32 17, i32 9, i32 15, i32 7, i32 1, i32 5, i32 16, i32 2, i32 12, i32 10, i32 13, i32 3, i32 29, i32 15, i32 26, i32 31, i32 10, i32 15, i32 22, i32 13, i32 9, i32 23, i32 28, i32 29, i32 20, i32 12>
413 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
414 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into zmm0 under {%k1}.
418 define <32 x i16> @test_masked_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
419 ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask1:
421 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6]
422 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
423 ; CHECK-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1}
425 %vec = load <32 x i16>, <32 x i16>* %vp
426 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
427 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
428 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
432 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask1(<32 x i16>* %vp, <32 x i16> %mask) {
433 ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask1:
435 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [31,20,2,2,23,1,0,12,16,14,15,18,21,13,11,31,8,24,13,11,2,27,22,28,14,21,3,12,6,1,30,6]
436 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
437 ; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
439 %vec = load <32 x i16>, <32 x i16>* %vp
440 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 31, i32 20, i32 2, i32 2, i32 23, i32 1, i32 0, i32 12, i32 16, i32 14, i32 15, i32 18, i32 21, i32 13, i32 11, i32 31, i32 8, i32 24, i32 13, i32 11, i32 2, i32 27, i32 22, i32 28, i32 14, i32 21, i32 3, i32 12, i32 6, i32 1, i32 30, i32 6>
441 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
442 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into zmm0 under {%k1}.
446 define <32 x i16> @test_masked_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
447 ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask2:
449 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25]
450 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
451 ; CHECK-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1}
453 %vec = load <32 x i16>, <32 x i16>* %vp
454 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
455 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
456 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
460 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask2(<32 x i16>* %vp, <32 x i16> %mask) {
461 ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask2:
463 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [4,6,12,17,4,31,31,4,12,21,28,15,29,10,15,15,21,6,19,7,10,30,28,26,1,4,8,25,26,18,22,25]
464 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
465 ; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
467 %vec = load <32 x i16>, <32 x i16>* %vp
468 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 4, i32 6, i32 12, i32 17, i32 4, i32 31, i32 31, i32 4, i32 12, i32 21, i32 28, i32 15, i32 29, i32 10, i32 15, i32 15, i32 21, i32 6, i32 19, i32 7, i32 10, i32 30, i32 28, i32 26, i32 1, i32 4, i32 8, i32 25, i32 26, i32 18, i32 22, i32 25>
469 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
470 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked memory form: %vec is loaded from %vp before the constant-index shuffle.
; The checks expect vpermw to take its data operand from (%rdi).
474 define <32 x i16> @test_32xi16_perm_mem_mask3(<32 x i16>* %vp) {
475 ; CHECK-LABEL: test_32xi16_perm_mem_mask3:
477 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm0 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
478 ; CHECK-NEXT: vpermw (%rdi), %zmm0, %zmm0
480 %vec = load <32 x i16>, <32 x i16>* %vp
481 %res = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
; Merge-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise %vec2.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) into zmm0 under {%k1}.
484 define <32 x i16> @test_masked_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %vec2, <32 x i16> %mask) {
485 ; CHECK-LABEL: test_masked_32xi16_perm_mem_mask3:
487 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
488 ; CHECK-NEXT: vptestnmw %zmm1, %zmm1, %k1
489 ; CHECK-NEXT: vpermw (%rdi), %zmm2, %zmm0 {%k1}
491 %vec = load <32 x i16>, <32 x i16>* %vp
492 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
493 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
494 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> %vec2
; Zero-masked memory form: %vec is loaded from %vp; a result lane takes the
; shuffled element where the matching %mask lane equals zero, otherwise zero.
; The checks expect vptestnmw to set %k1 and vpermw (%rdi) under {%k1} {z}.
498 define <32 x i16> @test_masked_z_32xi16_perm_mem_mask3(<32 x i16>* %vp, <32 x i16> %mask) {
499 ; CHECK-LABEL: test_masked_z_32xi16_perm_mem_mask3:
501 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [2,2,27,1,7,1,0,27,10,5,4,20,30,16,28,16,18,21,25,24,31,23,28,6,17,19,26,15,25,12,18,27]
502 ; CHECK-NEXT: vptestnmw %zmm0, %zmm0, %k1
503 ; CHECK-NEXT: vpermw (%rdi), %zmm1, %zmm0 {%k1} {z}
505 %vec = load <32 x i16>, <32 x i16>* %vp
506 %shuf = shufflevector <32 x i16> %vec, <32 x i16> undef, <32 x i32> <i32 2, i32 2, i32 27, i32 1, i32 7, i32 1, i32 0, i32 27, i32 10, i32 5, i32 4, i32 20, i32 30, i32 16, i32 28, i32 16, i32 18, i32 21, i32 25, i32 24, i32 31, i32 23, i32 28, i32 6, i32 17, i32 19, i32 26, i32 15, i32 25, i32 12, i32 18, i32 27>
507 %cmp = icmp eq <32 x i16> %mask, zeroinitializer
508 %res = select <32 x i1> %cmp, <32 x i16> %shuf, <32 x i16> zeroinitializer
; Unmasked form: one-input shufflevector of %vec with a constant index list.
; The checks expect the index list loaded with vmovaps into ymm1 and a single
; vpermps, although the element type in the IR is i32.
512 define <8 x i32> @test_8xi32_perm_mask0(<8 x i32> %vec) {
513 ; CHECK-LABEL: test_8xi32_perm_mask0:
515 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,2,0,6,7,2,3,6]
516 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
518 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmd to set %k1, vpermd into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
521 define <8 x i32> @test_masked_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
522 ; CHECK-LABEL: test_masked_8xi32_perm_mask0:
524 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [4,2,0,6,7,2,3,6]
525 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
526 ; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1}
527 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
529 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
530 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
531 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmd to set %k1 and vpermd under {%k1} {z}.
535 define <8 x i32> @test_masked_z_8xi32_perm_mask0(<8 x i32> %vec, <8 x i32> %mask) {
536 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask0:
538 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,2,0,6,7,2,3,6]
539 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
540 ; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
542 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 2, i32 0, i32 6, i32 7, i32 2, i32 3, i32 6>
543 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
544 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmd to set %k1, vpermd into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
547 define <8 x i32> @test_masked_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
548 ; CHECK-LABEL: test_masked_8xi32_perm_mask1:
550 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [0,5,1,2,6,0,0,3]
551 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
552 ; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1}
553 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
555 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
556 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
557 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise zero.
; The checks expect vptestnmd to set %k1 and vpermd under {%k1} {z}.
561 define <8 x i32> @test_masked_z_8xi32_perm_mask1(<8 x i32> %vec, <8 x i32> %mask) {
562 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask1:
564 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [0,5,1,2,6,0,0,3]
565 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
566 ; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
568 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 0, i32 5, i32 1, i32 2, i32 6, i32 0, i32 0, i32 3>
569 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
570 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked form: a result lane takes the shuffled element where the matching
; %mask lane equals zero, otherwise the %vec2 lane.
; The checks expect vptestnmd to set %k1, vpermd into ymm1 under {%k1}, then a
; vmovdqa of ymm1 to ymm0.
573 define <8 x i32> @test_masked_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
574 ; CHECK-LABEL: test_masked_8xi32_perm_mask2:
576 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [3,6,5,5,1,7,3,4]
577 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
578 ; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1}
579 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
581 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
582 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
583 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked register permute of <8 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
587 define <8 x i32> @test_masked_z_8xi32_perm_mask2(<8 x i32> %vec, <8 x i32> %mask) {
588 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask2:
590 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,6,5,5,1,7,3,4]
591 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
592 ; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
594 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 6, i32 5, i32 5, i32 1, i32 7, i32 3, i32 4>
595 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
596 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked register permute of <8 x i32> with a constant index vector. Note the
; expected assembly uses the single-precision forms (vmovaps / vpermps) for this
; i32 shuffle, unlike the masked variants of the same shuffle, which expect vpermd.
599 define <8 x i32> @test_8xi32_perm_mask3(<8 x i32> %vec) {
600 ; CHECK-LABEL: test_8xi32_perm_mask3:
602 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,0,3,1,0,4,5,0]
603 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
605 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
; Merge-masked register permute of <8 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into ymm1 under {%k1}, then copies ymm1 to ymm0.
608 define <8 x i32> @test_masked_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %vec2, <8 x i32> %mask) {
609 ; CHECK-LABEL: test_masked_8xi32_perm_mask3:
611 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm3 = [3,0,3,1,0,4,5,0]
612 ; CHECK-NEXT: vptestnmd %ymm2, %ymm2, %k1
613 ; CHECK-NEXT: vpermd %ymm0, %ymm3, %ymm1 {%k1}
614 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
616 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
617 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
618 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked register permute of <8 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
622 define <8 x i32> @test_masked_z_8xi32_perm_mask3(<8 x i32> %vec, <8 x i32> %mask) {
623 ; CHECK-LABEL: test_masked_z_8xi32_perm_mask3:
625 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,0,3,1,0,4,5,0]
626 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
627 ; CHECK-NEXT: vpermd %ymm0, %ymm2, %ymm0 {%k1} {z}
629 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 0, i32 3, i32 1, i32 0, i32 4, i32 5, i32 0>
630 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
631 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked permute of an <8 x i32> loaded from %vp. The expected assembly reads
; the source directly through the (%rdi) memory operand of vpermps; the index
; vector is materialized with vmovaps.
634 define <8 x i32> @test_8xi32_perm_mem_mask0(<8 x i32>* %vp) {
635 ; CHECK-LABEL: test_8xi32_perm_mem_mask0:
637 ; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [3,7,4,3,5,2,0,5]
638 ; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
640 %vec = load <8 x i32>, <8 x i32>* %vp
641 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
; Merge-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes ymm0 under {%k1}.
644 define <8 x i32> @test_masked_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
645 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask0:
647 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [3,7,4,3,5,2,0,5]
648 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
649 ; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1}
651 %vec = load <8 x i32>, <8 x i32>* %vp
652 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
653 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
654 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
658 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask0(<8 x i32>* %vp, <8 x i32> %mask) {
659 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask0:
661 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [3,7,4,3,5,2,0,5]
662 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
663 ; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z}
665 %vec = load <8 x i32>, <8 x i32>* %vp
666 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 3, i32 7, i32 4, i32 3, i32 5, i32 2, i32 0, i32 5>
667 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
668 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes ymm0 under {%k1}.
672 define <8 x i32> @test_masked_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
673 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask1:
675 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [4,6,1,7,6,7,6,5]
676 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
677 ; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1}
679 %vec = load <8 x i32>, <8 x i32>* %vp
680 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
681 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
682 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
686 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask1(<8 x i32>* %vp, <8 x i32> %mask) {
687 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask1:
689 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [4,6,1,7,6,7,6,5]
690 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
691 ; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z}
693 %vec = load <8 x i32>, <8 x i32>* %vp
694 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 4, i32 6, i32 1, i32 7, i32 6, i32 7, i32 6, i32 5>
695 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
696 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Merge-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes ymm0 under {%k1}.
700 define <8 x i32> @test_masked_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
701 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask2:
703 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [6,4,6,1,6,3,6,3]
704 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
705 ; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1}
707 %vec = load <8 x i32>, <8 x i32>* %vp
708 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
709 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
710 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
714 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask2(<8 x i32>* %vp, <8 x i32> %mask) {
715 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask2:
717 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,4,6,1,6,3,6,3]
718 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
719 ; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z}
721 %vec = load <8 x i32>, <8 x i32>* %vp
722 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 4, i32 6, i32 1, i32 6, i32 3, i32 6, i32 3>
723 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
724 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked permute of an <8 x i32> loaded from %vp. The expected assembly reads
; the source directly through the (%rdi) memory operand of vpermps; the index
; vector is materialized with vmovaps.
728 define <8 x i32> @test_8xi32_perm_mem_mask3(<8 x i32>* %vp) {
729 ; CHECK-LABEL: test_8xi32_perm_mem_mask3:
731 ; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [6,0,0,7,3,7,7,5]
732 ; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
734 %vec = load <8 x i32>, <8 x i32>* %vp
735 %res = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
; Merge-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes ymm0 under {%k1}.
738 define <8 x i32> @test_masked_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %vec2, <8 x i32> %mask) {
739 ; CHECK-LABEL: test_masked_8xi32_perm_mem_mask3:
741 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm2 = [6,0,0,7,3,7,7,5]
742 ; CHECK-NEXT: vptestnmd %ymm1, %ymm1, %k1
743 ; CHECK-NEXT: vpermd (%rdi), %ymm2, %ymm0 {%k1}
745 %vec = load <8 x i32>, <8 x i32>* %vp
746 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
747 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
748 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> %vec2
; Zero-masked permute of an <8 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
752 define <8 x i32> @test_masked_z_8xi32_perm_mem_mask3(<8 x i32>* %vp, <8 x i32> %mask) {
753 ; CHECK-LABEL: test_masked_z_8xi32_perm_mem_mask3:
755 ; CHECK-NEXT: vmovdqa {{.*#+}} ymm1 = [6,0,0,7,3,7,7,5]
756 ; CHECK-NEXT: vptestnmd %ymm0, %ymm0, %k1
757 ; CHECK-NEXT: vpermd (%rdi), %ymm1, %ymm0 {%k1} {z}
759 %vec = load <8 x i32>, <8 x i32>* %vp
760 %shuf = shufflevector <8 x i32> %vec, <8 x i32> undef, <8 x i32> <i32 6, i32 0, i32 0, i32 7, i32 3, i32 7, i32 7, i32 5>
761 %cmp = icmp eq <8 x i32> %mask, zeroinitializer
762 %res = select <8 x i1> %cmp, <8 x i32> %shuf, <8 x i32> zeroinitializer
; Unmasked register permute of <16 x i32> with a constant index vector. Note the
; expected assembly uses the single-precision forms (vmovaps / vpermps) on zmm
; registers for this i32 shuffle.
766 define <16 x i32> @test_16xi32_perm_mask0(<16 x i32> %vec) {
767 ; CHECK-LABEL: test_16xi32_perm_mask0:
769 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
770 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
772 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
; Merge-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into zmm1 under {%k1}, then copies zmm1 to zmm0.
775 define <16 x i32> @test_masked_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
776 ; CHECK-LABEL: test_masked_16xi32_perm_mask0:
778 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
779 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
780 ; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
781 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
783 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
784 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
785 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
789 define <16 x i32> @test_masked_z_16xi32_perm_mask0(<16 x i32> %vec, <16 x i32> %mask) {
790 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask0:
792 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [14,12,11,6,4,1,6,9,14,14,6,1,12,11,0,7]
793 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
794 ; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
796 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 14, i32 12, i32 11, i32 6, i32 4, i32 1, i32 6, i32 9, i32 14, i32 14, i32 6, i32 1, i32 12, i32 11, i32 0, i32 7>
797 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
798 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into zmm1 under {%k1}, then copies zmm1 to zmm0.
801 define <16 x i32> @test_masked_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
802 ; CHECK-LABEL: test_masked_16xi32_perm_mask1:
804 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
805 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
806 ; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
807 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
809 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
810 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
811 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
815 define <16 x i32> @test_masked_z_16xi32_perm_mask1(<16 x i32> %vec, <16 x i32> %mask) {
816 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask1:
818 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [10,0,14,15,11,1,1,5,0,5,0,15,13,1,14,3]
819 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
820 ; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
822 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 10, i32 0, i32 14, i32 15, i32 11, i32 1, i32 1, i32 5, i32 0, i32 5, i32 0, i32 15, i32 13, i32 1, i32 14, i32 3>
823 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
824 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into zmm1 under {%k1}, then copies zmm1 to zmm0.
827 define <16 x i32> @test_masked_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
828 ; CHECK-LABEL: test_masked_16xi32_perm_mask2:
830 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
831 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
832 ; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
833 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
835 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
836 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
837 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
841 define <16 x i32> @test_masked_z_16xi32_perm_mask2(<16 x i32> %vec, <16 x i32> %mask) {
842 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask2:
844 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [3,10,15,1,0,5,0,9,13,2,1,5,15,2,15,5]
845 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
846 ; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
848 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 3, i32 10, i32 15, i32 1, i32 0, i32 5, i32 0, i32 9, i32 13, i32 2, i32 1, i32 5, i32 15, i32 2, i32 15, i32 5>
849 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
850 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked register permute of <16 x i32> with a constant index vector. Note the
; expected assembly uses the single-precision forms (vmovaps / vpermps) on zmm
; registers for this i32 shuffle.
853 define <16 x i32> @test_16xi32_perm_mask3(<16 x i32> %vec) {
854 ; CHECK-LABEL: test_16xi32_perm_mask3:
856 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
857 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
859 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
; Merge-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into zmm1 under {%k1}, then copies zmm1 to zmm0.
862 define <16 x i32> @test_masked_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %vec2, <16 x i32> %mask) {
863 ; CHECK-LABEL: test_masked_16xi32_perm_mask3:
865 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
866 ; CHECK-NEXT: vptestnmd %zmm2, %zmm2, %k1
867 ; CHECK-NEXT: vpermd %zmm0, %zmm3, %zmm1 {%k1}
868 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
870 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
871 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
872 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked register permute of <16 x i32>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmd and uses one vpermd carrying {%k1} {z}.
876 define <16 x i32> @test_masked_z_16xi32_perm_mask3(<16 x i32> %vec, <16 x i32> %mask) {
877 ; CHECK-LABEL: test_masked_z_16xi32_perm_mask3:
879 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,4,14,15,10,2,15,1,9,2,14,15,12,5,3,12]
880 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
881 ; CHECK-NEXT: vpermd %zmm0, %zmm2, %zmm0 {%k1} {z}
883 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 4, i32 14, i32 15, i32 10, i32 2, i32 15, i32 1, i32 9, i32 2, i32 14, i32 15, i32 12, i32 5, i32 3, i32 12>
884 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
885 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked permute of a <16 x i32> loaded from %vp. The expected assembly reads
; the source directly through the (%rdi) memory operand of vpermps; the index
; vector is materialized with vmovaps.
888 define <16 x i32> @test_16xi32_perm_mem_mask0(<16 x i32>* %vp) {
889 ; CHECK-LABEL: test_16xi32_perm_mem_mask0:
891 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
892 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
894 %vec = load <16 x i32>, <16 x i32>* %vp
895 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
; Merge-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes zmm0 under {%k1}.
898 define <16 x i32> @test_masked_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
899 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask0:
901 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
902 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
903 ; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
905 %vec = load <16 x i32>, <16 x i32>* %vp
906 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
907 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
908 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
912 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask0(<16 x i32>* %vp, <16 x i32> %mask) {
913 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask0:
915 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,1,1,6,8,11,2,6,10,1,7,5,15,0,6,6]
916 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
917 ; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
919 %vec = load <16 x i32>, <16 x i32>* %vp
920 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 0, i32 1, i32 1, i32 6, i32 8, i32 11, i32 2, i32 6, i32 10, i32 1, i32 7, i32 5, i32 15, i32 0, i32 6, i32 6>
921 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
922 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes zmm0 under {%k1}.
926 define <16 x i32> @test_masked_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
927 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask1:
929 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
930 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
931 ; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
933 %vec = load <16 x i32>, <16 x i32>* %vp
934 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
935 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
936 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
940 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask1(<16 x i32>* %vp, <16 x i32> %mask) {
941 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask1:
943 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,5,3,4,7,15,12,4,8,11,12,7,6,12,6,3]
944 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
945 ; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
947 %vec = load <16 x i32>, <16 x i32>* %vp
948 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 5, i32 3, i32 4, i32 7, i32 15, i32 12, i32 4, i32 8, i32 11, i32 12, i32 7, i32 6, i32 12, i32 6, i32 3>
949 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
950 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Merge-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes zmm0 under {%k1}.
954 define <16 x i32> @test_masked_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
955 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask2:
957 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
958 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
959 ; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
961 %vec = load <16 x i32>, <16 x i32>* %vp
962 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
963 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
964 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
968 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask2(<16 x i32>* %vp, <16 x i32> %mask) {
969 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask2:
971 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [7,14,2,7,10,7,3,0,11,9,0,4,12,10,8,2]
972 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
973 ; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
975 %vec = load <16 x i32>, <16 x i32>* %vp
976 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 7, i32 14, i32 2, i32 7, i32 10, i32 7, i32 3, i32 0, i32 11, i32 9, i32 0, i32 4, i32 12, i32 10, i32 8, i32 2>
977 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
978 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked permute of a <16 x i32> loaded from %vp. The expected assembly reads
; the source directly through the (%rdi) memory operand of vpermps; the index
; vector is materialized with vmovaps.
982 define <16 x i32> @test_16xi32_perm_mem_mask3(<16 x i32>* %vp) {
983 ; CHECK-LABEL: test_16xi32_perm_mem_mask3:
985 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
986 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
988 %vec = load <16 x i32>, <16 x i32>* %vp
989 %res = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
; Merge-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermd takes (%rdi) as source and writes zmm0 under {%k1}.
992 define <16 x i32> @test_masked_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %vec2, <16 x i32> %mask) {
993 ; CHECK-LABEL: test_masked_16xi32_perm_mem_mask3:
995 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
996 ; CHECK-NEXT: vptestnmd %zmm1, %zmm1, %k1
997 ; CHECK-NEXT: vpermd (%rdi), %zmm2, %zmm0 {%k1}
999 %vec = load <16 x i32>, <16 x i32>* %vp
1000 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
1001 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1002 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> %vec2
; Zero-masked permute of a <16 x i32> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermd takes (%rdi) as source and carries {%k1} {z}.
1006 define <16 x i32> @test_masked_z_16xi32_perm_mem_mask3(<16 x i32>* %vp, <16 x i32> %mask) {
1007 ; CHECK-LABEL: test_masked_z_16xi32_perm_mem_mask3:
1009 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [11,7,10,12,3,12,4,15,1,14,0,4,8,9,6,1]
1010 ; CHECK-NEXT: vptestnmd %zmm0, %zmm0, %k1
1011 ; CHECK-NEXT: vpermd (%rdi), %zmm1, %zmm0 {%k1} {z}
1013 %vec = load <16 x i32>, <16 x i32>* %vp
1014 %shuf = shufflevector <16 x i32> %vec, <16 x i32> undef, <16 x i32> <i32 11, i32 7, i32 10, i32 12, i32 3, i32 12, i32 4, i32 15, i32 1, i32 14, i32 0, i32 4, i32 8, i32 9, i32 6, i32 1>
1015 %cmp = icmp eq <16 x i32> %mask, zeroinitializer
1016 %res = select <16 x i1> %cmp, <16 x i32> %shuf, <16 x i32> zeroinitializer
; Unmasked register permute of <4 x i64> with a constant index vector. The
; expected assembly is a single vpermpd (the double-precision form) whose
; indices appear inline; no separate index-vector load is expected.
1020 define <4 x i64> @test_4xi64_perm_mask0(<4 x i64> %vec) {
1021 ; CHECK-LABEL: test_4xi64_perm_mask0:
1023 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,0,3,1]
1025 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
; Merge-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected vpermq has inline indices, writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
1028 define <4 x i64> @test_masked_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
1029 ; CHECK-LABEL: test_masked_4xi64_perm_mask0:
1031 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1032 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,0,3,1]
1033 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1035 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
1036 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1037 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected vpermq
; has inline indices and carries {%k1} {z}.
1041 define <4 x i64> @test_masked_z_4xi64_perm_mask0(<4 x i64> %vec, <4 x i64> %mask) {
1042 ; CHECK-LABEL: test_masked_z_4xi64_perm_mask0:
1044 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1045 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,0,3,1]
1047 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 3, i32 1>
1048 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1049 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected vpermq has inline indices, writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
1052 define <4 x i64> @test_masked_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
1053 ; CHECK-LABEL: test_masked_4xi64_perm_mask1:
1055 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1056 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[1,2,0,3]
1057 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1059 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
1060 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1061 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected vpermq
; has inline indices and carries {%k1} {z}.
1065 define <4 x i64> @test_masked_z_4xi64_perm_mask1(<4 x i64> %vec, <4 x i64> %mask) {
1066 ; CHECK-LABEL: test_masked_z_4xi64_perm_mask1:
1068 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1069 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[1,2,0,3]
1071 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 1, i32 2, i32 0, i32 3>
1072 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1073 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected vpermq has inline indices, writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
1076 define <4 x i64> @test_masked_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
1077 ; CHECK-LABEL: test_masked_4xi64_perm_mask2:
1079 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1080 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,2,2,1]
1081 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1083 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
1084 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1085 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected vpermq
; has inline indices and carries {%k1} {z}.
1089 define <4 x i64> @test_masked_z_4xi64_perm_mask2(<4 x i64> %vec, <4 x i64> %mask) {
1090 ; CHECK-LABEL: test_masked_z_4xi64_perm_mask2:
1092 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1093 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,2,2,1]
1095 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 2, i32 2, i32 1>
1096 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1097 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked register permute of <4 x i64> with a constant index vector. The
; expected assembly is a single vpermpd (the double-precision form) whose
; indices appear inline; no separate index-vector load is expected.
1100 define <4 x i64> @test_4xi64_perm_mask3(<4 x i64> %vec) {
1101 ; CHECK-LABEL: test_4xi64_perm_mask3:
1103 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,3]
1105 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
; Merge-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected vpermq has inline indices, writes ymm1 under {%k1}, then ymm1 is copied to ymm0.
1108 define <4 x i64> @test_masked_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %vec2, <4 x i64> %mask) {
1109 ; CHECK-LABEL: test_masked_4xi64_perm_mask3:
1111 ; CHECK-NEXT: vptestnmq %ymm2, %ymm2, %k1
1112 ; CHECK-NEXT: vpermq {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,3]
1113 ; CHECK-NEXT: vmovdqa %ymm1, %ymm0
1115 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
1116 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1117 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked register permute of <4 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected vpermq
; has inline indices and carries {%k1} {z}.
1121 define <4 x i64> @test_masked_z_4xi64_perm_mask3(<4 x i64> %vec, <4 x i64> %mask) {
1122 ; CHECK-LABEL: test_masked_z_4xi64_perm_mask3:
1124 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1125 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,3]
1127 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 3>
1128 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1129 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked permute of a <4 x i64> loaded from %vp. The expected assembly is a
; single vpermpd whose source is shown as mem[...] with inline indices, i.e. the
; permute reads its source from memory.
1132 define <4 x i64> @test_4xi64_perm_mem_mask0(<4 x i64>* %vp) {
1133 ; CHECK-LABEL: test_4xi64_perm_mem_mask0:
1135 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,1,2,0]
1137 %vec = load <4 x i64>, <4 x i64>* %vp
1138 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
; Merge-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermq reads mem[...] and writes ymm0 under {%k1}.
1141 define <4 x i64> @test_masked_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
1142 ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask0:
1144 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1145 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,2,0]
1147 %vec = load <4 x i64>, <4 x i64>* %vp
1148 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
1149 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1150 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermq reads mem[...] and carries {%k1} {z}.
1154 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask0(<4 x i64>* %vp, <4 x i64> %mask) {
1155 ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask0:
1157 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1158 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,2,0]
1160 %vec = load <4 x i64>, <4 x i64>* %vp
1161 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 2, i32 0>
1162 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1163 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermq reads mem[...] and writes ymm0 under {%k1}.
1167 define <4 x i64> @test_masked_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
1168 ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask1:
1170 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1171 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,1,1,1]
1173 %vec = load <4 x i64>, <4 x i64>* %vp
1174 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
1175 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1176 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermq reads mem[...] and carries {%k1} {z}.
1180 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask1(<4 x i64>* %vp, <4 x i64> %mask) {
1181 ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask1:
1183 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1184 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,1,1,1]
1186 %vec = load <4 x i64>, <4 x i64>* %vp
1187 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 1, i32 1, i32 1>
1188 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1189 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Merge-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermq reads mem[...] and writes ymm0 under {%k1}.
1193 define <4 x i64> @test_masked_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
1194 ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask2:
1196 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1197 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[0,1,2,0]
1199 %vec = load <4 x i64>, <4 x i64>* %vp
1200 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
1201 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1202 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermq reads mem[...] and carries {%k1} {z}.
1206 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask2(<4 x i64>* %vp, <4 x i64> %mask) {
1207 ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask2:
1209 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1210 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[0,1,2,0]
1212 %vec = load <4 x i64>, <4 x i64>* %vp
1213 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 0>
1214 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1215 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked permute of a <4 x i64> loaded from %vp. The expected assembly is a
; single vpermpd whose source is shown as mem[...] with inline indices, i.e. the
; permute reads its source from memory.
1219 define <4 x i64> @test_4xi64_perm_mem_mask3(<4 x i64>* %vp) {
1220 ; CHECK-LABEL: test_4xi64_perm_mem_mask3:
1222 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[2,0,1,3]
1224 %vec = load <4 x i64>, <4 x i64>* %vp
1225 %res = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
; Merge-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane keeps the %vec2
; element. The expected vpermq reads mem[...] and writes ymm0 under {%k1}.
1228 define <4 x i64> @test_masked_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %vec2, <4 x i64> %mask) {
1229 ; CHECK-LABEL: test_masked_4xi64_perm_mem_mask3:
1231 ; CHECK-NEXT: vptestnmq %ymm1, %ymm1, %k1
1232 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} = mem[2,0,1,3]
1234 %vec = load <4 x i64>, <4 x i64>* %vp
1235 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
1236 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1237 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> %vec2
; Zero-masked permute of a <4 x i64> loaded from %vp: lanes whose %mask
; element is 0 receive the shuffled value, every other lane is zeroed. The
; expected vpermq reads mem[...] and carries {%k1} {z}.
1241 define <4 x i64> @test_masked_z_4xi64_perm_mem_mask3(<4 x i64>* %vp, <4 x i64> %mask) {
1242 ; CHECK-LABEL: test_masked_z_4xi64_perm_mem_mask3:
1244 ; CHECK-NEXT: vptestnmq %ymm0, %ymm0, %k1
1245 ; CHECK-NEXT: vpermq {{.*#+}} ymm0 {%k1} {z} = mem[2,0,1,3]
1247 %vec = load <4 x i64>, <4 x i64>* %vp
1248 %shuf = shufflevector <4 x i64> %vec, <4 x i64> undef, <4 x i32> <i32 2, i32 0, i32 1, i32 3>
1249 %cmp = icmp eq <4 x i64> %mask, zeroinitializer
1250 %res = select <4 x i1> %cmp, <4 x i64> %shuf, <4 x i64> zeroinitializer
; Unmasked register permute of <8 x i64> with a constant index vector. The
; expected assembly loads the indices with vmovaps and permutes with vpermpd
; (the double-precision form) on zmm registers.
1254 define <8 x i64> @test_8xi64_perm_mask0(<8 x i64> %vec) {
1255 ; CHECK-LABEL: test_8xi64_perm_mask0:
1257 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [0,4,7,6,5,5,1,6]
1258 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
1260 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
; Merge-masked register permute of <8 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane keeps the %vec2 element. The
; expected assembly permutes into zmm1 under {%k1}, then copies zmm1 to zmm0.
1263 define <8 x i64> @test_masked_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1264 ; CHECK-LABEL: test_masked_8xi64_perm_mask0:
1266 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [0,4,7,6,5,5,1,6]
1267 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1268 ; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1}
1269 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1271 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
1272 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1273 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked register permute of <8 x i64>: lanes whose %mask element is 0
; receive the shuffled value, every other lane is zeroed. The expected assembly
; builds %k1 with vptestnmq and uses one vpermq carrying {%k1} {z}.
1277 define <8 x i64> @test_masked_z_8xi64_perm_mask0(<8 x i64> %vec, <8 x i64> %mask) {
1278 ; CHECK-LABEL: test_masked_z_8xi64_perm_mask0:
1280 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,4,7,6,5,5,1,6]
1281 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1282 ; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z}
1284 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 4, i32 7, i32 6, i32 5, i32 5, i32 1, i32 6>
1285 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1286 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked register permute of <8 x i64> whose index vector repeats the
; pattern 1,0,1,1 in both 4-element halves (upper half offset by 4). Lanes whose
; %mask element is 0 receive the shuffled value, the rest keep %vec2. The
; expected vpermq shows its indices inline, with no index-vector load.
1289 define <8 x i64> @test_masked_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1290 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask1:
1292 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1293 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[1,0,1,1,5,4,5,5]
1294 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1296 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5>
1297 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1298 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute whose index pattern repeats per 4-lane half
; ([1,0,1,1] and the same pattern offset by 4). Lanes where %mask == 0 take the
; shuffled value, other lanes become zero.
; The expected vpermq uses {%k1} {z} and has no separate index-vector load.
1302 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask1(<8 x i64> %vec, <8 x i64> %mask) {
1303 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask1:
1305 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1306 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[1,0,1,1,5,4,5,5]
1308 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 0, i32 1, i32 1, i32 5, i32 4, i32 5, i32 5>
; Per-lane predicate: true where the mask element is zero.
1309 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1310 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute by [1,3,7,3,3,5,4,1]: lanes where %mask == 0
; take the shuffled value, every other lane keeps the %vec2 lane.
; The expected assembly derives %k1 with vptestnmq, permutes into zmm1 under
; {%k1}, then copies zmm1 to zmm0.
1313 define <8 x i64> @test_masked_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1314 ; CHECK-LABEL: test_masked_8xi64_perm_mask2:
1316 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [1,3,7,3,3,5,4,1]
1317 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1318 ; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1}
1319 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1321 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
; Per-lane predicate: true where the mask element is zero.
1322 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1323 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [1,3,7,3,3,5,4,1]: lanes where %mask == 0
; take the shuffled value, every other lane becomes zero.
; The expected assembly uses the zeroing form of vpermq ({%k1} {z}).
1327 define <8 x i64> @test_masked_z_8xi64_perm_mask2(<8 x i64> %vec, <8 x i64> %mask) {
1328 ; CHECK-LABEL: test_masked_z_8xi64_perm_mask2:
1330 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [1,3,7,3,3,5,4,1]
1331 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1332 ; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z}
1334 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 3, i32 3, i32 5, i32 4, i32 1>
; Per-lane predicate: true where the mask element is zero.
1335 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1336 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> permute whose index pattern repeats per 4-lane half
; ([3,1,3,1] and the same pattern offset by 4).
; The expected assembly is a single vpermpd with no index-vector load.
1339 define <8 x i64> @test_8xi64_perm_imm_mask3(<8 x i64> %vec) {
1340 ; CHECK-LABEL: test_8xi64_perm_imm_mask3:
1342 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[3,1,3,1,7,5,7,5]
1344 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
; Merge-masked <8 x i64> permute whose index pattern repeats per 4-lane half
; ([3,1,3,1] and the same pattern offset by 4). Lanes where %mask == 0 take the
; shuffled value, other lanes keep %vec2.
; The expected vpermq has no separate index-vector load.
1347 define <8 x i64> @test_masked_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1348 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask3:
1350 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1351 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,1,7,5,7,5]
1352 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1354 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
; Per-lane predicate: true where the mask element is zero.
1355 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1356 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute whose index pattern repeats per 4-lane half
; ([3,1,3,1] and the same pattern offset by 4). Lanes where %mask == 0 take the
; shuffled value, other lanes become zero.
; The expected vpermq uses {%k1} {z} and has no separate index-vector load.
1360 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask3(<8 x i64> %vec, <8 x i64> %mask) {
1361 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask3:
1363 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1364 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,1,7,5,7,5]
1366 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 1, i32 7, i32 5, i32 7, i32 5>
; Per-lane predicate: true where the mask element is zero.
1367 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1368 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute by [6,3,1,1,7,4,0,3]: lanes where %mask == 0
; take the shuffled value, every other lane keeps the %vec2 lane.
; The expected assembly derives %k1 with vptestnmq, permutes into zmm1 under
; {%k1}, then copies zmm1 to zmm0.
1371 define <8 x i64> @test_masked_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1372 ; CHECK-LABEL: test_masked_8xi64_perm_mask4:
1374 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [6,3,1,1,7,4,0,3]
1375 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1376 ; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1}
1377 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1379 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
; Per-lane predicate: true where the mask element is zero.
1380 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1381 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [6,3,1,1,7,4,0,3]: lanes where %mask == 0
; take the shuffled value, every other lane becomes zero.
; The expected assembly uses the zeroing form of vpermq ({%k1} {z}).
1385 define <8 x i64> @test_masked_z_8xi64_perm_mask4(<8 x i64> %vec, <8 x i64> %mask) {
1386 ; CHECK-LABEL: test_masked_z_8xi64_perm_mask4:
1388 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [6,3,1,1,7,4,0,3]
1389 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1390 ; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z}
1392 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 6, i32 3, i32 1, i32 1, i32 7, i32 4, i32 0, i32 3>
; Per-lane predicate: true where the mask element is zero.
1393 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1394 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute that broadcasts lane 0 within the low 4-lane
; half and lane 4 within the high half. Lanes where %mask == 0 take the
; shuffled value, other lanes keep %vec2.
; The expected vpermq has no separate index-vector load.
1397 define <8 x i64> @test_masked_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1398 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask5:
1400 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1401 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[0,0,0,0,4,4,4,4]
1402 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1404 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Per-lane predicate: true where the mask element is zero.
1405 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1406 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute that broadcasts lane 0 within the low 4-lane
; half and lane 4 within the high half. Lanes where %mask == 0 take the
; shuffled value, other lanes become zero.
; The expected vpermq uses {%k1} {z} and has no separate index-vector load.
1410 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask5(<8 x i64> %vec, <8 x i64> %mask) {
1411 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask5:
1413 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1414 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[0,0,0,0,4,4,4,4]
1416 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 0, i32 0, i32 0, i32 4, i32 4, i32 4, i32 4>
; Per-lane predicate: true where the mask element is zero.
1417 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1418 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> single-source permute by the index vector [5,1,4,4,5,4,2,7].
; The expected assembly loads the indices into zmm1 with vmovaps and permutes
; with vpermpd (the floating-point-domain mnemonic) rather than vpermq.
1421 define <8 x i64> @test_8xi64_perm_mask6(<8 x i64> %vec) {
1422 ; CHECK-LABEL: test_8xi64_perm_mask6:
1424 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,1,4,4,5,4,2,7]
1425 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
; The second shuffle operand is undef, so every index selects a lane of %vec.
1427 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
; Merge-masked <8 x i64> permute by [5,1,4,4,5,4,2,7]: lanes where %mask == 0
; take the shuffled value, every other lane keeps the %vec2 lane.
; The expected assembly derives %k1 with vptestnmq, permutes into zmm1 under
; {%k1}, then copies zmm1 to zmm0.
1430 define <8 x i64> @test_masked_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1431 ; CHECK-LABEL: test_masked_8xi64_perm_mask6:
1433 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm3 = [5,1,4,4,5,4,2,7]
1434 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1435 ; CHECK-NEXT: vpermq %zmm0, %zmm3, %zmm1 {%k1}
1436 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1438 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
; Per-lane predicate: true where the mask element is zero.
1439 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1440 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [5,1,4,4,5,4,2,7]: lanes where %mask == 0
; take the shuffled value, every other lane becomes zero.
; The expected assembly uses the zeroing form of vpermq ({%k1} {z}).
1444 define <8 x i64> @test_masked_z_8xi64_perm_mask6(<8 x i64> %vec, <8 x i64> %mask) {
1445 ; CHECK-LABEL: test_masked_z_8xi64_perm_mask6:
1447 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,4,4,5,4,2,7]
1448 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1449 ; CHECK-NEXT: vpermq %zmm0, %zmm2, %zmm0 {%k1} {z}
1451 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 4, i32 4, i32 5, i32 4, i32 2, i32 7>
; Per-lane predicate: true where the mask element is zero.
1452 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1453 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute that broadcasts lane 3 within the low 4-lane
; half and lane 7 within the high half. Lanes where %mask == 0 take the
; shuffled value, other lanes keep %vec2.
; The expected vpermq has no separate index-vector load.
1456 define <8 x i64> @test_masked_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %vec2, <8 x i64> %mask) {
1457 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mask7:
1459 ; CHECK-NEXT: vptestnmq %zmm2, %zmm2, %k1
1460 ; CHECK-NEXT: vpermq {{.*#+}} zmm1 {%k1} = zmm0[3,3,3,3,7,7,7,7]
1461 ; CHECK-NEXT: vmovdqa64 %zmm1, %zmm0
1463 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
; Per-lane predicate: true where the mask element is zero.
1464 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1465 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute that broadcasts lane 3 within the low 4-lane
; half and lane 7 within the high half. Lanes where %mask == 0 take the
; shuffled value, other lanes become zero.
; The expected vpermq uses {%k1} {z} and has no separate index-vector load.
1469 define <8 x i64> @test_masked_z_8xi64_perm_imm_mask7(<8 x i64> %vec, <8 x i64> %mask) {
1470 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mask7:
1472 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1473 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,3,3,7,7,7,7]
1475 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 3, i32 3, i32 3, i32 7, i32 7, i32 7, i32 7>
; Per-lane predicate: true where the mask element is zero.
1476 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1477 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> permute by [5,1,6,5,7,3,7,3] of a vector loaded from %vp.
; The expected assembly folds the load into vpermpd as the (%rdi) memory
; operand after loading the index vector into zmm0.
1480 define <8 x i64> @test_8xi64_perm_mem_mask0(<8 x i64>* %vp) {
1481 ; CHECK-LABEL: test_8xi64_perm_mem_mask0:
1483 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [5,1,6,5,7,3,7,3]
1484 ; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0
1486 %vec = load <8 x i64>, <8 x i64>* %vp
1487 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
; Merge-masked <8 x i64> permute by [5,1,6,5,7,3,7,3] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq reads its source from (%rdi) and writes zmm0 under {%k1}.
1490 define <8 x i64> @test_masked_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1491 ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask0:
1493 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,1,6,5,7,3,7,3]
1494 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1495 ; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1}
1497 %vec = load <8 x i64>, <8 x i64>* %vp
1498 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
; Per-lane predicate: true where the mask element is zero.
1499 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1500 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [5,1,6,5,7,3,7,3] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq reads its source from (%rdi) and uses {%k1} {z}.
1504 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask0(<8 x i64>* %vp, <8 x i64> %mask) {
1505 ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask0:
1507 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,1,6,5,7,3,7,3]
1508 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1509 ; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z}
1511 %vec = load <8 x i64>, <8 x i64>* %vp
1512 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 1, i32 6, i32 5, i32 7, i32 3, i32 7, i32 3>
; Per-lane predicate: true where the mask element is zero.
1513 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1514 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([1,1,1,0] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq takes a memory source and no index-vector load.
1518 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1519 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask1:
1521 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1522 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,1,1,0,5,5,5,4]
1524 %vec = load <8 x i64>, <8 x i64>* %vp
1525 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4>
; Per-lane predicate: true where the mask element is zero.
1526 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1527 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([1,1,1,0] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq takes a memory source and uses {%k1} {z}.
1531 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask1(<8 x i64>* %vp, <8 x i64> %mask) {
1532 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask1:
1534 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1535 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,1,1,0,5,5,5,4]
1537 %vec = load <8 x i64>, <8 x i64>* %vp
1538 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 1, i32 1, i32 0, i32 5, i32 5, i32 5, i32 4>
; Per-lane predicate: true where the mask element is zero.
1539 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1540 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute by [0,2,1,4,1,1,5,5] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq reads its source from (%rdi) and writes zmm0 under {%k1}.
1544 define <8 x i64> @test_masked_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1545 ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask2:
1547 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,2,1,4,1,1,5,5]
1548 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1549 ; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1}
1551 %vec = load <8 x i64>, <8 x i64>* %vp
1552 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5>
; Per-lane predicate: true where the mask element is zero.
1553 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1554 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [0,2,1,4,1,1,5,5] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq reads its source from (%rdi) and uses {%k1} {z}.
1558 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask2(<8 x i64>* %vp, <8 x i64> %mask) {
1559 ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask2:
1561 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,2,1,4,1,1,5,5]
1562 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1563 ; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z}
1565 %vec = load <8 x i64>, <8 x i64>* %vp
1566 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 2, i32 1, i32 4, i32 1, i32 1, i32 5, i32 5>
; Per-lane predicate: true where the mask element is zero.
1567 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1568 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([1,3,1,1] and the same pattern offset by 4).
; The expected assembly is a single vpermpd with a memory source.
1572 define <8 x i64> @test_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp) {
1573 ; CHECK-LABEL: test_8xi64_perm_imm_mem_mask3:
1575 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[1,3,1,1,5,7,5,5]
1577 %vec = load <8 x i64>, <8 x i64>* %vp
1578 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
; Merge-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([1,3,1,1] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq takes a memory source and no index-vector load.
1581 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1582 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask3:
1584 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1585 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[1,3,1,1,5,7,5,5]
1587 %vec = load <8 x i64>, <8 x i64>* %vp
1588 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
; Per-lane predicate: true where the mask element is zero.
1589 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1590 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([1,3,1,1] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq takes a memory source and uses {%k1} {z}.
1594 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask3(<8 x i64>* %vp, <8 x i64> %mask) {
1595 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask3:
1597 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1598 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[1,3,1,1,5,7,5,5]
1600 %vec = load <8 x i64>, <8 x i64>* %vp
1601 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 1, i32 3, i32 1, i32 1, i32 5, i32 7, i32 5, i32 5>
; Per-lane predicate: true where the mask element is zero.
1602 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1603 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute by [5,0,7,0,3,5,0,6] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq reads its source from (%rdi) and writes zmm0 under {%k1}.
1607 define <8 x i64> @test_masked_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1608 ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask4:
1610 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [5,0,7,0,3,5,0,6]
1611 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1612 ; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1}
1614 %vec = load <8 x i64>, <8 x i64>* %vp
1615 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6>
; Per-lane predicate: true where the mask element is zero.
1616 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1617 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [5,0,7,0,3,5,0,6] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq reads its source from (%rdi) and uses {%k1} {z}.
1621 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask4(<8 x i64>* %vp, <8 x i64> %mask) {
1622 ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask4:
1624 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [5,0,7,0,3,5,0,6]
1625 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1626 ; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z}
1628 %vec = load <8 x i64>, <8 x i64>* %vp
1629 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 5, i32 0, i32 7, i32 0, i32 3, i32 5, i32 0, i32 6>
; Per-lane predicate: true where the mask element is zero.
1630 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1631 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([3,1,0,0] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq takes a memory source and no index-vector load.
1635 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1636 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask5:
1638 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1639 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,1,0,0,7,5,4,4]
1641 %vec = load <8 x i64>, <8 x i64>* %vp
1642 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4>
; Per-lane predicate: true where the mask element is zero.
1643 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1644 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([3,1,0,0] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq takes a memory source and uses {%k1} {z}.
1648 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask5(<8 x i64>* %vp, <8 x i64> %mask) {
1649 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask5:
1651 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1652 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,1,0,0,7,5,4,4]
1654 %vec = load <8 x i64>, <8 x i64>* %vp
1655 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 1, i32 0, i32 0, i32 7, i32 5, i32 4, i32 4>
; Per-lane predicate: true where the mask element is zero.
1656 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1657 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x i64> permute by [0,6,3,7,3,0,3,6] of a vector loaded from %vp.
; The expected assembly folds the load into vpermpd as the (%rdi) memory
; operand after loading the index vector into zmm0.
1661 define <8 x i64> @test_8xi64_perm_mem_mask6(<8 x i64>* %vp) {
1662 ; CHECK-LABEL: test_8xi64_perm_mem_mask6:
1664 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,6,3,7,3,0,3,6]
1665 ; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0
1667 %vec = load <8 x i64>, <8 x i64>* %vp
1668 %res = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
; Merge-masked <8 x i64> permute by [0,6,3,7,3,0,3,6] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq reads its source from (%rdi) and writes zmm0 under {%k1}.
1671 define <8 x i64> @test_masked_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1672 ; CHECK-LABEL: test_masked_8xi64_perm_mem_mask6:
1674 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,6,3,7,3,0,3,6]
1675 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1676 ; CHECK-NEXT: vpermq (%rdi), %zmm2, %zmm0 {%k1}
1678 %vec = load <8 x i64>, <8 x i64>* %vp
1679 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
; Per-lane predicate: true where the mask element is zero.
1680 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1681 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute by [0,6,3,7,3,0,3,6] of a vector loaded from
; %vp: lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq reads its source from (%rdi) and uses {%k1} {z}.
1685 define <8 x i64> @test_masked_z_8xi64_perm_mem_mask6(<8 x i64>* %vp, <8 x i64> %mask) {
1686 ; CHECK-LABEL: test_masked_z_8xi64_perm_mem_mask6:
1688 ; CHECK-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,6,3,7,3,0,3,6]
1689 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1690 ; CHECK-NEXT: vpermq (%rdi), %zmm1, %zmm0 {%k1} {z}
1692 %vec = load <8 x i64>, <8 x i64>* %vp
1693 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 0, i32 6, i32 3, i32 7, i32 3, i32 0, i32 3, i32 6>
; Per-lane predicate: true where the mask element is zero.
1694 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1695 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Merge-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([3,0,0,1] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others keep %vec2.
; The expected vpermq takes a memory source and no index-vector load.
1699 define <8 x i64> @test_masked_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %vec2, <8 x i64> %mask) {
1700 ; CHECK-LABEL: test_masked_8xi64_perm_imm_mem_mask7:
1702 ; CHECK-NEXT: vptestnmq %zmm1, %zmm1, %k1
1703 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} = mem[3,0,0,1,7,4,4,5]
1705 %vec = load <8 x i64>, <8 x i64>* %vp
1706 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5>
; Per-lane predicate: true where the mask element is zero.
1707 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1708 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> %vec2
; Zero-masked <8 x i64> permute of a vector loaded from %vp; the index pattern
; repeats per 4-lane half ([3,0,0,1] and the same pattern offset by 4).
; Lanes where %mask == 0 take the shuffled value, others become zero.
; The expected vpermq takes a memory source and uses {%k1} {z}.
1712 define <8 x i64> @test_masked_z_8xi64_perm_imm_mem_mask7(<8 x i64>* %vp, <8 x i64> %mask) {
1713 ; CHECK-LABEL: test_masked_z_8xi64_perm_imm_mem_mask7:
1715 ; CHECK-NEXT: vptestnmq %zmm0, %zmm0, %k1
1716 ; CHECK-NEXT: vpermq {{.*#+}} zmm0 {%k1} {z} = mem[3,0,0,1,7,4,4,5]
1718 %vec = load <8 x i64>, <8 x i64>* %vp
1719 %shuf = shufflevector <8 x i64> %vec, <8 x i64> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 1, i32 7, i32 4, i32 4, i32 5>
; Per-lane predicate: true where the mask element is zero.
1720 %cmp = icmp eq <8 x i64> %mask, zeroinitializer
1721 %res = select <8 x i1> %cmp, <8 x i64> %shuf, <8 x i64> zeroinitializer
; Unmasked <8 x float> single-source permute by the index vector
; [3,4,2,4,1,2,3,4]. The expected assembly loads the indices into ymm1 and
; permutes with vpermps.
1725 define <8 x float> @test_8xfloat_perm_mask0(<8 x float> %vec) {
1726 ; CHECK-LABEL: test_8xfloat_perm_mask0:
1728 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [3,4,2,4,1,2,3,4]
1729 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; The second shuffle operand is undef, so every index selects a lane of %vec.
1731 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
1732 ret <8 x float> %res
; Merge-masked <8 x float> permute by [3,4,2,4,1,2,3,4]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others keep %vec2.
; The expected assembly zeroes a register with vxorps, builds %k1 with
; vcmpeqps, permutes into ymm1 under {%k1}, then copies ymm1 to ymm0.
1734 define <8 x float> @test_masked_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1735 ; CHECK-LABEL: test_masked_8xfloat_perm_mask0:
1737 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [3,4,2,4,1,2,3,4]
1738 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1739 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
1740 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
1741 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1743 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
; Per-lane predicate: ordered floating-point equality with zero.
1744 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1745 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1746 ret <8 x float> %res
; Zero-masked <8 x float> permute by [3,4,2,4,1,2,3,4]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others become zero.
; The expected vpermps uses the zeroing form {%k1} {z}.
1749 define <8 x float> @test_masked_z_8xfloat_perm_mask0(<8 x float> %vec, <8 x float> %mask) {
1750 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask0:
1752 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [3,4,2,4,1,2,3,4]
1753 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1754 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1755 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
1757 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 3, i32 4, i32 2, i32 4, i32 1, i32 2, i32 3, i32 4>
; Per-lane predicate: ordered floating-point equality with zero.
1758 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1759 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1760 ret <8 x float> %res
; Merge-masked <8 x float> permute by [4,2,1,0,6,0,5,1]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others keep %vec2.
; The expected assembly zeroes a register with vxorps, builds %k1 with
; vcmpeqps, permutes into ymm1 under {%k1}, then copies ymm1 to ymm0.
1762 define <8 x float> @test_masked_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1763 ; CHECK-LABEL: test_masked_8xfloat_perm_mask1:
1765 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [4,2,1,0,6,0,5,1]
1766 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1767 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
1768 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
1769 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1771 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1>
; Per-lane predicate: ordered floating-point equality with zero.
1772 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1773 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1774 ret <8 x float> %res
; Zero-masked <8 x float> permute by [4,2,1,0,6,0,5,1]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others become zero.
; The expected vpermps uses the zeroing form {%k1} {z}.
1777 define <8 x float> @test_masked_z_8xfloat_perm_mask1(<8 x float> %vec, <8 x float> %mask) {
1778 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask1:
1780 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,2,1,0,6,0,5,1]
1781 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1782 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1783 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
1785 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 2, i32 1, i32 0, i32 6, i32 0, i32 5, i32 1>
; Per-lane predicate: ordered floating-point equality with zero.
1786 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1787 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1788 ret <8 x float> %res
; Merge-masked <8 x float> permute by [2,5,5,5,4,6,0,5]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others keep %vec2.
; The expected assembly zeroes a register with vxorps, builds %k1 with
; vcmpeqps, permutes into ymm1 under {%k1}, then copies ymm1 to ymm0.
1790 define <8 x float> @test_masked_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1791 ; CHECK-LABEL: test_masked_8xfloat_perm_mask2:
1793 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [2,5,5,5,4,6,0,5]
1794 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1795 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
1796 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
1797 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1799 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
; Per-lane predicate: ordered floating-point equality with zero.
1800 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1801 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1802 ret <8 x float> %res
; Zero-masked <8 x float> permute by [2,5,5,5,4,6,0,5]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others become zero.
; The expected vpermps uses the zeroing form {%k1} {z}.
1805 define <8 x float> @test_masked_z_8xfloat_perm_mask2(<8 x float> %vec, <8 x float> %mask) {
1806 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask2:
1808 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [2,5,5,5,4,6,0,5]
1809 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1810 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1811 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
1813 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 2, i32 5, i32 5, i32 5, i32 4, i32 6, i32 0, i32 5>
; Per-lane predicate: ordered floating-point equality with zero.
1814 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1815 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1816 ret <8 x float> %res
; Unmasked <8 x float> single-source permute by the index vector
; [0,5,2,5,5,5,1,6]. The expected assembly loads the indices into ymm1 and
; permutes with vpermps.
1818 define <8 x float> @test_8xfloat_perm_mask3(<8 x float> %vec) {
1819 ; CHECK-LABEL: test_8xfloat_perm_mask3:
1821 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [0,5,2,5,5,5,1,6]
1822 ; CHECK-NEXT: vpermps %ymm0, %ymm1, %ymm0
; The second shuffle operand is undef, so every index selects a lane of %vec.
1824 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
1825 ret <8 x float> %res
; Merge-masked <8 x float> permute by [0,5,2,5,5,5,1,6]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others keep %vec2.
; The expected assembly zeroes a register with vxorps, builds %k1 with
; vcmpeqps, permutes into ymm1 under {%k1}, then copies ymm1 to ymm0.
1827 define <8 x float> @test_masked_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %vec2, <8 x float> %mask) {
1828 ; CHECK-LABEL: test_masked_8xfloat_perm_mask3:
1830 ; CHECK-NEXT: vmovaps {{.*#+}} ymm3 = [0,5,2,5,5,5,1,6]
1831 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
1832 ; CHECK-NEXT: vcmpeqps %ymm4, %ymm2, %k1
1833 ; CHECK-NEXT: vpermps %ymm0, %ymm3, %ymm1 {%k1}
1834 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
1836 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
; Per-lane predicate: ordered floating-point equality with zero.
1837 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1838 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1839 ret <8 x float> %res
; Zero-masked <8 x float> permute by [0,5,2,5,5,5,1,6]: lanes where %mask
; compares ordered-equal to 0.0 take the shuffled value, others become zero.
; The expected vpermps uses the zeroing form {%k1} {z}.
1842 define <8 x float> @test_masked_z_8xfloat_perm_mask3(<8 x float> %vec, <8 x float> %mask) {
1843 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mask3:
1845 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [0,5,2,5,5,5,1,6]
1846 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1847 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1848 ; CHECK-NEXT: vpermps %ymm0, %ymm2, %ymm0 {%k1} {z}
1850 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 0, i32 5, i32 2, i32 5, i32 5, i32 5, i32 1, i32 6>
; Per-lane predicate: ordered floating-point equality with zero.
1851 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1852 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1853 ret <8 x float> %res
; Unmasked <8 x float> permute by [5,2,1,6,4,2,4,0] of a vector loaded from
; %vp. The expected assembly folds the load into vpermps as the (%rdi) memory
; operand after loading the index vector into ymm0.
1855 define <8 x float> @test_8xfloat_perm_mem_mask0(<8 x float>* %vp) {
1856 ; CHECK-LABEL: test_8xfloat_perm_mem_mask0:
1858 ; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,2,1,6,4,2,4,0]
1859 ; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
1861 %vec = load <8 x float>, <8 x float>* %vp
1862 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
1863 ret <8 x float> %res
; Merge-masked <8 x float> permute by [5,2,1,6,4,2,4,0] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others keep %vec2.
; The expected vpermps reads its source from (%rdi) and writes ymm0 under {%k1}.
1865 define <8 x float> @test_masked_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
1866 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask0:
1868 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,2,1,6,4,2,4,0]
1869 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1870 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1871 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
1873 %vec = load <8 x float>, <8 x float>* %vp
1874 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
; Per-lane predicate: ordered floating-point equality with zero.
1875 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1876 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1877 ret <8 x float> %res
; Zero-masked <8 x float> permute by [5,2,1,6,4,2,4,0] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others become zero.
; The expected vpermps reads its source from (%rdi) and uses {%k1} {z}.
1880 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask0(<8 x float>* %vp, <8 x float> %mask) {
1881 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask0:
1883 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,2,1,6,4,2,4,0]
1884 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1885 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
1886 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
1888 %vec = load <8 x float>, <8 x float>* %vp
1889 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 2, i32 1, i32 6, i32 4, i32 2, i32 4, i32 0>
; Per-lane predicate: ordered floating-point equality with zero.
1890 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1891 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1892 ret <8 x float> %res
; Merge-masked <8 x float> permute by [1,3,7,4,0,6,6,6] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others keep %vec2.
; The expected vpermps reads its source from (%rdi) and writes ymm0 under {%k1}.
1895 define <8 x float> @test_masked_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
1896 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask1:
1898 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [1,3,7,4,0,6,6,6]
1899 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1900 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1901 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
1903 %vec = load <8 x float>, <8 x float>* %vp
1904 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
; Per-lane predicate: ordered floating-point equality with zero.
1905 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1906 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1907 ret <8 x float> %res
; Zero-masked <8 x float> permute by [1,3,7,4,0,6,6,6] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others become zero.
; The expected vpermps reads its source from (%rdi) and uses {%k1} {z}.
1910 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask1(<8 x float>* %vp, <8 x float> %mask) {
1911 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask1:
1913 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [1,3,7,4,0,6,6,6]
1914 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1915 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
1916 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
1918 %vec = load <8 x float>, <8 x float>* %vp
1919 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 1, i32 3, i32 7, i32 4, i32 0, i32 6, i32 6, i32 6>
; Per-lane predicate: ordered floating-point equality with zero.
1920 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1921 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1922 ret <8 x float> %res
; Merge-masked <8 x float> permute by [4,5,1,5,6,6,2,4] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others keep %vec2.
; The expected vpermps reads its source from (%rdi) and writes ymm0 under {%k1}.
1925 define <8 x float> @test_masked_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
1926 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask2:
1928 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [4,5,1,5,6,6,2,4]
1929 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1930 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1931 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
1933 %vec = load <8 x float>, <8 x float>* %vp
1934 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
; Per-lane predicate: ordered floating-point equality with zero.
1935 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1936 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1937 ret <8 x float> %res
; Zero-masked <8 x float> permute by [4,5,1,5,6,6,2,4] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others become zero.
; The expected vpermps reads its source from (%rdi) and uses {%k1} {z}.
1940 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask2(<8 x float>* %vp, <8 x float> %mask) {
1941 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask2:
1943 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [4,5,1,5,6,6,2,4]
1944 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1945 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
1946 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
1948 %vec = load <8 x float>, <8 x float>* %vp
1949 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 4, i32 5, i32 1, i32 5, i32 6, i32 6, i32 2, i32 4>
; Per-lane predicate: ordered floating-point equality with zero.
1950 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1951 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1952 ret <8 x float> %res
; Unmasked <8 x float> permute by [5,7,0,6,4,2,3,0] of a vector loaded from
; %vp. The expected assembly folds the load into vpermps as the (%rdi) memory
; operand after loading the index vector into ymm0.
1955 define <8 x float> @test_8xfloat_perm_mem_mask3(<8 x float>* %vp) {
1956 ; CHECK-LABEL: test_8xfloat_perm_mem_mask3:
1958 ; CHECK-NEXT: vmovaps {{.*#+}} ymm0 = [5,7,0,6,4,2,3,0]
1959 ; CHECK-NEXT: vpermps (%rdi), %ymm0, %ymm0
1961 %vec = load <8 x float>, <8 x float>* %vp
1962 %res = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
1963 ret <8 x float> %res
; Merge-masked <8 x float> permute by [5,7,0,6,4,2,3,0] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others keep %vec2.
; The expected vpermps reads its source from (%rdi) and writes ymm0 under {%k1}.
1965 define <8 x float> @test_masked_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %vec2, <8 x float> %mask) {
1966 ; CHECK-LABEL: test_masked_8xfloat_perm_mem_mask3:
1968 ; CHECK-NEXT: vmovaps {{.*#+}} ymm2 = [5,7,0,6,4,2,3,0]
1969 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
1970 ; CHECK-NEXT: vcmpeqps %ymm3, %ymm1, %k1
1971 ; CHECK-NEXT: vpermps (%rdi), %ymm2, %ymm0 {%k1}
1973 %vec = load <8 x float>, <8 x float>* %vp
1974 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
; Per-lane predicate: ordered floating-point equality with zero.
1975 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1976 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> %vec2
1977 ret <8 x float> %res
; Zero-masked <8 x float> permute by [5,7,0,6,4,2,3,0] of a vector loaded
; from %vp: lanes where %mask compares ordered-equal to 0.0 take the shuffled
; value, others become zero.
; The expected vpermps reads its source from (%rdi) and uses {%k1} {z}.
1980 define <8 x float> @test_masked_z_8xfloat_perm_mem_mask3(<8 x float>* %vp, <8 x float> %mask) {
1981 ; CHECK-LABEL: test_masked_z_8xfloat_perm_mem_mask3:
1983 ; CHECK-NEXT: vmovaps {{.*#+}} ymm1 = [5,7,0,6,4,2,3,0]
1984 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
1985 ; CHECK-NEXT: vcmpeqps %ymm2, %ymm0, %k1
1986 ; CHECK-NEXT: vpermps (%rdi), %ymm1, %ymm0 {%k1} {z}
1988 %vec = load <8 x float>, <8 x float>* %vp
1989 %shuf = shufflevector <8 x float> %vec, <8 x float> undef, <8 x i32> <i32 5, i32 7, i32 0, i32 6, i32 4, i32 2, i32 3, i32 0>
; Per-lane predicate: ordered floating-point equality with zero.
1990 %cmp = fcmp oeq <8 x float> %mask, zeroinitializer
1991 %res = select <8 x i1> %cmp, <8 x float> %shuf, <8 x float> zeroinitializer
1992 ret <8 x float> %res
; Unmasked <16 x float> single-source permute by the index vector
; [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]. The expected assembly loads the
; indices into zmm1 and permutes with vpermps.
1995 define <16 x float> @test_16xfloat_perm_mask0(<16 x float> %vec) {
1996 ; CHECK-LABEL: test_16xfloat_perm_mask0:
1998 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
1999 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
; The second shuffle operand is undef, so every index selects a lane of %vec.
2001 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
2002 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes keep %vec2.
; Expected asm: vpermps writes zmm1 under {%k1}, and zmm1 is then copied to zmm0.
2004 define <16 x float> @test_masked_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
2005 ; CHECK-LABEL: test_masked_16xfloat_perm_mask0:
2007 ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
2008 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2009 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
2010 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
2011 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2013 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
2014 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2015 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2016 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes are zero.
; Expected asm: register-form vpermps under {%k1} {z}, result directly in zmm0.
2019 define <16 x float> @test_masked_z_16xfloat_perm_mask0(<16 x float> %vec, <16 x float> %mask) {
2020 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask0:
2022 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,7,5,13,4,9,11,13,12,6,0,0,11,15,5,7]
2023 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2024 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2025 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
2027 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 7, i32 5, i32 13, i32 4, i32 9, i32 11, i32 13, i32 12, i32 6, i32 0, i32 0, i32 11, i32 15, i32 5, i32 7>
2028 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2029 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2030 ret <16 x float> %res
; Merge-masked <16 x float> permute of %vec with a second constant index pattern;
; lanes where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermps writes zmm1 under {%k1}, and zmm1 is then copied to zmm0.
2032 define <16 x float> @test_masked_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
2033 ; CHECK-LABEL: test_masked_16xfloat_perm_mask1:
2035 ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
2036 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2037 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
2038 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
2039 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2041 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
2042 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2043 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2044 ret <16 x float> %res
; Zero-masked <16 x float> permute of %vec; lanes where %mask is not ordered-equal
; to zero are zero in the result.
; Expected asm: register-form vpermps under {%k1} {z}.
2047 define <16 x float> @test_masked_z_16xfloat_perm_mask1(<16 x float> %vec, <16 x float> %mask) {
2048 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask1:
2050 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [11,10,4,10,4,5,8,11,2,0,10,0,0,3,10,1]
2051 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2052 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2053 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
2055 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 11, i32 10, i32 4, i32 10, i32 4, i32 5, i32 8, i32 11, i32 2, i32 0, i32 10, i32 0, i32 0, i32 3, i32 10, i32 1>
2056 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2057 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2058 ret <16 x float> %res
; Merge-masked <16 x float> permute of %vec with a third constant index pattern;
; lanes where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermps writes zmm1 under {%k1}, and zmm1 is then copied to zmm0.
2060 define <16 x float> @test_masked_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
2061 ; CHECK-LABEL: test_masked_16xfloat_perm_mask2:
2063 ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
2064 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2065 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
2066 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
2067 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2069 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
2070 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2071 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2072 ret <16 x float> %res
; Zero-masked <16 x float> permute of %vec; lanes where %mask is not ordered-equal
; to zero are zero in the result.
; Expected asm: register-form vpermps under {%k1} {z}.
2075 define <16 x float> @test_masked_z_16xfloat_perm_mask2(<16 x float> %vec, <16 x float> %mask) {
2076 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask2:
2078 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [0,15,6,14,3,6,5,2,5,15,11,6,6,4,8,11]
2079 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2080 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2081 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
2083 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 0, i32 15, i32 6, i32 14, i32 3, i32 6, i32 5, i32 2, i32 5, i32 15, i32 11, i32 6, i32 6, i32 4, i32 8, i32 11>
2084 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2085 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2086 ret <16 x float> %res
; Unmasked single-source permute of a <16 x float> register argument with constant indices.
; Expected asm: index vector loaded into zmm1, then one register-form vpermps.
2088 define <16 x float> @test_16xfloat_perm_mask3(<16 x float> %vec) {
2089 ; CHECK-LABEL: test_16xfloat_perm_mask3:
2091 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
2092 ; CHECK-NEXT: vpermps %zmm0, %zmm1, %zmm0
2094 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
2095 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes keep %vec2.
; Expected asm: vpermps writes zmm1 under {%k1}, and zmm1 is then copied to zmm0.
2097 define <16 x float> @test_masked_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %vec2, <16 x float> %mask) {
2098 ; CHECK-LABEL: test_masked_16xfloat_perm_mask3:
2100 ; CHECK-NEXT: vmovaps {{.*#+}} zmm3 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
2101 ; CHECK-NEXT: vxorps %xmm4, %xmm4, %xmm4
2102 ; CHECK-NEXT: vcmpeqps %zmm4, %zmm2, %k1
2103 ; CHECK-NEXT: vpermps %zmm0, %zmm3, %zmm1 {%k1}
2104 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
2106 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
2107 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2108 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2109 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes are zero.
; Expected asm: register-form vpermps under {%k1} {z}.
2112 define <16 x float> @test_masked_z_16xfloat_perm_mask3(<16 x float> %vec, <16 x float> %mask) {
2113 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mask3:
2115 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,0,14,6,6,0,2,13,8,11,2,5,13,13,3]
2116 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2117 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2118 ; CHECK-NEXT: vpermps %zmm0, %zmm2, %zmm0 {%k1} {z}
2120 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 0, i32 14, i32 6, i32 6, i32 0, i32 2, i32 13, i32 8, i32 11, i32 2, i32 5, i32 13, i32 13, i32 3>
2121 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2122 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2123 ret <16 x float> %res
; Unmasked permute of a <16 x float> loaded from %vp with constant indices.
; Expected asm: index vector materialized in zmm0 and the load folded into vpermps as (%rdi).
2125 define <16 x float> @test_16xfloat_perm_mem_mask0(<16 x float>* %vp) {
2126 ; CHECK-LABEL: test_16xfloat_perm_mem_mask0:
2128 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
2129 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
2131 %vec = load <16 x float>, <16 x float>* %vp
2132 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
2133 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes keep %vec2.
; Expected asm: vpermps with a folded (%rdi) operand writes zmm0 under {%k1}.
2135 define <16 x float> @test_masked_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
2136 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask0:
2138 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
2139 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2140 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2141 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
2143 %vec = load <16 x float>, <16 x float>* %vp
2144 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
2145 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2146 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2147 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes are zero.
; Expected asm: vpermps with a folded (%rdi) operand under {%k1} {z}.
2150 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask0(<16 x float>* %vp, <16 x float> %mask) {
2151 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask0:
2153 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,2,1,14,9,9,7,2,9,4,12,11,0,14,0,1]
2154 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2155 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
2156 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
2158 %vec = load <16 x float>, <16 x float>* %vp
2159 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 2, i32 1, i32 14, i32 9, i32 9, i32 7, i32 2, i32 9, i32 4, i32 12, i32 11, i32 0, i32 14, i32 0, i32 1>
2160 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2161 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2162 ret <16 x float> %res
; Merge-masked <16 x float> permute of a vector loaded from %vp; lanes where %mask
; is not ordered-equal to zero keep %vec2.
; Expected asm: vpermps with a folded (%rdi) operand writes zmm0 under {%k1}.
2165 define <16 x float> @test_masked_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
2166 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask1:
2168 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
2169 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2170 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2171 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
2173 %vec = load <16 x float>, <16 x float>* %vp
2174 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
2175 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2176 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2177 ret <16 x float> %res
; Zero-masked <16 x float> permute of a vector loaded from %vp; lanes where %mask
; is not ordered-equal to zero are zero in the result.
; Expected asm: vpermps with a folded (%rdi) operand under {%k1} {z}.
2180 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask1(<16 x float>* %vp, <16 x float> %mask) {
2181 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask1:
2183 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [4,2,3,5,11,6,4,7,6,4,14,8,15,12,9,4]
2184 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2185 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
2186 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
2188 %vec = load <16 x float>, <16 x float>* %vp
2189 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 4, i32 2, i32 3, i32 5, i32 11, i32 6, i32 4, i32 7, i32 6, i32 4, i32 14, i32 8, i32 15, i32 12, i32 9, i32 4>
2190 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2191 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2192 ret <16 x float> %res
; Merge-masked <16 x float> permute of a vector loaded from %vp; lanes where %mask
; is not ordered-equal to zero keep %vec2.
; Expected asm: vpermps with a folded (%rdi) operand writes zmm0 under {%k1}.
2195 define <16 x float> @test_masked_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
2196 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask2:
2198 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
2199 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2200 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2201 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
2203 %vec = load <16 x float>, <16 x float>* %vp
2204 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
2205 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2206 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2207 ret <16 x float> %res
; Zero-masked <16 x float> permute of a vector loaded from %vp; lanes where %mask
; is not ordered-equal to zero are zero in the result.
; Expected asm: vpermps with a folded (%rdi) operand under {%k1} {z}.
2210 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask2(<16 x float>* %vp, <16 x float> %mask) {
2211 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask2:
2213 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [10,7,11,6,7,0,11,0,10,9,12,4,10,3,8,5]
2214 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2215 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
2216 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
2218 %vec = load <16 x float>, <16 x float>* %vp
2219 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 10, i32 7, i32 11, i32 6, i32 7, i32 0, i32 11, i32 0, i32 10, i32 9, i32 12, i32 4, i32 10, i32 3, i32 8, i32 5>
2220 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2221 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2222 ret <16 x float> %res
; Unmasked permute of a <16 x float> loaded from %vp with constant indices.
; Expected asm: index vector materialized in zmm0 and the load folded into vpermps as (%rdi).
2225 define <16 x float> @test_16xfloat_perm_mem_mask3(<16 x float>* %vp) {
2226 ; CHECK-LABEL: test_16xfloat_perm_mem_mask3:
2228 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
2229 ; CHECK-NEXT: vpermps (%rdi), %zmm0, %zmm0
2231 %vec = load <16 x float>, <16 x float>* %vp
2232 %res = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
2233 ret <16 x float> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes keep %vec2.
; Expected asm: vpermps with a folded (%rdi) operand writes zmm0 under {%k1}.
2235 define <16 x float> @test_masked_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %vec2, <16 x float> %mask) {
2236 ; CHECK-LABEL: test_masked_16xfloat_perm_mem_mask3:
2238 ; CHECK-NEXT: vmovaps {{.*#+}} zmm2 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
2239 ; CHECK-NEXT: vxorps %xmm3, %xmm3, %xmm3
2240 ; CHECK-NEXT: vcmpeqps %zmm3, %zmm1, %k1
2241 ; CHECK-NEXT: vpermps (%rdi), %zmm2, %zmm0 {%k1}
2243 %vec = load <16 x float>, <16 x float>* %vp
2244 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
2245 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2246 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> %vec2
2247 ret <16 x float> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes are zero.
; Expected asm: vpermps with a folded (%rdi) operand under {%k1} {z}.
2250 define <16 x float> @test_masked_z_16xfloat_perm_mem_mask3(<16 x float>* %vp, <16 x float> %mask) {
2251 ; CHECK-LABEL: test_masked_z_16xfloat_perm_mem_mask3:
2253 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [15,15,3,9,5,15,14,9,11,10,5,14,14,5,11,0]
2254 ; CHECK-NEXT: vxorps %xmm2, %xmm2, %xmm2
2255 ; CHECK-NEXT: vcmpeqps %zmm2, %zmm0, %k1
2256 ; CHECK-NEXT: vpermps (%rdi), %zmm1, %zmm0 {%k1} {z}
2258 %vec = load <16 x float>, <16 x float>* %vp
2259 %shuf = shufflevector <16 x float> %vec, <16 x float> undef, <16 x i32> <i32 15, i32 15, i32 3, i32 9, i32 5, i32 15, i32 14, i32 9, i32 11, i32 10, i32 5, i32 14, i32 14, i32 5, i32 11, i32 0>
2260 %cmp = fcmp oeq <16 x float> %mask, zeroinitializer
2261 %res = select <16 x i1> %cmp, <16 x float> %shuf, <16 x float> zeroinitializer
2262 ret <16 x float> %res
; Unmasked single-source permute of a <4 x double> register argument, indices <2,1,3,2>.
; Expected asm: a single vpermpd with the indices shown inline; no index vector is loaded.
2265 define <4 x double> @test_4xdouble_perm_mask0(<4 x double> %vec) {
2266 ; CHECK-LABEL: test_4xdouble_perm_mask0:
2268 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[2,1,3,2]
2270 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
2271 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes keep %vec2.
; Expected asm: vcmpeqpd against a zeroed register yields %k1, vpermpd writes ymm1
; under {%k1}, and ymm1 is then copied to ymm0.
2273 define <4 x double> @test_masked_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
2274 ; CHECK-LABEL: test_masked_4xdouble_perm_mask0:
2276 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2277 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2278 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[2,1,3,2]
2279 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2281 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
2282 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2283 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2284 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes are zero.
; Expected asm: vpermpd under {%k1} {z}, result directly in ymm0.
2287 define <4 x double> @test_masked_z_4xdouble_perm_mask0(<4 x double> %vec, <4 x double> %mask) {
2288 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask0:
2290 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2291 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2292 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[2,1,3,2]
2294 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 2, i32 1, i32 3, i32 2>
2295 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2296 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2297 ret <4 x double> %res
; Merge-masked <4 x double> permute of %vec with indices <3,0,0,0>; lanes where
; %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd writes ymm1 under {%k1}, and ymm1 is then copied to ymm0.
2299 define <4 x double> @test_masked_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
2300 ; CHECK-LABEL: test_masked_4xdouble_perm_mask1:
2302 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2303 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2304 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,0,0,0]
2305 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2307 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
2308 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2309 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2310 ret <4 x double> %res
; Zero-masked <4 x double> permute of %vec with indices <3,0,0,0>; lanes where
; %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd under {%k1} {z}.
2313 define <4 x double> @test_masked_z_4xdouble_perm_mask1(<4 x double> %vec, <4 x double> %mask) {
2314 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask1:
2316 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2317 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2318 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,0,0,0]
2320 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 0, i32 0, i32 0>
2321 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2322 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2323 ret <4 x double> %res
; Merge-masked <4 x double> permute of %vec with indices <0,3,3,1>; lanes where
; %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd writes ymm1 under {%k1}, and ymm1 is then copied to ymm0.
2325 define <4 x double> @test_masked_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
2326 ; CHECK-LABEL: test_masked_4xdouble_perm_mask2:
2328 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2329 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2330 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[0,3,3,1]
2331 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2333 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
2334 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2335 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2336 ret <4 x double> %res
; Zero-masked <4 x double> permute of %vec with indices <0,3,3,1>; lanes where
; %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd under {%k1} {z}.
2339 define <4 x double> @test_masked_z_4xdouble_perm_mask2(<4 x double> %vec, <4 x double> %mask) {
2340 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask2:
2342 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2343 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2344 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[0,3,3,1]
2346 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 3, i32 3, i32 1>
2347 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2348 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2349 ret <4 x double> %res
; Unmasked single-source permute of a <4 x double> register argument, indices <3,3,3,2>.
; Expected asm: a single vpermpd with the indices shown inline; no index vector is loaded.
2351 define <4 x double> @test_4xdouble_perm_mask3(<4 x double> %vec) {
2352 ; CHECK-LABEL: test_4xdouble_perm_mask3:
2354 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = ymm0[3,3,3,2]
2356 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
2357 ret <4 x double> %res
; Merge-masked <4 x double> permute of %vec with indices <3,3,3,2>; lanes where
; %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd writes ymm1 under {%k1}, and ymm1 is then copied to ymm0.
2359 define <4 x double> @test_masked_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %vec2, <4 x double> %mask) {
2360 ; CHECK-LABEL: test_masked_4xdouble_perm_mask3:
2362 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2363 ; CHECK-NEXT: vcmpeqpd %ymm3, %ymm2, %k1
2364 ; CHECK-NEXT: vpermpd {{.*#+}} ymm1 {%k1} = ymm0[3,3,3,2]
2365 ; CHECK-NEXT: vmovapd %ymm1, %ymm0
2367 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
2368 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2369 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2370 ret <4 x double> %res
; Zero-masked <4 x double> permute of %vec with indices <3,3,3,2>; lanes where
; %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd under {%k1} {z}.
2373 define <4 x double> @test_masked_z_4xdouble_perm_mask3(<4 x double> %vec, <4 x double> %mask) {
2374 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mask3:
2376 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2377 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2378 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = ymm0[3,3,3,2]
2380 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 3, i32 3, i32 2>
2381 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2382 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2383 ret <4 x double> %res
; Unmasked permute of a <4 x double> loaded from %vp, indices <0,0,2,0>.
; Expected asm: a single vpermpd whose source is shown as mem[...], i.e. the load is folded.
2385 define <4 x double> @test_4xdouble_perm_mem_mask0(<4 x double>* %vp) {
2386 ; CHECK-LABEL: test_4xdouble_perm_mem_mask0:
2388 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[0,0,2,0]
2390 %vec = load <4 x double>, <4 x double>* %vp
2391 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
2392 ret <4 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes keep %vec2.
; Expected asm: vpermpd with a folded memory source writes ymm0 under {%k1}.
2394 define <4 x double> @test_masked_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
2395 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask0:
2397 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2398 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2399 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,0,2,0]
2401 %vec = load <4 x double>, <4 x double>* %vp
2402 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
2403 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2404 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2405 ret <4 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of the vector loaded from %vp; all other lanes are zero.
; Expected asm: vpermpd with a folded memory source under {%k1} {z}.
2408 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask0(<4 x double>* %vp, <4 x double> %mask) {
2409 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask0:
2411 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2412 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
2413 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,0,2,0]
2415 %vec = load <4 x double>, <4 x double>* %vp
2416 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 0, i32 2, i32 0>
2417 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2418 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2419 ret <4 x double> %res
; Merge-masked <4 x double> permute (indices <0,2,3,2>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd with a folded memory source writes ymm0 under {%k1}.
2422 define <4 x double> @test_masked_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
2423 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask1:
2425 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2426 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2427 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[0,2,3,2]
2429 %vec = load <4 x double>, <4 x double>* %vp
2430 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
2431 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2432 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2433 ret <4 x double> %res
; Zero-masked <4 x double> permute (indices <0,2,3,2>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd with a folded memory source under {%k1} {z}.
2436 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask1(<4 x double>* %vp, <4 x double> %mask) {
2437 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask1:
2439 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2440 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
2441 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[0,2,3,2]
2443 %vec = load <4 x double>, <4 x double>* %vp
2444 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 0, i32 2, i32 3, i32 2>
2445 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2446 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2447 ret <4 x double> %res
; Merge-masked <4 x double> permute (indices <3,1,1,1>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd with a folded memory source writes ymm0 under {%k1}.
2450 define <4 x double> @test_masked_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
2451 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask2:
2453 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2454 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2455 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,1,1,1]
2457 %vec = load <4 x double>, <4 x double>* %vp
2458 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2459 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2460 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2461 ret <4 x double> %res
; Zero-masked <4 x double> permute (indices <3,1,1,1>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd with a folded memory source under {%k1} {z}.
2464 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask2(<4 x double>* %vp, <4 x double> %mask) {
2465 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask2:
2467 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2468 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
2469 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,1,1,1]
2471 %vec = load <4 x double>, <4 x double>* %vp
2472 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 1, i32 1, i32 1>
2473 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2474 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2475 ret <4 x double> %res
; Unmasked permute of a <4 x double> loaded from %vp, indices <3,2,3,2>.
; Expected asm: a single vpermpd whose source is shown as mem[...], i.e. the load is folded.
2478 define <4 x double> @test_4xdouble_perm_mem_mask3(<4 x double>* %vp) {
2479 ; CHECK-LABEL: test_4xdouble_perm_mem_mask3:
2481 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 = mem[3,2,3,2]
2483 %vec = load <4 x double>, <4 x double>* %vp
2484 %res = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
2485 ret <4 x double> %res
; Merge-masked <4 x double> permute (indices <3,2,3,2>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: vpermpd with a folded memory source writes ymm0 under {%k1}.
2487 define <4 x double> @test_masked_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %vec2, <4 x double> %mask) {
2488 ; CHECK-LABEL: test_masked_4xdouble_perm_mem_mask3:
2490 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2491 ; CHECK-NEXT: vcmpeqpd %ymm2, %ymm1, %k1
2492 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} = mem[3,2,3,2]
2494 %vec = load <4 x double>, <4 x double>* %vp
2495 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
2496 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2497 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> %vec2
2498 ret <4 x double> %res
; Zero-masked <4 x double> permute (indices <3,2,3,2>) of a vector loaded from %vp;
; lanes where %mask is not ordered-equal to zero are zero in the result.
; Expected asm: vpermpd with a folded memory source under {%k1} {z}.
2501 define <4 x double> @test_masked_z_4xdouble_perm_mem_mask3(<4 x double>* %vp, <4 x double> %mask) {
2502 ; CHECK-LABEL: test_masked_z_4xdouble_perm_mem_mask3:
2504 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2505 ; CHECK-NEXT: vcmpeqpd %ymm1, %ymm0, %k1
2506 ; CHECK-NEXT: vpermpd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,3,2]
2508 %vec = load <4 x double>, <4 x double>* %vp
2509 %shuf = shufflevector <4 x double> %vec, <4 x double> undef, <4 x i32> <i32 3, i32 2, i32 3, i32 2>
2510 %cmp = fcmp oeq <4 x double> %mask, zeroinitializer
2511 %res = select <4 x i1> %cmp, <4 x double> %shuf, <4 x double> zeroinitializer
2512 ret <4 x double> %res
; Unmasked single-source permute of an <8 x double> register argument; the indices
; <5,7,4,2,7,4,3,4> cross 256-bit halves, so an index vector is loaded for vpermpd.
; NOTE(review): the index load is expected as vmovaps here, while the masked
; 8 x double variants in this file expect vmovapd - presumably execution-domain
; selection in the unmasked case; confirm when regenerating the assertions.
2515 define <8 x double> @test_8xdouble_perm_mask0(<8 x double> %vec) {
2516 ; CHECK-LABEL: test_8xdouble_perm_mask0:
2518 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [5,7,4,2,7,4,3,4]
2519 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
2521 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
2522 ret <8 x double> %res
; Merge-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes keep %vec2.
; Expected asm: index vector in zmm3, vpermpd writes zmm1 under {%k1}, and zmm1 is
; then copied to zmm0.
2524 define <8 x double> @test_masked_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2525 ; CHECK-LABEL: test_masked_8xdouble_perm_mask0:
2527 ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [5,7,4,2,7,4,3,4]
2528 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2529 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
2530 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
2531 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2533 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
2534 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2535 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2536 ret <8 x double> %res
; Zero-masked form: lanes where %mask is ordered-equal to zero take the permuted
; element of %vec; all other lanes are zero.
; Expected asm: index vector in zmm2, register-form vpermpd under {%k1} {z}.
2539 define <8 x double> @test_masked_z_8xdouble_perm_mask0(<8 x double> %vec, <8 x double> %mask) {
2540 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask0:
2542 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [5,7,4,2,7,4,3,4]
2543 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2544 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2545 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
2547 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 5, i32 7, i32 4, i32 2, i32 7, i32 4, i32 3, i32 4>
2548 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2549 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2550 ret <8 x double> %res
; Merge-masked <8 x double> permute whose indices <3,0,0,2,7,4,4,6> repeat the same
; pattern in each 4-element half (upper half = lower half + 4).
; Expected asm: no index vector is loaded; vpermpd writes zmm1 under {%k1}, and
; zmm1 is then copied to zmm0.
2552 define <8 x double> @test_masked_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2553 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask1:
2555 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2556 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2557 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,0,0,2,7,4,4,6]
2558 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2560 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6>
2561 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2562 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2563 ret <8 x double> %res
; Zero-masked <8 x double> permute whose indices <3,0,0,2,7,4,4,6> repeat the same
; pattern in each 4-element half (upper half = lower half + 4).
; Expected asm: no index vector is loaded; vpermpd under {%k1} {z}.
2566 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask1(<8 x double> %vec, <8 x double> %mask) {
2567 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask1:
2569 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2570 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2571 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,0,0,2,7,4,4,6]
2573 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 0, i32 0, i32 2, i32 7, i32 4, i32 4, i32 6>
2574 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2575 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2576 ret <8 x double> %res
; Merge-masked <8 x double> permute of %vec with indices <7,5,5,5,3,5,1,7>; lanes
; where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: index vector in zmm3, vpermpd writes zmm1 under {%k1}, and zmm1 is
; then copied to zmm0.
2578 define <8 x double> @test_masked_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2579 ; CHECK-LABEL: test_masked_8xdouble_perm_mask2:
2581 ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [7,5,5,5,3,5,1,7]
2582 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2583 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
2584 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
2585 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2587 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7>
2588 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2589 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2590 ret <8 x double> %res
; Zero-masked <8 x double> permute of %vec with indices <7,5,5,5,3,5,1,7>; lanes
; where %mask is not ordered-equal to zero are zero in the result.
; Expected asm: index vector in zmm2, register-form vpermpd under {%k1} {z}.
2593 define <8 x double> @test_masked_z_8xdouble_perm_mask2(<8 x double> %vec, <8 x double> %mask) {
2594 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask2:
2596 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [7,5,5,5,3,5,1,7]
2597 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2598 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2599 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
2601 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 7, i32 5, i32 5, i32 5, i32 3, i32 5, i32 1, i32 7>
2602 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2603 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2604 ret <8 x double> %res
; Unmasked <8 x double> permute whose indices <1,3,3,0,5,7,7,4> repeat the same
; pattern in each 4-element half (upper half = lower half + 4).
; Expected asm: a single vpermpd with the indices shown inline; no index vector is loaded.
2606 define <8 x double> @test_8xdouble_perm_imm_mask3(<8 x double> %vec) {
2607 ; CHECK-LABEL: test_8xdouble_perm_imm_mask3:
2609 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = zmm0[1,3,3,0,5,7,7,4]
2611 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
2612 ret <8 x double> %res
; Merge-masked <8 x double> permute whose indices <1,3,3,0,5,7,7,4> repeat the same
; pattern in each 4-element half (upper half = lower half + 4).
; Expected asm: no index vector is loaded; vpermpd writes zmm1 under {%k1}, and
; zmm1 is then copied to zmm0.
2614 define <8 x double> @test_masked_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2615 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask3:
2617 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2618 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2619 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[1,3,3,0,5,7,7,4]
2620 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2622 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
2623 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2624 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2625 ret <8 x double> %res
; Zero-masked <8 x double> permute whose indices <1,3,3,0,5,7,7,4> repeat the same
; pattern in each 4-element half (upper half = lower half + 4).
; Expected asm: no index vector is loaded; vpermpd under {%k1} {z}.
2628 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask3(<8 x double> %vec, <8 x double> %mask) {
2629 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask3:
2631 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2632 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2633 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[1,3,3,0,5,7,7,4]
2635 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 3, i32 3, i32 0, i32 5, i32 7, i32 7, i32 4>
2636 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2637 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2638 ret <8 x double> %res
; Merge-masked <8 x double> permute of %vec with indices <3,5,3,4,6,5,7,1>; lanes
; where %mask is not ordered-equal to zero keep %vec2.
; Expected asm: index vector in zmm3, vpermpd writes zmm1 under {%k1}, and zmm1 is
; then copied to zmm0.
2640 define <8 x double> @test_masked_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2641 ; CHECK-LABEL: test_masked_8xdouble_perm_mask4:
2643 ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [3,5,3,4,6,5,7,1]
2644 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2645 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
2646 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
2647 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2649 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1>
2650 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2651 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2652 ret <8 x double> %res
; Zero-masked permute of %vec with the constant index list <3,5,3,4,6,5,7,1>:
; lanes where %mask compares ordered-equal (fcmp oeq) to zero take the permuted
; element, all other lanes are zero. The assertions expect the index list to be
; loaded into zmm2 by vmovapd, a vxorpd/vcmpeqpd pair producing %k1, and a
; three-operand vpermpd carrying both {%k1} and {z}.
2655 define <8 x double> @test_masked_z_8xdouble_perm_mask4(<8 x double> %vec, <8 x double> %mask) {
2656 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask4:
2658 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [3,5,3,4,6,5,7,1]
2659 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2660 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2661 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
2663 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 5, i32 3, i32 4, i32 6, i32 5, i32 7, i32 1>
2664 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2665 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2666 ret <8 x double> %res
; Merge-masked permute of %vec with the constant index list <3,3,2,3,7,7,6,7>
; (indices for elements 4-7 are those of elements 0-3 plus 4): lanes where
; %mask compares ordered-equal (fcmp oeq) to zero take the permuted element,
; all other lanes take %vec2. The assertions expect a vxorpd/vcmpeqpd pair
; producing %k1, a %k1-predicated vpermpd writing zmm1 with no index-vector
; load, and a vmovapd of zmm1 into zmm0.
2668 define <8 x double> @test_masked_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2669 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask5:
2671 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2672 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2673 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,3,2,3,7,7,6,7]
2674 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2676 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7>
2677 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2678 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2679 ret <8 x double> %res
; Zero-masked permute of %vec with the constant index list <3,3,2,3,7,7,6,7>
; (indices for elements 4-7 are those of elements 0-3 plus 4): lanes where
; %mask compares ordered-equal (fcmp oeq) to zero take the permuted element,
; all other lanes are zero. The assertions expect a vxorpd/vcmpeqpd pair
; producing %k1 and a vpermpd carrying both {%k1} and {z}.
2682 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask5(<8 x double> %vec, <8 x double> %mask) {
2683 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask5:
2685 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2686 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2687 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,3,2,3,7,7,6,7]
2689 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 3, i32 2, i32 3, i32 7, i32 7, i32 6, i32 7>
2690 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2691 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2692 ret <8 x double> %res
; Unmasked permute of the eight doubles in %vec with the constant index list
; <2,7,6,4,0,0,0,2>. The assertions expect the index list to be loaded into
; zmm1 (by vmovaps here) and then used by a three-operand vpermpd.
2694 define <8 x double> @test_8xdouble_perm_mask6(<8 x double> %vec) {
2695 ; CHECK-LABEL: test_8xdouble_perm_mask6:
2697 ; CHECK-NEXT: vmovaps {{.*#+}} zmm1 = [2,7,6,4,0,0,0,2]
2698 ; CHECK-NEXT: vpermpd %zmm0, %zmm1, %zmm0
2700 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
2701 ret <8 x double> %res
; Merge-masked permute of %vec with the constant index list <2,7,6,4,0,0,0,2>:
; lanes where %mask compares ordered-equal (fcmp oeq) to zero take the permuted
; element, all other lanes take %vec2. The assertions expect the index list to
; be loaded into zmm3 by vmovapd, a vxorpd/vcmpeqpd pair producing %k1, a
; %k1-predicated three-operand vpermpd writing zmm1, and a vmovapd of zmm1
; into zmm0.
2703 define <8 x double> @test_masked_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2704 ; CHECK-LABEL: test_masked_8xdouble_perm_mask6:
2706 ; CHECK-NEXT: vmovapd {{.*#+}} zmm3 = [2,7,6,4,0,0,0,2]
2707 ; CHECK-NEXT: vxorpd %xmm4, %xmm4, %xmm4
2708 ; CHECK-NEXT: vcmpeqpd %zmm4, %zmm2, %k1
2709 ; CHECK-NEXT: vpermpd %zmm0, %zmm3, %zmm1 {%k1}
2710 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2712 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
2713 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2714 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2715 ret <8 x double> %res
; Zero-masked permute of %vec with the constant index list <2,7,6,4,0,0,0,2>:
; lanes where %mask compares ordered-equal (fcmp oeq) to zero take the permuted
; element, all other lanes are zero. The assertions expect the index list to be
; loaded into zmm2 by vmovapd, a vxorpd/vcmpeqpd pair producing %k1, and a
; three-operand vpermpd carrying both {%k1} and {z}.
2718 define <8 x double> @test_masked_z_8xdouble_perm_mask6(<8 x double> %vec, <8 x double> %mask) {
2719 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mask6:
2721 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,7,6,4,0,0,0,2]
2722 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2723 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2724 ; CHECK-NEXT: vpermpd %zmm0, %zmm2, %zmm0 {%k1} {z}
2726 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 7, i32 6, i32 4, i32 0, i32 0, i32 0, i32 2>
2727 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2728 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2729 ret <8 x double> %res
; Merge-masked permute of %vec with the constant index list <3,1,3,2,7,5,7,6>
; (indices for elements 4-7 are those of elements 0-3 plus 4): lanes where
; %mask compares ordered-equal (fcmp oeq) to zero take the permuted element,
; all other lanes take %vec2. The assertions expect a vxorpd/vcmpeqpd pair
; producing %k1, a %k1-predicated vpermpd writing zmm1 with no index-vector
; load, and a vmovapd of zmm1 into zmm0.
2731 define <8 x double> @test_masked_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %vec2, <8 x double> %mask) {
2732 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mask7:
2734 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2735 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm2, %k1
2736 ; CHECK-NEXT: vpermpd {{.*#+}} zmm1 {%k1} = zmm0[3,1,3,2,7,5,7,6]
2737 ; CHECK-NEXT: vmovapd %zmm1, %zmm0
2739 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6>
2740 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2741 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2742 ret <8 x double> %res
; Zero-masked permute of %vec with the constant index list <3,1,3,2,7,5,7,6>
; (indices for elements 4-7 are those of elements 0-3 plus 4): lanes where
; %mask compares ordered-equal (fcmp oeq) to zero take the permuted element,
; all other lanes are zero. The assertions expect a vxorpd/vcmpeqpd pair
; producing %k1 and a vpermpd carrying both {%k1} and {z}.
2745 define <8 x double> @test_masked_z_8xdouble_perm_imm_mask7(<8 x double> %vec, <8 x double> %mask) {
2746 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mask7:
2748 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2749 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2750 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = zmm0[3,1,3,2,7,5,7,6]
2752 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 3, i32 1, i32 3, i32 2, i32 7, i32 5, i32 7, i32 6>
2753 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2754 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2755 ret <8 x double> %res
; Unmasked permute of an <8 x double> loaded from %vp, with the constant index
; list <0,3,4,0,4,2,0,1>. The assertions expect the index list to be loaded
; into zmm0 (by vmovaps here) and the source vector to be consumed as the
; (%rdi) memory operand of vpermpd rather than through a separate load.
2757 define <8 x double> @test_8xdouble_perm_mem_mask0(<8 x double>* %vp) {
2758 ; CHECK-LABEL: test_8xdouble_perm_mem_mask0:
2760 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [0,3,4,0,4,2,0,1]
2761 ; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0
2763 %vec = load <8 x double>, <8 x double>* %vp
2764 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
2765 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,3,4,0,4,2,0,1>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes take %vec2.
; The assertions expect the index list in zmm2, a vxorpd/vcmpeqpd pair
; producing %k1, and a %k1-predicated vpermpd that reads the source from
; (%rdi) and writes zmm0 directly (no trailing register move).
2767 define <8 x double> @test_masked_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2768 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask0:
2770 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [0,3,4,0,4,2,0,1]
2771 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2772 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2773 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
2775 %vec = load <8 x double>, <8 x double>* %vp
2776 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
2777 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2778 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2779 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,3,4,0,4,2,0,1>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes are zero.
; The assertions expect the index list in zmm1, a vxorpd/vcmpeqpd pair
; producing %k1, and a vpermpd that reads the source from (%rdi) and carries
; both {%k1} and {z}.
2782 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask0(<8 x double>* %vp, <8 x double> %mask) {
2783 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask0:
2785 ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [0,3,4,0,4,2,0,1]
2786 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2787 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
2788 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
2790 %vec = load <8 x double>, <8 x double>* %vp
2791 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 4, i32 0, i32 4, i32 2, i32 0, i32 1>
2792 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2793 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2794 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,2,0,3,4,6,4,7> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes take %vec2. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a %k1-predicated vpermpd whose source
; is shown as mem[...] and whose destination is zmm0.
2797 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2798 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask1:
2800 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2801 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2802 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,2,0,3,4,6,4,7]
2804 %vec = load <8 x double>, <8 x double>* %vp
2805 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7>
2806 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2807 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2808 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,2,0,3,4,6,4,7> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes are zero. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a vpermpd whose source is shown as
; mem[...] and which carries both {%k1} and {z}.
2811 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask1(<8 x double>* %vp, <8 x double> %mask) {
2812 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask1:
2814 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2815 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
2816 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,2,0,3,4,6,4,7]
2818 %vec = load <8 x double>, <8 x double>* %vp
2819 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 2, i32 0, i32 3, i32 4, i32 6, i32 4, i32 7>
2820 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2821 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2822 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <6,7,2,7,7,6,2,5>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes take %vec2.
; The assertions expect the index list in zmm2, a vxorpd/vcmpeqpd pair
; producing %k1, and a %k1-predicated vpermpd that reads the source from
; (%rdi) and writes zmm0.
2825 define <8 x double> @test_masked_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2826 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask2:
2828 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [6,7,2,7,7,6,2,5]
2829 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2830 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2831 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
2833 %vec = load <8 x double>, <8 x double>* %vp
2834 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5>
2835 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2836 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2837 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <6,7,2,7,7,6,2,5>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes are zero.
; The assertions expect the index list in zmm1, a vxorpd/vcmpeqpd pair
; producing %k1, and a vpermpd that reads the source from (%rdi) and carries
; both {%k1} and {z}.
2840 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask2(<8 x double>* %vp, <8 x double> %mask) {
2841 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask2:
2843 ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [6,7,2,7,7,6,2,5]
2844 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2845 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
2846 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
2848 %vec = load <8 x double>, <8 x double>* %vp
2849 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 6, i32 7, i32 2, i32 7, i32 7, i32 6, i32 2, i32 5>
2850 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2851 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2852 ret <8 x double> %res
; Unmasked permute of an <8 x double> loaded from %vp, with the constant index
; list <2,1,1,0,6,5,5,4> (indices for elements 4-7 are those of elements 0-3
; plus 4). The assertions expect a vpermpd whose source is shown as mem[...],
; with no separate vector load and no index-vector load.
2855 define <8 x double> @test_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp) {
2856 ; CHECK-LABEL: test_8xdouble_perm_imm_mem_mask3:
2858 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 = mem[2,1,1,0,6,5,5,4]
2860 %vec = load <8 x double>, <8 x double>* %vp
2861 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
2862 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,1,1,0,6,5,5,4> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes take %vec2. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a %k1-predicated vpermpd whose source
; is shown as mem[...] and whose destination is zmm0.
2864 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2865 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask3:
2867 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2868 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2869 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,1,1,0,6,5,5,4]
2871 %vec = load <8 x double>, <8 x double>* %vp
2872 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
2873 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2874 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2875 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,1,1,0,6,5,5,4> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes are zero. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a vpermpd whose source is shown as
; mem[...] and which carries both {%k1} and {z}.
2878 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask3(<8 x double>* %vp, <8 x double> %mask) {
2879 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask3:
2881 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2882 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
2883 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,1,1,0,6,5,5,4]
2885 %vec = load <8 x double>, <8 x double>* %vp
2886 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 1, i32 1, i32 0, i32 6, i32 5, i32 5, i32 4>
2887 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2888 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2889 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <1,1,3,5,6,0,6,0>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes take %vec2.
; The assertions expect the index list in zmm2, a vxorpd/vcmpeqpd pair
; producing %k1, and a %k1-predicated vpermpd that reads the source from
; (%rdi) and writes zmm0.
2892 define <8 x double> @test_masked_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2893 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask4:
2895 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [1,1,3,5,6,0,6,0]
2896 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2897 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2898 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
2900 %vec = load <8 x double>, <8 x double>* %vp
2901 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0>
2902 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2903 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2904 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <1,1,3,5,6,0,6,0>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes are zero.
; The assertions expect the index list in zmm1, a vxorpd/vcmpeqpd pair
; producing %k1, and a vpermpd that reads the source from (%rdi) and carries
; both {%k1} and {z}.
2907 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask4(<8 x double>* %vp, <8 x double> %mask) {
2908 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask4:
2910 ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [1,1,3,5,6,0,6,0]
2911 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2912 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
2913 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
2915 %vec = load <8 x double>, <8 x double>* %vp
2916 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 1, i32 1, i32 3, i32 5, i32 6, i32 0, i32 6, i32 0>
2917 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2918 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2919 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,2,2,3,6,6,6,7> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes take %vec2. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a %k1-predicated vpermpd whose source
; is shown as mem[...] and whose destination is zmm0.
2922 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2923 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask5:
2925 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2926 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2927 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[2,2,2,3,6,6,6,7]
2929 %vec = load <8 x double>, <8 x double>* %vp
2930 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7>
2931 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2932 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2933 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,2,2,3,6,6,6,7> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes are zero. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a vpermpd whose source is shown as
; mem[...] and which carries both {%k1} and {z}.
2936 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask5(<8 x double>* %vp, <8 x double> %mask) {
2937 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask5:
2939 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
2940 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
2941 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[2,2,2,3,6,6,6,7]
2943 %vec = load <8 x double>, <8 x double>* %vp
2944 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 2, i32 2, i32 3, i32 6, i32 6, i32 6, i32 7>
2945 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2946 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2947 ret <8 x double> %res
; Unmasked permute of an <8 x double> loaded from %vp, with the constant index
; list <2,4,0,4,6,1,2,5>. The assertions expect the index list to be loaded
; into zmm0 (by vmovaps here) and the source vector to be consumed as the
; (%rdi) memory operand of vpermpd rather than through a separate load.
2950 define <8 x double> @test_8xdouble_perm_mem_mask6(<8 x double>* %vp) {
2951 ; CHECK-LABEL: test_8xdouble_perm_mem_mask6:
2953 ; CHECK-NEXT: vmovaps {{.*#+}} zmm0 = [2,4,0,4,6,1,2,5]
2954 ; CHECK-NEXT: vpermpd (%rdi), %zmm0, %zmm0
2956 %vec = load <8 x double>, <8 x double>* %vp
2957 %res = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
2958 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,4,0,4,6,1,2,5>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes take %vec2.
; The assertions expect the index list in zmm2, a vxorpd/vcmpeqpd pair
; producing %k1, and a %k1-predicated vpermpd that reads the source from
; (%rdi) and writes zmm0.
2960 define <8 x double> @test_masked_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2961 ; CHECK-LABEL: test_masked_8xdouble_perm_mem_mask6:
2963 ; CHECK-NEXT: vmovapd {{.*#+}} zmm2 = [2,4,0,4,6,1,2,5]
2964 ; CHECK-NEXT: vxorpd %xmm3, %xmm3, %xmm3
2965 ; CHECK-NEXT: vcmpeqpd %zmm3, %zmm1, %k1
2966 ; CHECK-NEXT: vpermpd (%rdi), %zmm2, %zmm0 {%k1}
2968 %vec = load <8 x double>, <8 x double>* %vp
2969 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
2970 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2971 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
2972 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <2,4,0,4,6,1,2,5>: lanes where %mask compares ordered-equal
; (fcmp oeq) to zero take the permuted element, all other lanes are zero.
; The assertions expect the index list in zmm1, a vxorpd/vcmpeqpd pair
; producing %k1, and a vpermpd that reads the source from (%rdi) and carries
; both {%k1} and {z}.
2975 define <8 x double> @test_masked_z_8xdouble_perm_mem_mask6(<8 x double>* %vp, <8 x double> %mask) {
2976 ; CHECK-LABEL: test_masked_z_8xdouble_perm_mem_mask6:
2978 ; CHECK-NEXT: vmovapd {{.*#+}} zmm1 = [2,4,0,4,6,1,2,5]
2979 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2980 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm0, %k1
2981 ; CHECK-NEXT: vpermpd (%rdi), %zmm1, %zmm0 {%k1} {z}
2983 %vec = load <8 x double>, <8 x double>* %vp
2984 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 2, i32 4, i32 0, i32 4, i32 6, i32 1, i32 2, i32 5>
2985 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
2986 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
2987 ret <8 x double> %res
; Merge-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,3,2,0,4,7,6,4> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes take %vec2. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a %k1-predicated vpermpd whose source
; is shown as mem[...] and whose destination is zmm0.
2990 define <8 x double> @test_masked_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %vec2, <8 x double> %mask) {
2991 ; CHECK-LABEL: test_masked_8xdouble_perm_imm_mem_mask7:
2993 ; CHECK-NEXT: vxorpd %xmm2, %xmm2, %xmm2
2994 ; CHECK-NEXT: vcmpeqpd %zmm2, %zmm1, %k1
2995 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} = mem[0,3,2,0,4,7,6,4]
2997 %vec = load <8 x double>, <8 x double>* %vp
2998 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
2999 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
3000 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> %vec2
3001 ret <8 x double> %res
; Zero-masked permute of an <8 x double> loaded from %vp, with the constant
; index list <0,3,2,0,4,7,6,4> (indices for elements 4-7 are those of elements
; 0-3 plus 4): lanes where %mask compares ordered-equal (fcmp oeq) to zero take
; the permuted element, all other lanes are zero. The assertions expect a
; vxorpd/vcmpeqpd pair producing %k1 and a vpermpd whose source is shown as
; mem[...] and which carries both {%k1} and {z}.
3004 define <8 x double> @test_masked_z_8xdouble_perm_imm_mem_mask7(<8 x double>* %vp, <8 x double> %mask) {
3005 ; CHECK-LABEL: test_masked_z_8xdouble_perm_imm_mem_mask7:
3007 ; CHECK-NEXT: vxorpd %xmm1, %xmm1, %xmm1
3008 ; CHECK-NEXT: vcmpeqpd %zmm1, %zmm0, %k1
3009 ; CHECK-NEXT: vpermpd {{.*#+}} zmm0 {%k1} {z} = mem[0,3,2,0,4,7,6,4]
3011 %vec = load <8 x double>, <8 x double>* %vp
3012 %shuf = shufflevector <8 x double> %vec, <8 x double> undef, <8 x i32> <i32 0, i32 3, i32 2, i32 0, i32 4, i32 7, i32 6, i32 4>
3013 %cmp = fcmp oeq <8 x double> %mask, zeroinitializer
3014 %res = select <8 x i1> %cmp, <8 x double> %shuf, <8 x double> zeroinitializer
3015 ret <8 x double> %res