1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-NO-BYPASS-DELAY
3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=icelake-server -mattr=-no-bypass-delay-shuffle | FileCheck %s --check-prefixes=CHECK,CHECK-ICX,CHECK-ICX-BYPASS-DELAY
4 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=x86-64-v4 | FileCheck %s --check-prefixes=CHECK,CHECK-V4
5 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx512f,+avx512vl,+avx512bw,+avx512dq | FileCheck %s --check-prefixes=CHECK,CHECK-AVX512
6 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver4 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
7 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mcpu=znver5 | FileCheck %s --check-prefixes=CHECK,CHECK-ZNVER4
; Unmasked, register-operand shuffle of a <16 x float>: the shuffle mask
; reverses the element order inside each group of four elements.
; The common prefix is used, so every configuration expects one vshufps on zmm0.
9 define <16 x float> @transform_VPERMILPSZrr(<16 x float> %a) nounwind {
10 ; CHECK-LABEL: transform_VPERMILPSZrr:
12 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
14 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
15 ret <16 x float> %shufp
; Unmasked, register-operand shuffle of an <8 x float>: the shuffle mask
; reverses the element order inside each group of four elements.
; The common prefix is used, so every configuration expects one vshufps on ymm0.
18 define <8 x float> @transform_VPERMILPSYrr(<8 x float> %a) nounwind {
19 ; CHECK-LABEL: transform_VPERMILPSYrr:
21 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 = ymm0[3,2,1,0,7,6,5,4]
23 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
24 ret <8 x float> %shufp
; Unmasked, register-operand shuffle of a <4 x float>: the shuffle mask
; reverses all four elements.
; The common prefix is used, so every configuration expects one vshufps on xmm0.
27 define <4 x float> @transform_VPERMILPSrr(<4 x float> %a) nounwind {
28 ; CHECK-LABEL: transform_VPERMILPSrr:
30 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 = xmm0[3,2,1,0]
32 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
33 ret <4 x float> %shufp
; Zero-masked, register-operand variant on <16 x float>: %mask_int is bitcast
; to <16 x i1> and selects between the shuffled vector and zeroinitializer.
; Every configuration expects a kmovd into k1 followed by a {%k1} {z} vshufps.
36 define <16 x float> @transform_VPERMILPSZrrkz(<16 x float> %a, i16 %mask_int) nounwind {
37 ; CHECK-LABEL: transform_VPERMILPSZrrkz:
39 ; CHECK-NEXT: kmovd %edi, %k1
40 ; CHECK-NEXT: vshufps {{.*#+}} zmm0 {%k1} {z} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
42 %mask = bitcast i16 %mask_int to <16 x i1>
43 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
44 %res = select <16 x i1> %mask, <16 x float> %shufp, <16 x float> zeroinitializer
; Zero-masked, register-operand variant on <8 x float>: %mask_int is bitcast
; to <8 x i1> and selects between the shuffled vector and zeroinitializer.
; Every configuration expects a kmovd into k1 followed by a {%k1} {z} vshufps.
48 define <8 x float> @transform_VPERMILPSYrrkz(<8 x float> %a, i8 %mask_int) nounwind {
49 ; CHECK-LABEL: transform_VPERMILPSYrrkz:
51 ; CHECK-NEXT: kmovd %edi, %k1
52 ; CHECK-NEXT: vshufps {{.*#+}} ymm0 {%k1} {z} = ymm0[3,2,1,0,7,6,5,4]
54 %mask = bitcast i8 %mask_int to <8 x i1>
55 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
56 %res = select <8 x i1> %mask, <8 x float> %shufp, <8 x float> zeroinitializer
; Zero-masked, register-operand variant on <4 x float>: the i4 %mask_int is
; bitcast to <4 x i1> and selects between the shuffled vector and
; zeroinitializer.
; Every configuration expects a kmovd into k1 followed by a {%k1} {z} vshufps.
60 define <4 x float> @transform_VPERMILPSrrkz(<4 x float> %a, i4 %mask_int) nounwind {
61 ; CHECK-LABEL: transform_VPERMILPSrrkz:
63 ; CHECK-NEXT: kmovd %edi, %k1
64 ; CHECK-NEXT: vshufps {{.*#+}} xmm0 {%k1} {z} = xmm0[3,2,1,0]
66 %mask = bitcast i4 %mask_int to <4 x i1>
67 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
68 %res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> zeroinitializer
; Merge-masked, register-operand variant on <16 x float>: the mask selects
; between the shuffled %a and the pass-through vector %b.
; Every configuration expects a masked vshufps that writes zmm1, followed by a
; vmovaps from zmm1 to zmm0.
72 define <16 x float> @transform_VPERMILPSZrrk(<16 x float> %a, <16 x float> %b, i16 %mask_int) nounwind {
73 ; CHECK-LABEL: transform_VPERMILPSZrrk:
75 ; CHECK-NEXT: kmovd %edi, %k1
76 ; CHECK-NEXT: vshufps {{.*#+}} zmm1 {%k1} = zmm0[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
77 ; CHECK-NEXT: vmovaps %zmm1, %zmm0
79 %mask = bitcast i16 %mask_int to <16 x i1>
80 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
81 %res = select <16 x i1> %mask, <16 x float> %shufp, <16 x float> %b
; Merge-masked, register-operand variant on <8 x float>: the mask selects
; between the shuffled %a and the pass-through vector %b.
; Every configuration expects a masked vshufps that writes ymm1, followed by a
; vmovaps from ymm1 to ymm0.
85 define <8 x float> @transform_VPERMILPSYrrk(<8 x float> %a, <8 x float> %b, i8 %mask_int) nounwind {
86 ; CHECK-LABEL: transform_VPERMILPSYrrk:
88 ; CHECK-NEXT: kmovd %edi, %k1
89 ; CHECK-NEXT: vshufps {{.*#+}} ymm1 {%k1} = ymm0[3,2,1,0,7,6,5,4]
90 ; CHECK-NEXT: vmovaps %ymm1, %ymm0
92 %mask = bitcast i8 %mask_int to <8 x i1>
93 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
94 %res = select <8 x i1> %mask, <8 x float> %shufp, <8 x float> %b
; Merge-masked, register-operand variant on <4 x float>: the i4 mask selects
; between the shuffled %a and the pass-through vector %b.
; Every configuration expects a masked vshufps that writes xmm1, followed by a
; vmovaps from xmm1 to xmm0.
98 define <4 x float> @transform_VPERMILPSrrk(<4 x float> %a, <4 x float> %b, i4 %mask_int) nounwind {
99 ; CHECK-LABEL: transform_VPERMILPSrrk:
101 ; CHECK-NEXT: kmovd %edi, %k1
102 ; CHECK-NEXT: vshufps {{.*#+}} xmm1 {%k1} = xmm0[3,2,1,0]
103 ; CHECK-NEXT: vmovaps %xmm1, %xmm0
105 %mask = bitcast i4 %mask_int to <4 x i1>
106 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
107 %res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> %b
; Unmasked, memory-operand variant on <16 x float>: the vector is loaded from
; %ap and then shuffled with the same reverse-within-four mask.
; The common prefix is used, so every configuration expects vpermilps with a
; memory source.
111 define <16 x float> @transform_VPERMILPSZrm(ptr %ap) nounwind {
112 ; CHECK-LABEL: transform_VPERMILPSZrm:
114 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
116 %a = load <16 x float>, ptr %ap
117 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
118 ret <16 x float> %shufp
; Unmasked, memory-operand variant on <8 x float>: the vector is loaded from
; %ap and then shuffled. Expectations differ per configuration: the
; icelake-server NO-BYPASS-DELAY prefix expects vpshufd with a memory source,
; while the BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect vpermilps.
121 define <8 x float> @transform_VPERMILPSYrm(ptr %ap) nounwind {
122 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
123 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
124 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
125 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
127 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrm:
128 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
129 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
130 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
132 ; CHECK-V4-LABEL: transform_VPERMILPSYrm:
134 ; CHECK-V4-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
135 ; CHECK-V4-NEXT: retq
137 ; CHECK-AVX512-LABEL: transform_VPERMILPSYrm:
138 ; CHECK-AVX512: # %bb.0:
139 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
140 ; CHECK-AVX512-NEXT: retq
142 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSYrm:
143 ; CHECK-ZNVER4: # %bb.0:
144 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} ymm0 = mem[3,2,1,0,7,6,5,4]
145 ; CHECK-ZNVER4-NEXT: retq
146 %a = load <8 x float>, ptr %ap
147 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
148 ret <8 x float> %shufp
; Unmasked, memory-operand variant on <4 x float>: the vector is loaded from
; %ap and then fully reversed. Expectations differ per configuration: the
; icelake-server NO-BYPASS-DELAY prefix expects vpshufd with a memory source,
; while the BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect vpermilps.
151 define <4 x float> @transform_VPERMILPSrm(ptr %ap) nounwind {
152 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
153 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
154 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 = mem[3,2,1,0]
155 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
157 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrm:
158 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
159 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
160 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
162 ; CHECK-V4-LABEL: transform_VPERMILPSrm:
164 ; CHECK-V4-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
165 ; CHECK-V4-NEXT: retq
167 ; CHECK-AVX512-LABEL: transform_VPERMILPSrm:
168 ; CHECK-AVX512: # %bb.0:
169 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
170 ; CHECK-AVX512-NEXT: retq
172 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSrm:
173 ; CHECK-ZNVER4: # %bb.0:
174 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} xmm0 = mem[3,2,1,0]
175 ; CHECK-ZNVER4-NEXT: retq
176 %a = load <4 x float>, ptr %ap
177 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
178 ret <4 x float> %shufp
; Zero-masked, memory-operand variant on <16 x float>: the vector is loaded
; from %ap, shuffled, and selected against zeroinitializer under the mask.
; Every configuration expects a kmovd into k1 followed by a {%k1} {z}
; vpermilps with a memory source.
181 define <16 x float> @transform_VPERMILPSZrmkz(ptr %ap, i16 %mask_int) nounwind {
182 ; CHECK-LABEL: transform_VPERMILPSZrmkz:
184 ; CHECK-NEXT: kmovd %esi, %k1
185 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
187 %mask = bitcast i16 %mask_int to <16 x i1>
188 %a = load <16 x float>, ptr %ap
189 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
190 %res = select <16 x i1> %mask, <16 x float> %shufp, <16 x float> zeroinitializer
191 ret <16 x float> %res
; Zero-masked, memory-operand variant on <8 x float>: the vector is loaded
; from %ap, shuffled, and selected against zeroinitializer under the mask.
; The icelake-server NO-BYPASS-DELAY prefix expects a {%k1} {z} vpshufd; the
; BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect a {%k1} {z} vpermilps.
194 define <8 x float> @transform_VPERMILPSYrmkz(ptr %ap, i8 %mask_int) nounwind {
195 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmkz:
196 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
197 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
198 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
199 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
201 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmkz:
202 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
203 ; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
204 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
205 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
207 ; CHECK-V4-LABEL: transform_VPERMILPSYrmkz:
209 ; CHECK-V4-NEXT: kmovd %esi, %k1
210 ; CHECK-V4-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
211 ; CHECK-V4-NEXT: retq
213 ; CHECK-AVX512-LABEL: transform_VPERMILPSYrmkz:
214 ; CHECK-AVX512: # %bb.0:
215 ; CHECK-AVX512-NEXT: kmovd %esi, %k1
216 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
217 ; CHECK-AVX512-NEXT: retq
219 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSYrmkz:
220 ; CHECK-ZNVER4: # %bb.0:
221 ; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
222 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} ymm0 {%k1} {z} = mem[3,2,1,0,7,6,5,4]
223 ; CHECK-ZNVER4-NEXT: retq
224 %mask = bitcast i8 %mask_int to <8 x i1>
225 %a = load <8 x float>, ptr %ap
226 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
227 %res = select <8 x i1> %mask, <8 x float> %shufp, <8 x float> zeroinitializer
; Zero-masked, memory-operand variant on <4 x float>: the vector is loaded
; from %ap, reversed, and selected against zeroinitializer under the i4 mask.
; The icelake-server NO-BYPASS-DELAY prefix expects a {%k1} {z} vpshufd; the
; BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect a {%k1} {z} vpermilps.
231 define <4 x float> @transform_VPERMILPSrmkz(ptr %ap, i4 %mask_int) nounwind {
232 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrmkz:
233 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
234 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
235 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
236 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
238 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrmkz:
239 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
240 ; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
241 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
242 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
244 ; CHECK-V4-LABEL: transform_VPERMILPSrmkz:
246 ; CHECK-V4-NEXT: kmovd %esi, %k1
247 ; CHECK-V4-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
248 ; CHECK-V4-NEXT: retq
250 ; CHECK-AVX512-LABEL: transform_VPERMILPSrmkz:
251 ; CHECK-AVX512: # %bb.0:
252 ; CHECK-AVX512-NEXT: kmovd %esi, %k1
253 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
254 ; CHECK-AVX512-NEXT: retq
256 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSrmkz:
257 ; CHECK-ZNVER4: # %bb.0:
258 ; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
259 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} xmm0 {%k1} {z} = mem[3,2,1,0]
260 ; CHECK-ZNVER4-NEXT: retq
261 %mask = bitcast i4 %mask_int to <4 x i1>
262 %a = load <4 x float>, ptr %ap
263 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
264 %res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> zeroinitializer
; Merge-masked, memory-operand variant on <16 x float>: the vector is loaded
; from %ap, shuffled, and selected against the pass-through vector %b.
; Every configuration expects a kmovd into k1 followed by a {%k1} vpermilps
; with a memory source writing zmm0.
268 define <16 x float> @transform_VPERMILPSZrmk(ptr %ap, <16 x float> %b, i16 %mask_int) nounwind {
269 ; CHECK-LABEL: transform_VPERMILPSZrmk:
271 ; CHECK-NEXT: kmovd %esi, %k1
272 ; CHECK-NEXT: vpermilps {{.*#+}} zmm0 {%k1} = mem[3,2,1,0,7,6,5,4,11,10,9,8,15,14,13,12]
274 %mask = bitcast i16 %mask_int to <16 x i1>
275 %a = load <16 x float>, ptr %ap
276 %shufp = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4, i32 11, i32 10, i32 9, i32 8, i32 15, i32 14, i32 13, i32 12>
277 %res = select <16 x i1> %mask, <16 x float> %shufp, <16 x float> %b
278 ret <16 x float> %res
; Merge-masked, memory-operand variant on <8 x float>: the vector is loaded
; from %ap, shuffled, and selected against the pass-through vector %b.
; The icelake-server NO-BYPASS-DELAY prefix expects a {%k1} vpshufd; the
; BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect a {%k1} vpermilps.
281 define <8 x float> @transform_VPERMILPSYrmk(ptr %ap, <8 x float> %b, i8 %mask_int) nounwind {
282 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmk:
283 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
284 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
285 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
286 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
288 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSYrmk:
289 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
290 ; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
291 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
292 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
294 ; CHECK-V4-LABEL: transform_VPERMILPSYrmk:
296 ; CHECK-V4-NEXT: kmovd %esi, %k1
297 ; CHECK-V4-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
298 ; CHECK-V4-NEXT: retq
300 ; CHECK-AVX512-LABEL: transform_VPERMILPSYrmk:
301 ; CHECK-AVX512: # %bb.0:
302 ; CHECK-AVX512-NEXT: kmovd %esi, %k1
303 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
304 ; CHECK-AVX512-NEXT: retq
306 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSYrmk:
307 ; CHECK-ZNVER4: # %bb.0:
308 ; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
309 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} ymm0 {%k1} = mem[3,2,1,0,7,6,5,4]
310 ; CHECK-ZNVER4-NEXT: retq
311 %mask = bitcast i8 %mask_int to <8 x i1>
312 %a = load <8 x float>, ptr %ap
313 %shufp = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 3, i32 2, i32 1, i32 0, i32 7, i32 6, i32 5, i32 4>
314 %res = select <8 x i1> %mask, <8 x float> %shufp, <8 x float> %b
; Merge-masked, memory-operand variant on <4 x float>: the vector is loaded
; from %ap, reversed, and selected against the pass-through vector %b under
; the i4 mask.
; The icelake-server NO-BYPASS-DELAY prefix expects a {%k1} vpshufd; the
; BYPASS-DELAY, V4, AVX512 and ZNVER4 prefixes expect a {%k1} vpermilps.
318 define <4 x float> @transform_VPERMILPSrmk(ptr %ap, <4 x float> %b, i4 %mask_int) nounwind {
319 ; CHECK-ICX-NO-BYPASS-DELAY-LABEL: transform_VPERMILPSrmk:
320 ; CHECK-ICX-NO-BYPASS-DELAY: # %bb.0:
321 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: kmovd %esi, %k1
322 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: vpshufd {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
323 ; CHECK-ICX-NO-BYPASS-DELAY-NEXT: retq
325 ; CHECK-ICX-BYPASS-DELAY-LABEL: transform_VPERMILPSrmk:
326 ; CHECK-ICX-BYPASS-DELAY: # %bb.0:
327 ; CHECK-ICX-BYPASS-DELAY-NEXT: kmovd %esi, %k1
328 ; CHECK-ICX-BYPASS-DELAY-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
329 ; CHECK-ICX-BYPASS-DELAY-NEXT: retq
331 ; CHECK-V4-LABEL: transform_VPERMILPSrmk:
333 ; CHECK-V4-NEXT: kmovd %esi, %k1
334 ; CHECK-V4-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
335 ; CHECK-V4-NEXT: retq
337 ; CHECK-AVX512-LABEL: transform_VPERMILPSrmk:
338 ; CHECK-AVX512: # %bb.0:
339 ; CHECK-AVX512-NEXT: kmovd %esi, %k1
340 ; CHECK-AVX512-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
341 ; CHECK-AVX512-NEXT: retq
343 ; CHECK-ZNVER4-LABEL: transform_VPERMILPSrmk:
344 ; CHECK-ZNVER4: # %bb.0:
345 ; CHECK-ZNVER4-NEXT: kmovd %esi, %k1
346 ; CHECK-ZNVER4-NEXT: vpermilps {{.*#+}} xmm0 {%k1} = mem[3,2,1,0]
347 ; CHECK-ZNVER4-NEXT: retq
348 %mask = bitcast i4 %mask_int to <4 x i1>
349 %a = load <4 x float>, ptr %ap
350 %shufp = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 3, i32 2, i32 1, i32 0>
351 %res = select <4 x i1> %mask, <4 x float> %shufp, <4 x float> %b
354 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: