1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx | FileCheck --check-prefix=SKX %s
; Extracts elements 16..23 of a <32 x i16> (bits 256..383, i.e. 128-bit lane
; index 2); the checks expect a single vextractf32x4 with immediate $2.
5 define <8 x i16> @extract_subvector128_v32i16(<32 x i16> %x) nounwind {
6 ; SKX-LABEL: extract_subvector128_v32i16:
8 ; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
11 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23>
; Extracts elements 0..7 (the lowest 128 bits) of a <32 x i16>. The visible
; checks contain no extract instruction, only a sub-register kill annotation
; and vzeroupper.
15 define <8 x i16> @extract_subvector128_v32i16_first_element(<32 x i16> %x) nounwind {
16 ; SKX-LABEL: extract_subvector128_v32i16_first_element:
18 ; SKX-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
19 ; SKX-NEXT: vzeroupper
21 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Extracts bytes 32..47 of a <64 x i8> (128-bit lane index 2); the checks
; expect vextractf32x4 with immediate $2 followed by vzeroupper.
25 define <16 x i8> @extract_subvector128_v64i8(<64 x i8> %x) nounwind {
26 ; SKX-LABEL: extract_subvector128_v64i8:
28 ; SKX-NEXT: vextractf32x4 $2, %zmm0, %xmm0
29 ; SKX-NEXT: vzeroupper
31 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38,i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47>
; Extracts bytes 0..15 (the lowest 128 bits) of a <64 x i8>. The visible
; checks contain no extract instruction, only a sub-register kill annotation
; and vzeroupper.
35 define <16 x i8> @extract_subvector128_v64i8_first_element(<64 x i8> %x) nounwind {
36 ; SKX-LABEL: extract_subvector128_v64i8_first_element:
38 ; SKX-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0
39 ; SKX-NEXT: vzeroupper
41 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
; Extracts elements 16..31 (the upper 256 bits) of a <32 x i16>; the checks
; expect vextractf64x4 with immediate $1.
46 define <16 x i16> @extract_subvector256_v32i16(<32 x i16> %x) nounwind {
47 ; SKX-LABEL: extract_subvector256_v32i16:
49 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
51 %r1 = shufflevector <32 x i16> %x, <32 x i16> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
; Extracts bytes 32..63 (the upper 256 bits) of a <64 x i8>; the checks
; expect vextractf64x4 with immediate $1.
55 define <32 x i8> @extract_subvector256_v64i8(<64 x i8> %x) nounwind {
56 ; SKX-LABEL: extract_subvector256_v64i8:
58 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0
60 %r1 = shufflevector <64 x i8> %x, <64 x i8> undef, <32 x i32> <i32 32, i32 33, i32 34, i32 35, i32 36, i32 37, i32 38, i32 39, i32 40, i32 41, i32 42, i32 43, i32 44, i32 45, i32 46, i32 47, i32 48, i32 49, i32 50, i32 51, i32 52, i32 53, i32 54, i32 55, i32 56, i32 57, i32 58, i32 59, i32 60, i32 61, i32 62, i32 63>
; Stores the upper 128 bits (elements 2..3) of a <4 x double> with align 1.
; The checks expect the extract to be folded into the store: vextractf128 $1
; with a memory destination.
; NOTE(review): despite "v8f64" in the name, the source vector is <4 x double>.
64 define void @extract_subvector256_v8f64_store(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
65 ; SKX-LABEL: extract_subvector256_v8f64_store:
66 ; SKX: ## %bb.0: ## %entry
67 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
68 ; SKX-NEXT: vzeroupper
71 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 2, i32 3>
72 store <2 x double> %0, ptr %addr, align 1
; Stores the upper 128 bits (elements 4..7) of an <8 x float> with align 1.
; The checks expect vextractf128 $1 writing directly to memory.
76 define void @extract_subvector256_v8f32_store(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
77 ; SKX-LABEL: extract_subvector256_v8f32_store:
78 ; SKX: ## %bb.0: ## %entry
79 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
80 ; SKX-NEXT: vzeroupper
83 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
84 store <4 x float> %0, ptr %addr, align 1
; Stores the upper 128 bits (elements 2..3) of a <4 x i64> with align 1.
; The checks expect vextractf128 $1 writing directly to memory (the
; floating-point form of the instruction, even for this integer vector).
88 define void @extract_subvector256_v4i64_store(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
89 ; SKX-LABEL: extract_subvector256_v4i64_store:
90 ; SKX: ## %bb.0: ## %entry
91 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
92 ; SKX-NEXT: vzeroupper
95 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
96 store <2 x i64> %0, ptr %addr, align 1
; Stores the upper 128 bits (elements 4..7) of an <8 x i32> with align 1.
; The checks expect vextractf128 $1 writing directly to memory.
100 define void @extract_subvector256_v8i32_store(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
101 ; SKX-LABEL: extract_subvector256_v8i32_store:
102 ; SKX: ## %bb.0: ## %entry
103 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
104 ; SKX-NEXT: vzeroupper
107 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
108 store <4 x i32> %0, ptr %addr, align 1
; Stores the upper 128 bits (elements 8..15) of a <16 x i16> with align 1.
; The checks expect vextractf128 $1 writing directly to memory.
112 define void @extract_subvector256_v16i16_store(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
113 ; SKX-LABEL: extract_subvector256_v16i16_store:
114 ; SKX: ## %bb.0: ## %entry
115 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
116 ; SKX-NEXT: vzeroupper
119 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
120 store <8 x i16> %0, ptr %addr, align 1
; Stores the upper 128 bits (bytes 16..31) of a <32 x i8> with align 1.
; The checks expect vextractf128 $1 writing directly to memory.
124 define void @extract_subvector256_v32i8_store(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
125 ; SKX-LABEL: extract_subvector256_v32i8_store:
126 ; SKX: ## %bb.0: ## %entry
127 ; SKX-NEXT: vextractf128 $1, %ymm0, (%rdi)
128 ; SKX-NEXT: vzeroupper
131 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
132 store <16 x i8> %0, ptr %addr, align 1
; Stores the low 128 bits (elements 0..1) of a <4 x double> with align 1.
; The checks expect a plain unaligned vmovups of xmm0, with no extract.
136 define void @extract_subvector256_v4f64_store_lo(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
137 ; SKX-LABEL: extract_subvector256_v4f64_store_lo:
138 ; SKX: ## %bb.0: ## %entry
139 ; SKX-NEXT: vmovups %xmm0, (%rdi)
140 ; SKX-NEXT: vzeroupper
143 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
144 store <2 x double> %0, ptr %addr, align 1
; Same low-128-bit store of a <4 x double>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
148 define void @extract_subvector256_v4f64_store_lo_align_16(ptr nocapture %addr, <4 x double> %a) nounwind uwtable ssp {
149 ; SKX-LABEL: extract_subvector256_v4f64_store_lo_align_16:
150 ; SKX: ## %bb.0: ## %entry
151 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
152 ; SKX-NEXT: vzeroupper
155 %0 = shufflevector <4 x double> %a, <4 x double> undef, <2 x i32> <i32 0, i32 1>
156 store <2 x double> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..3) of an <8 x float> with align 1.
; The checks expect an unaligned vmovups of xmm0.
160 define void @extract_subvector256_v4f32_store_lo(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
161 ; SKX-LABEL: extract_subvector256_v4f32_store_lo:
162 ; SKX: ## %bb.0: ## %entry
163 ; SKX-NEXT: vmovups %xmm0, (%rdi)
164 ; SKX-NEXT: vzeroupper
167 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
168 store <4 x float> %0, ptr %addr, align 1
; Same low-128-bit store of an <8 x float>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
172 define void @extract_subvector256_v4f32_store_lo_align_16(ptr nocapture %addr, <8 x float> %a) nounwind uwtable ssp {
173 ; SKX-LABEL: extract_subvector256_v4f32_store_lo_align_16:
174 ; SKX: ## %bb.0: ## %entry
175 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
176 ; SKX-NEXT: vzeroupper
179 %0 = shufflevector <8 x float> %a, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
180 store <4 x float> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..1) of a <4 x i64> with align 1.
; The checks expect an unaligned vmovups of xmm0 (a floating-point move is
; expected even though the vector is integer).
184 define void @extract_subvector256_v2i64_store_lo(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
185 ; SKX-LABEL: extract_subvector256_v2i64_store_lo:
186 ; SKX: ## %bb.0: ## %entry
187 ; SKX-NEXT: vmovups %xmm0, (%rdi)
188 ; SKX-NEXT: vzeroupper
191 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
192 store <2 x i64> %0, ptr %addr, align 1
; Same low-128-bit store of a <4 x i64>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
196 define void @extract_subvector256_v2i64_store_lo_align_16(ptr nocapture %addr, <4 x i64> %a) nounwind uwtable ssp {
197 ; SKX-LABEL: extract_subvector256_v2i64_store_lo_align_16:
198 ; SKX: ## %bb.0: ## %entry
199 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
200 ; SKX-NEXT: vzeroupper
203 %0 = shufflevector <4 x i64> %a, <4 x i64> undef, <2 x i32> <i32 0, i32 1>
204 store <2 x i64> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..3) of an <8 x i32> with align 1.
; The checks expect an unaligned vmovups of xmm0.
208 define void @extract_subvector256_v4i32_store_lo(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
209 ; SKX-LABEL: extract_subvector256_v4i32_store_lo:
210 ; SKX: ## %bb.0: ## %entry
211 ; SKX-NEXT: vmovups %xmm0, (%rdi)
212 ; SKX-NEXT: vzeroupper
215 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
216 store <4 x i32> %0, ptr %addr, align 1
; Same low-128-bit store of an <8 x i32>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
220 define void @extract_subvector256_v4i32_store_lo_align_16(ptr nocapture %addr, <8 x i32> %a) nounwind uwtable ssp {
221 ; SKX-LABEL: extract_subvector256_v4i32_store_lo_align_16:
222 ; SKX: ## %bb.0: ## %entry
223 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
224 ; SKX-NEXT: vzeroupper
227 %0 = shufflevector <8 x i32> %a, <8 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
228 store <4 x i32> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..7) of a <16 x i16> with align 1.
; The checks expect an unaligned vmovups of xmm0.
232 define void @extract_subvector256_v8i16_store_lo(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
233 ; SKX-LABEL: extract_subvector256_v8i16_store_lo:
234 ; SKX: ## %bb.0: ## %entry
235 ; SKX-NEXT: vmovups %xmm0, (%rdi)
236 ; SKX-NEXT: vzeroupper
239 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
240 store <8 x i16> %0, ptr %addr, align 1
; Same low-128-bit store of a <16 x i16>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
244 define void @extract_subvector256_v8i16_store_lo_align_16(ptr nocapture %addr, <16 x i16> %a) nounwind uwtable ssp {
245 ; SKX-LABEL: extract_subvector256_v8i16_store_lo_align_16:
246 ; SKX: ## %bb.0: ## %entry
247 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
248 ; SKX-NEXT: vzeroupper
251 %0 = shufflevector <16 x i16> %a, <16 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
252 store <8 x i16> %0, ptr %addr, align 16
; Stores the low 128 bits (bytes 0..15) of a <32 x i8> with align 1.
; The checks expect an unaligned vmovups of xmm0.
256 define void @extract_subvector256_v16i8_store_lo(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
257 ; SKX-LABEL: extract_subvector256_v16i8_store_lo:
258 ; SKX: ## %bb.0: ## %entry
259 ; SKX-NEXT: vmovups %xmm0, (%rdi)
260 ; SKX-NEXT: vzeroupper
263 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
264 store <16 x i8> %0, ptr %addr, align 1
; Same low-128-bit store of a <32 x i8>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
268 define void @extract_subvector256_v16i8_store_lo_align_16(ptr nocapture %addr, <32 x i8> %a) nounwind uwtable ssp {
269 ; SKX-LABEL: extract_subvector256_v16i8_store_lo_align_16:
270 ; SKX: ## %bb.0: ## %entry
271 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
272 ; SKX-NEXT: vzeroupper
275 %0 = shufflevector <32 x i8> %a, <32 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
276 store <16 x i8> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..1) of an <8 x double> with align 1.
; The checks expect an unaligned vmovups of xmm0, with no extract.
280 define void @extract_subvector512_v2f64_store_lo(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
281 ; SKX-LABEL: extract_subvector512_v2f64_store_lo:
282 ; SKX: ## %bb.0: ## %entry
283 ; SKX-NEXT: vmovups %xmm0, (%rdi)
284 ; SKX-NEXT: vzeroupper
287 %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
288 store <2 x double> %0, ptr %addr, align 1
; Same low-128-bit store of an <8 x double>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
292 define void @extract_subvector512_v2f64_store_lo_align_16(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
293 ; SKX-LABEL: extract_subvector512_v2f64_store_lo_align_16:
294 ; SKX: ## %bb.0: ## %entry
295 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
296 ; SKX-NEXT: vzeroupper
299 %0 = shufflevector <8 x double> %a, <8 x double> undef, <2 x i32> <i32 0, i32 1>
300 store <2 x double> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..3) of a <16 x float> with align 1.
; The checks expect an unaligned vmovups of xmm0.
304 define void @extract_subvector512_v4f32_store_lo(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
305 ; SKX-LABEL: extract_subvector512_v4f32_store_lo:
306 ; SKX: ## %bb.0: ## %entry
307 ; SKX-NEXT: vmovups %xmm0, (%rdi)
308 ; SKX-NEXT: vzeroupper
311 %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
312 store <4 x float> %0, ptr %addr, align 1
; Same low-128-bit store of a <16 x float>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
316 define void @extract_subvector512_v4f32_store_lo_align_16(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
317 ; SKX-LABEL: extract_subvector512_v4f32_store_lo_align_16:
318 ; SKX: ## %bb.0: ## %entry
319 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
320 ; SKX-NEXT: vzeroupper
323 %0 = shufflevector <16 x float> %a, <16 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
324 store <4 x float> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..1) of an <8 x i64> with align 1.
; The checks expect an unaligned vmovups of xmm0.
328 define void @extract_subvector512_v2i64_store_lo(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
329 ; SKX-LABEL: extract_subvector512_v2i64_store_lo:
330 ; SKX: ## %bb.0: ## %entry
331 ; SKX-NEXT: vmovups %xmm0, (%rdi)
332 ; SKX-NEXT: vzeroupper
335 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
336 store <2 x i64> %0, ptr %addr, align 1
; Same low-128-bit store of an <8 x i64>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
340 define void @extract_subvector512_v2i64_store_lo_align_16(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
341 ; SKX-LABEL: extract_subvector512_v2i64_store_lo_align_16:
342 ; SKX: ## %bb.0: ## %entry
343 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
344 ; SKX-NEXT: vzeroupper
347 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
348 store <2 x i64> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..3) of a <16 x i32> with align 1.
; The checks expect an unaligned vmovups of xmm0.
352 define void @extract_subvector512_v4i32_store_lo(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
353 ; SKX-LABEL: extract_subvector512_v4i32_store_lo:
354 ; SKX: ## %bb.0: ## %entry
355 ; SKX-NEXT: vmovups %xmm0, (%rdi)
356 ; SKX-NEXT: vzeroupper
359 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
360 store <4 x i32> %0, ptr %addr, align 1
; Same low-128-bit store of a <16 x i32>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
364 define void @extract_subvector512_v4i32_store_lo_align_16(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
365 ; SKX-LABEL: extract_subvector512_v4i32_store_lo_align_16:
366 ; SKX: ## %bb.0: ## %entry
367 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
368 ; SKX-NEXT: vzeroupper
371 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
372 store <4 x i32> %0, ptr %addr, align 16
; Stores the low 128 bits (elements 0..7) of a <32 x i16> with align 1.
; The checks expect an unaligned vmovups of xmm0.
376 define void @extract_subvector512_v8i16_store_lo(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
377 ; SKX-LABEL: extract_subvector512_v8i16_store_lo:
378 ; SKX: ## %bb.0: ## %entry
379 ; SKX-NEXT: vmovups %xmm0, (%rdi)
380 ; SKX-NEXT: vzeroupper
383 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
384 store <8 x i16> %0, ptr %addr, align 1
; Stores the low 128 bits (bytes 0..15) of a <64 x i8> with align 1.
; The checks expect an unaligned vmovups of xmm0.
388 define void @extract_subvector512_v16i8_store_lo(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
389 ; SKX-LABEL: extract_subvector512_v16i8_store_lo:
390 ; SKX: ## %bb.0: ## %entry
391 ; SKX-NEXT: vmovups %xmm0, (%rdi)
392 ; SKX-NEXT: vzeroupper
395 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
396 store <16 x i8> %0, ptr %addr, align 1
; Same low-128-bit store of a <64 x i8>, but with align 16; the checks
; expect the aligned form vmovaps of xmm0.
400 define void @extract_subvector512_v16i8_store_lo_align_16(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
401 ; SKX-LABEL: extract_subvector512_v16i8_store_lo_align_16:
402 ; SKX: ## %bb.0: ## %entry
403 ; SKX-NEXT: vmovaps %xmm0, (%rdi)
404 ; SKX-NEXT: vzeroupper
407 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
408 store <16 x i8> %0, ptr %addr, align 16
; Stores the low 256 bits (elements 0..3) of an <8 x double> with align 1.
; The checks expect an unaligned vmovups of ymm0, with no extract.
412 define void @extract_subvector512_v4f64_store_lo(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
413 ; SKX-LABEL: extract_subvector512_v4f64_store_lo:
414 ; SKX: ## %bb.0: ## %entry
415 ; SKX-NEXT: vmovups %ymm0, (%rdi)
416 ; SKX-NEXT: vzeroupper
419 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
420 store <4 x double> %0, ptr %addr, align 1
; Low-256-bit store of an <8 x double> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
424 define void @extract_subvector512_v4f64_store_lo_align_16(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
425 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_16:
426 ; SKX: ## %bb.0: ## %entry
427 ; SKX-NEXT: vmovups %ymm0, (%rdi)
428 ; SKX-NEXT: vzeroupper
431 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
432 store <4 x double> %0, ptr %addr, align 16
; Low-256-bit store of an <8 x double> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
436 define void @extract_subvector512_v4f64_store_lo_align_32(ptr nocapture %addr, <8 x double> %a) nounwind uwtable ssp {
437 ; SKX-LABEL: extract_subvector512_v4f64_store_lo_align_32:
438 ; SKX: ## %bb.0: ## %entry
439 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
440 ; SKX-NEXT: vzeroupper
443 %0 = shufflevector <8 x double> %a, <8 x double> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
444 store <4 x double> %0, ptr %addr, align 32
; Stores the low 256 bits (elements 0..7) of a <16 x float> with align 1.
; The checks expect an unaligned vmovups of ymm0.
448 define void @extract_subvector512_v8f32_store_lo(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
449 ; SKX-LABEL: extract_subvector512_v8f32_store_lo:
450 ; SKX: ## %bb.0: ## %entry
451 ; SKX-NEXT: vmovups %ymm0, (%rdi)
452 ; SKX-NEXT: vzeroupper
455 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
456 store <8 x float> %0, ptr %addr, align 1
; Low-256-bit store of a <16 x float> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
460 define void @extract_subvector512_v8f32_store_lo_align_16(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
461 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_16:
462 ; SKX: ## %bb.0: ## %entry
463 ; SKX-NEXT: vmovups %ymm0, (%rdi)
464 ; SKX-NEXT: vzeroupper
467 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
468 store <8 x float> %0, ptr %addr, align 16
; Low-256-bit store of a <16 x float> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
472 define void @extract_subvector512_v8f32_store_lo_align_32(ptr nocapture %addr, <16 x float> %a) nounwind uwtable ssp {
473 ; SKX-LABEL: extract_subvector512_v8f32_store_lo_align_32:
474 ; SKX: ## %bb.0: ## %entry
475 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
476 ; SKX-NEXT: vzeroupper
479 %0 = shufflevector <16 x float> %a, <16 x float> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
480 store <8 x float> %0, ptr %addr, align 32
; Stores the low 256 bits (elements 0..3) of an <8 x i64> with align 1.
; The checks expect an unaligned vmovups of ymm0.
484 define void @extract_subvector512_v4i64_store_lo(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
485 ; SKX-LABEL: extract_subvector512_v4i64_store_lo:
486 ; SKX: ## %bb.0: ## %entry
487 ; SKX-NEXT: vmovups %ymm0, (%rdi)
488 ; SKX-NEXT: vzeroupper
491 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
492 store <4 x i64> %0, ptr %addr, align 1
; Low-256-bit store of an <8 x i64> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
496 define void @extract_subvector512_v4i64_store_lo_align_16(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
497 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_16:
498 ; SKX: ## %bb.0: ## %entry
499 ; SKX-NEXT: vmovups %ymm0, (%rdi)
500 ; SKX-NEXT: vzeroupper
503 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
504 store <4 x i64> %0, ptr %addr, align 16
; Low-256-bit store of an <8 x i64> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
508 define void @extract_subvector512_v4i64_store_lo_align_32(ptr nocapture %addr, <8 x i64> %a) nounwind uwtable ssp {
509 ; SKX-LABEL: extract_subvector512_v4i64_store_lo_align_32:
510 ; SKX: ## %bb.0: ## %entry
511 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
512 ; SKX-NEXT: vzeroupper
515 %0 = shufflevector <8 x i64> %a, <8 x i64> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
516 store <4 x i64> %0, ptr %addr, align 32
; Stores the low 256 bits (elements 0..7) of a <16 x i32> with align 1.
; The checks expect an unaligned vmovups of ymm0.
520 define void @extract_subvector512_v8i32_store_lo(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
521 ; SKX-LABEL: extract_subvector512_v8i32_store_lo:
522 ; SKX: ## %bb.0: ## %entry
523 ; SKX-NEXT: vmovups %ymm0, (%rdi)
524 ; SKX-NEXT: vzeroupper
527 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
528 store <8 x i32> %0, ptr %addr, align 1
; Low-256-bit store of a <16 x i32> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
532 define void @extract_subvector512_v8i32_store_lo_align_16(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
533 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_16:
534 ; SKX: ## %bb.0: ## %entry
535 ; SKX-NEXT: vmovups %ymm0, (%rdi)
536 ; SKX-NEXT: vzeroupper
539 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
540 store <8 x i32> %0, ptr %addr, align 16
; Low-256-bit store of a <16 x i32> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
544 define void @extract_subvector512_v8i32_store_lo_align_32(ptr nocapture %addr, <16 x i32> %a) nounwind uwtable ssp {
545 ; SKX-LABEL: extract_subvector512_v8i32_store_lo_align_32:
546 ; SKX: ## %bb.0: ## %entry
547 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
548 ; SKX-NEXT: vzeroupper
551 %0 = shufflevector <16 x i32> %a, <16 x i32> undef, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
552 store <8 x i32> %0, ptr %addr, align 32
; Stores the low 256 bits (elements 0..15) of a <32 x i16> with align 1.
; The checks expect an unaligned vmovups of ymm0.
556 define void @extract_subvector512_v16i16_store_lo(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
557 ; SKX-LABEL: extract_subvector512_v16i16_store_lo:
558 ; SKX: ## %bb.0: ## %entry
559 ; SKX-NEXT: vmovups %ymm0, (%rdi)
560 ; SKX-NEXT: vzeroupper
563 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
564 store <16 x i16> %0, ptr %addr, align 1
; Low-256-bit store of a <32 x i16> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
568 define void @extract_subvector512_v16i16_store_lo_align_16(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
569 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_16:
570 ; SKX: ## %bb.0: ## %entry
571 ; SKX-NEXT: vmovups %ymm0, (%rdi)
572 ; SKX-NEXT: vzeroupper
575 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
576 store <16 x i16> %0, ptr %addr, align 16
; Low-256-bit store of a <32 x i16> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
580 define void @extract_subvector512_v16i16_store_lo_align_32(ptr nocapture %addr, <32 x i16> %a) nounwind uwtable ssp {
581 ; SKX-LABEL: extract_subvector512_v16i16_store_lo_align_32:
582 ; SKX: ## %bb.0: ## %entry
583 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
584 ; SKX-NEXT: vzeroupper
587 %0 = shufflevector <32 x i16> %a, <32 x i16> undef, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
588 store <16 x i16> %0, ptr %addr, align 32
; Stores the low 256 bits (bytes 0..31) of a <64 x i8> with align 1.
; The checks expect an unaligned vmovups of ymm0.
592 define void @extract_subvector512_v32i8_store_lo(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
593 ; SKX-LABEL: extract_subvector512_v32i8_store_lo:
594 ; SKX: ## %bb.0: ## %entry
595 ; SKX-NEXT: vmovups %ymm0, (%rdi)
596 ; SKX-NEXT: vzeroupper
599 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
600 store <32 x i8> %0, ptr %addr, align 1
; Low-256-bit store of a <64 x i8> with align 16. The alignment is below
; the 32-byte store width, and the checks still expect the unaligned vmovups.
604 define void @extract_subvector512_v32i8_store_lo_align_16(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
605 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_16:
606 ; SKX: ## %bb.0: ## %entry
607 ; SKX-NEXT: vmovups %ymm0, (%rdi)
608 ; SKX-NEXT: vzeroupper
611 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
612 store <32 x i8> %0, ptr %addr, align 16
; Low-256-bit store of a <64 x i8> with align 32; the checks expect the
; aligned form vmovaps of ymm0.
616 define void @extract_subvector512_v32i8_store_lo_align_32(ptr nocapture %addr, <64 x i8> %a) nounwind uwtable ssp {
617 ; SKX-LABEL: extract_subvector512_v32i8_store_lo_align_32:
618 ; SKX: ## %bb.0: ## %entry
619 ; SKX-NEXT: vmovaps %ymm0, (%rdi)
620 ; SKX-NEXT: vzeroupper
623 %0 = shufflevector <64 x i8> %a, <64 x i8> undef, <32 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16, i32 17, i32 18, i32 19, i32 20, i32 21, i32 22, i32 23, i32 24, i32 25, i32 26, i32 27, i32 28, i32 29, i32 30, i32 31>
624 store <32 x i8> %0, ptr %addr, align 32
; Merge-masked extract of the upper 256 bits (elements 4..7) of an
; <8 x double>. The low 4 bits of %__U choose, per element, between the
; extracted value and the pass-through %__W. The checks expect the select to
; fold into a {%k1}-masked vextractf64x4 $1.
628 define <4 x double> @test_mm512_mask_extractf64x4_pd(<4 x double> %__W, i8 %__U, <8 x double> %__A) {
629 ; SKX-LABEL: test_mm512_mask_extractf64x4_pd:
630 ; SKX: ## %bb.0: ## %entry
631 ; SKX-NEXT: kmovd %edi, %k1
632 ; SKX-NEXT: vextractf64x4 $1, %zmm1, %ymm0 {%k1}
635 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
636 %0 = bitcast i8 %__U to <8 x i1>
637 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
638 %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> %__W
; Zero-masked variant: elements 4..7 of an <8 x double> are selected against
; zeroinitializer using the low 4 bits of %__U. The checks expect
; vextractf64x4 $1 with {%k1} {z}.
642 define <4 x double> @test_mm512_maskz_extractf64x4_pd(i8 %__U, <8 x double> %__A) {
643 ; SKX-LABEL: test_mm512_maskz_extractf64x4_pd:
644 ; SKX: ## %bb.0: ## %entry
645 ; SKX-NEXT: kmovd %edi, %k1
646 ; SKX-NEXT: vextractf64x4 $1, %zmm0, %ymm0 {%k1} {z}
649 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
650 %0 = bitcast i8 %__U to <8 x i1>
651 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
652 %1 = select <4 x i1> %extract, <4 x double> %shuffle, <4 x double> zeroinitializer
; Merge-masked 128-bit extract from a 512-bit vector. %__A is bitcast from
; <8 x double> to <16 x float>, and elements 4..7 (128-bit lane index 1) are
; selected against %__W by the low 4 bits of %__U. The checks expect a
; {%k1}-masked vextractf32x4 $1.
656 define <4 x float> @test_mm512_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x double> %__A) {
657 ; SKX-LABEL: test_mm512_mask_extractf32x4_ps:
658 ; SKX: ## %bb.0: ## %entry
659 ; SKX-NEXT: kmovd %edi, %k1
660 ; SKX-NEXT: vextractf32x4 $1, %zmm1, %xmm0 {%k1}
661 ; SKX-NEXT: vzeroupper
664 %0 = bitcast <8 x double> %__A to <16 x float>
665 %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
666 %1 = bitcast i8 %__U to <8 x i1>
667 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
668 %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
; Zero-masked variant: elements 4..7 of %__A (viewed as <16 x float>) are
; selected against zeroinitializer by the low 4 bits of %__U. The checks
; expect vextractf32x4 $1 with {%k1} {z}.
672 define <4 x float> @test_mm512_maskz_extractf32x4_ps(i8 %__U, <8 x double> %__A) {
673 ; SKX-LABEL: test_mm512_maskz_extractf32x4_ps:
674 ; SKX: ## %bb.0: ## %entry
675 ; SKX-NEXT: kmovd %edi, %k1
676 ; SKX-NEXT: vextractf32x4 $1, %zmm0, %xmm0 {%k1} {z}
677 ; SKX-NEXT: vzeroupper
680 %0 = bitcast <8 x double> %__A to <16 x float>
681 %shuffle = shufflevector <16 x float> %0, <16 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
682 %1 = bitcast i8 %__U to <8 x i1>
683 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
684 %2 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Merge-masked extract of the upper 128 bits (elements 2..3) of a
; <4 x double>; the low 2 bits of %__U select against %__W. The checks expect
; a {%k1}-masked vextractf64x2 $1 on ymm.
688 define <2 x double> @test_mm256_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <4 x double> %__A) {
689 ; SKX-LABEL: test_mm256_mask_extractf64x2_pd:
690 ; SKX: ## %bb.0: ## %entry
691 ; SKX-NEXT: kmovd %edi, %k1
692 ; SKX-NEXT: vextractf64x2 $1, %ymm1, %xmm0 {%k1}
693 ; SKX-NEXT: vzeroupper
696 %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
697 %0 = bitcast i8 %__U to <8 x i1>
698 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
699 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
; Zero-masked variant: elements 2..3 of a <4 x double> are selected against
; zeroinitializer by the low 2 bits of %__U. The checks expect
; vextractf64x2 $1 with {%k1} {z}.
703 define <2 x double> @test_mm256_maskz_extractf64x2_pd(i8 %__U, <4 x double> %__A) {
704 ; SKX-LABEL: test_mm256_maskz_extractf64x2_pd:
705 ; SKX: ## %bb.0: ## %entry
706 ; SKX-NEXT: kmovd %edi, %k1
707 ; SKX-NEXT: vextractf64x2 $1, %ymm0, %xmm0 {%k1} {z}
708 ; SKX-NEXT: vzeroupper
711 %shuffle = shufflevector <4 x double> %__A, <4 x double> undef, <2 x i32> <i32 2, i32 3>
712 %0 = bitcast i8 %__U to <8 x i1>
713 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
714 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer
; Integer merge-masked extract of the upper 128 bits (elements 2..3) of a
; <4 x i64>; the low 2 bits of %__U select against %__W. The checks expect a
; {%k1}-masked vextracti64x2 $1.
718 define <2 x i64> @test_mm256_mask_extracti64x2_epi64(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
719 ; SKX-LABEL: test_mm256_mask_extracti64x2_epi64:
720 ; SKX: ## %bb.0: ## %entry
721 ; SKX-NEXT: kmovd %edi, %k1
722 ; SKX-NEXT: vextracti64x2 $1, %ymm1, %xmm0 {%k1}
723 ; SKX-NEXT: vzeroupper
726 %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
727 %0 = bitcast i8 %__U to <8 x i1>
728 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
729 %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> %__W
; Zero-masked variant: elements 2..3 of a <4 x i64> are selected against
; zeroinitializer by the low 2 bits of %__U. The checks expect
; vextracti64x2 $1 with {%k1} {z}.
733 define <2 x i64> @test_mm256_maskz_extracti64x2_epi64(i8 %__U, <4 x i64> %__A) {
734 ; SKX-LABEL: test_mm256_maskz_extracti64x2_epi64:
735 ; SKX: ## %bb.0: ## %entry
736 ; SKX-NEXT: kmovd %edi, %k1
737 ; SKX-NEXT: vextracti64x2 $1, %ymm0, %xmm0 {%k1} {z}
738 ; SKX-NEXT: vzeroupper
741 %shuffle = shufflevector <4 x i64> %__A, <4 x i64> undef, <2 x i32> <i32 2, i32 3>
742 %0 = bitcast i8 %__U to <8 x i1>
743 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
744 %1 = select <2 x i1> %extract, <2 x i64> %shuffle, <2 x i64> zeroinitializer
; Merge-masked extract of the upper 128 bits (elements 4..7) of an
; <8 x float>; the low 4 bits of %__U select against %__W. The checks expect
; a {%k1}-masked vextractf32x4 $1 on ymm.
748 define <4 x float> @test_mm256_mask_extractf32x4_ps(<4 x float> %__W, i8 %__U, <8 x float> %__A) {
749 ; SKX-LABEL: test_mm256_mask_extractf32x4_ps:
750 ; SKX: ## %bb.0: ## %entry
751 ; SKX-NEXT: kmovd %edi, %k1
752 ; SKX-NEXT: vextractf32x4 $1, %ymm1, %xmm0 {%k1}
753 ; SKX-NEXT: vzeroupper
756 %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
757 %0 = bitcast i8 %__U to <8 x i1>
758 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
759 %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> %__W
; Zero-masked variant: elements 4..7 of an <8 x float> are selected against
; zeroinitializer by the low 4 bits of %__U. The checks expect
; vextractf32x4 $1 with {%k1} {z}.
763 define <4 x float> @test_mm256_maskz_extractf32x4_ps(i8 %__U, <8 x float> %__A) {
764 ; SKX-LABEL: test_mm256_maskz_extractf32x4_ps:
765 ; SKX: ## %bb.0: ## %entry
766 ; SKX-NEXT: kmovd %edi, %k1
767 ; SKX-NEXT: vextractf32x4 $1, %ymm0, %xmm0 {%k1} {z}
768 ; SKX-NEXT: vzeroupper
771 %shuffle = shufflevector <8 x float> %__A, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
772 %0 = bitcast i8 %__U to <8 x i1>
773 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
774 %1 = select <4 x i1> %extract, <4 x float> %shuffle, <4 x float> zeroinitializer
; Merge-masked 32-bit-element extract on i64-typed arguments. %__A and %__W
; are bitcast to i32 vectors, elements 4..7 are selected against %__W by the
; low 4 bits of %__U, and the result is bitcast back to <2 x i64>. The checks
; expect a {%k1}-masked vextracti32x4 $1.
778 define <2 x i64> @test_mm256_mask_extracti32x4_epi32(<2 x i64> %__W, i8 %__U, <4 x i64> %__A) {
779 ; SKX-LABEL: test_mm256_mask_extracti32x4_epi32:
780 ; SKX: ## %bb.0: ## %entry
781 ; SKX-NEXT: kmovd %edi, %k1
782 ; SKX-NEXT: vextracti32x4 $1, %ymm1, %xmm0 {%k1}
783 ; SKX-NEXT: vzeroupper
786 %0 = bitcast <4 x i64> %__A to <8 x i32>
787 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
788 %1 = bitcast <2 x i64> %__W to <4 x i32>
789 %2 = bitcast i8 %__U to <8 x i1>
790 %extract = shufflevector <8 x i1> %2, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
791 %3 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> %1
792 %4 = bitcast <4 x i32> %3 to <2 x i64>
; Zero-masked variant: %__A is viewed as <8 x i32>, elements 4..7 are
; selected against zeroinitializer by the low 4 bits of %__U, and the result
; is bitcast back to <2 x i64>. The checks expect vextracti32x4 $1 with
; {%k1} {z}.
796 define <2 x i64> @test_mm256_maskz_extracti32x4_epi32(i8 %__U, <4 x i64> %__A) {
797 ; SKX-LABEL: test_mm256_maskz_extracti32x4_epi32:
798 ; SKX: ## %bb.0: ## %entry
799 ; SKX-NEXT: kmovd %edi, %k1
800 ; SKX-NEXT: vextracti32x4 $1, %ymm0, %xmm0 {%k1} {z}
801 ; SKX-NEXT: vzeroupper
804 %0 = bitcast <4 x i64> %__A to <8 x i32>
805 %shuffle = shufflevector <8 x i32> %0, <8 x i32> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
806 %1 = bitcast i8 %__U to <8 x i1>
807 %extract = shufflevector <8 x i1> %1, <8 x i1> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
808 %2 = select <4 x i1> %extract, <4 x i32> %shuffle, <4 x i32> zeroinitializer
809 %3 = bitcast <4 x i32> %2 to <2 x i64>
; Merge-masked extract of the upper 256 bits (elements 8..15) of a
; <16 x float>. All 8 bits of %__U are used directly as the select mask
; against %__W. The checks expect a {%k1}-masked vextractf32x8 $1.
813 define <8 x float> @test_mm512_mask_extractf32x8_ps(<8 x float> %__W, i8 %__U, <16 x float> %__A) {
814 ; SKX-LABEL: test_mm512_mask_extractf32x8_ps:
815 ; SKX: ## %bb.0: ## %entry
816 ; SKX-NEXT: kmovd %edi, %k1
817 ; SKX-NEXT: vextractf32x8 $1, %zmm1, %ymm0 {%k1}
820 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
821 %0 = bitcast i8 %__U to <8 x i1>
822 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> %__W
; Zero-masked variant: elements 8..15 of a <16 x float> are selected against
; zeroinitializer using all 8 bits of %__U. The checks expect
; vextractf32x8 $1 with {%k1} {z}.
826 define <8 x float> @test_mm512_maskz_extractf32x8_ps(i8 %__U, <16 x float> %__A) {
827 ; SKX-LABEL: test_mm512_maskz_extractf32x8_ps:
828 ; SKX: ## %bb.0: ## %entry
829 ; SKX-NEXT: kmovd %edi, %k1
830 ; SKX-NEXT: vextractf32x8 $1, %zmm0, %ymm0 {%k1} {z}
833 %shuffle = shufflevector <16 x float> %__A, <16 x float> undef, <8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
834 %0 = bitcast i8 %__U to <8 x i1>
835 %1 = select <8 x i1> %0, <8 x float> %shuffle, <8 x float> zeroinitializer
; Merge-masked extract of the topmost 128 bits (elements 6..7, lane index 3)
; of an <8 x double>; the low 2 bits of %__U select against %__W. The checks
; expect a {%k1}-masked vextractf64x2 $3.
839 define <2 x double> @test_mm512_mask_extractf64x2_pd(<2 x double> %__W, i8 %__U, <8 x double> %__A) {
840 ; SKX-LABEL: test_mm512_mask_extractf64x2_pd:
841 ; SKX: ## %bb.0: ## %entry
842 ; SKX-NEXT: kmovd %edi, %k1
843 ; SKX-NEXT: vextractf64x2 $3, %zmm1, %xmm0 {%k1}
844 ; SKX-NEXT: vzeroupper
847 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
848 %0 = bitcast i8 %__U to <8 x i1>
849 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
850 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> %__W
; Zero-masked variant: elements 6..7 (lane index 3) of an <8 x double> are
; selected against zeroinitializer by the low 2 bits of %__U. The checks
; expect vextractf64x2 $3 with {%k1} {z}.
854 define <2 x double> @test_mm512_maskz_extractf64x2_pd(i8 %__U, <8 x double> %__A) {
855 ; SKX-LABEL: test_mm512_maskz_extractf64x2_pd:
856 ; SKX: ## %bb.0: ## %entry
857 ; SKX-NEXT: kmovd %edi, %k1
858 ; SKX-NEXT: vextractf64x2 $3, %zmm0, %xmm0 {%k1} {z}
859 ; SKX-NEXT: vzeroupper
862 %shuffle = shufflevector <8 x double> %__A, <8 x double> undef, <2 x i32> <i32 6, i32 7>
863 %0 = bitcast i8 %__U to <8 x i1>
864 %extract = shufflevector <8 x i1> %0, <8 x i1> undef, <2 x i32> <i32 0, i32 1>
865 %1 = select <2 x i1> %extract, <2 x double> %shuffle, <2 x double> zeroinitializer