1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f | FileCheck %s --check-prefix=ALL
3 ; RUN: llc < %s -mtriple=x86_64-unknown -mattr=+avx512f,+avx512bw | FileCheck %s --check-prefix=ALL
5 ; Just one 32-bit run to make sure we do reasonable things.
6 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx512f | FileCheck %s --check-prefix=X86-AVX512F
; Builds an <8 x double> from three <2 x double> loads at vector indices 1, 2
; and 4 of %ptr; the third quarter (the first operand of %res23) is undef.
; The checks expect one unaligned zmm load at byte offset 16.
8 define <8 x double> @merge_8f64_2f64_12u4(ptr %ptr) nounwind uwtable noinline ssp {
9 ; ALL-LABEL: merge_8f64_2f64_12u4:
11 ; ALL-NEXT: vmovups 16(%rdi), %zmm0
14 ; X86-AVX512F-LABEL: merge_8f64_2f64_12u4:
15 ; X86-AVX512F: # %bb.0:
16 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
17 ; X86-AVX512F-NEXT: vmovups 16(%eax), %zmm0
18 ; X86-AVX512F-NEXT: retl
19 %ptr0 = getelementptr inbounds <2 x double>, ptr %ptr, i64 1
20 %ptr1 = getelementptr inbounds <2 x double>, ptr %ptr, i64 2
21 %ptr3 = getelementptr inbounds <2 x double>, ptr %ptr, i64 4
22 %val0 = load <2 x double>, ptr %ptr0
23 %val1 = load <2 x double>, ptr %ptr1
24 %val3 = load <2 x double>, ptr %ptr3
25 %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
26 %res23 = shufflevector <2 x double> undef, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
27 %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Builds an <8 x double> from three <2 x double> loads at vector indices 2, 3
; and 5 of %ptr; the third quarter is zeroinitializer rather than undef.
; The checks expect a zmm load at byte offset 32 followed by an AND with a
; constant-pool operand.
31 define <8 x double> @merge_8f64_2f64_23z5(ptr %ptr) nounwind uwtable noinline ssp {
32 ; ALL-LABEL: merge_8f64_2f64_23z5:
34 ; ALL-NEXT: vmovdqu64 32(%rdi), %zmm0
35 ; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
38 ; X86-AVX512F-LABEL: merge_8f64_2f64_23z5:
39 ; X86-AVX512F: # %bb.0:
40 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
41 ; X86-AVX512F-NEXT: vmovdqu64 32(%eax), %zmm0
42 ; X86-AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
43 ; X86-AVX512F-NEXT: retl
44 %ptr0 = getelementptr inbounds <2 x double>, ptr %ptr, i64 2
45 %ptr1 = getelementptr inbounds <2 x double>, ptr %ptr, i64 3
46 %ptr3 = getelementptr inbounds <2 x double>, ptr %ptr, i64 5
47 %val0 = load <2 x double>, ptr %ptr0
48 %val1 = load <2 x double>, ptr %ptr1
49 %val3 = load <2 x double>, ptr %ptr3
50 %res01 = shufflevector <2 x double> %val0, <2 x double> %val1, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
51 %res23 = shufflevector <2 x double> zeroinitializer, <2 x double> %val3, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
52 %res = shufflevector <4 x double> %res01, <4 x double> %res23, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Concatenates a zero <4 x double> (low half) with a <4 x double> loaded from
; vector index 2 of %ptr (high half).
; The checks expect a register zeroing followed by a vinsertf64x4 that takes
; the upper half straight from memory at byte offset 64.
56 define <8 x double> @merge_8f64_4f64_z2(ptr %ptr) nounwind uwtable noinline ssp {
57 ; ALL-LABEL: merge_8f64_4f64_z2:
59 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
60 ; ALL-NEXT: vinsertf64x4 $1, 64(%rdi), %zmm0, %zmm0
63 ; X86-AVX512F-LABEL: merge_8f64_4f64_z2:
64 ; X86-AVX512F: # %bb.0:
65 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
66 ; X86-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
67 ; X86-AVX512F-NEXT: vinsertf64x4 $1, 64(%eax), %zmm0, %zmm0
68 ; X86-AVX512F-NEXT: retl
69 %ptr1 = getelementptr inbounds <4 x double>, ptr %ptr, i64 2
70 %val1 = load <4 x double>, ptr %ptr1
71 %res = shufflevector <4 x double> zeroinitializer, <4 x double> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Inserts scalar doubles loaded from indices 2, 3 and 9 of %ptr into lanes 0, 1
; and 7 of an otherwise-undef <8 x double> and returns it.
; The checks expect one unaligned zmm load at byte offset 16.
75 define <8 x double> @merge_8f64_f64_23uuuuu9(ptr %ptr) nounwind uwtable noinline ssp {
76 ; ALL-LABEL: merge_8f64_f64_23uuuuu9:
78 ; ALL-NEXT: vmovups 16(%rdi), %zmm0
81 ; X86-AVX512F-LABEL: merge_8f64_f64_23uuuuu9:
82 ; X86-AVX512F: # %bb.0:
83 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
84 ; X86-AVX512F-NEXT: vmovups 16(%eax), %zmm0
85 ; X86-AVX512F-NEXT: retl
86 %ptr0 = getelementptr inbounds double, ptr %ptr, i64 2
87 %ptr1 = getelementptr inbounds double, ptr %ptr, i64 3
88 %ptr7 = getelementptr inbounds double, ptr %ptr, i64 9
89 %val0 = load double, ptr %ptr0
90 %val1 = load double, ptr %ptr1
91 %val7 = load double, ptr %ptr7
92 %res0 = insertelement <8 x double> undef, double %val0, i32 0
93 %res1 = insertelement <8 x double> %res0, double %val1, i32 1
94 %res7 = insertelement <8 x double> %res1, double %val7, i32 7
95 ret <8 x double> %res7
; Inserts doubles loaded from indices 1 and 2 of %ptr into lanes 0 and 1, and
; constant 0.0 into lanes 2, 3, 6 and 7; lanes 4 and 5 stay undef.
; The checks expect only an xmm load of the two doubles at byte offset 8.
98 define <8 x double> @merge_8f64_f64_12zzuuzz(ptr %ptr) nounwind uwtable noinline ssp {
99 ; ALL-LABEL: merge_8f64_f64_12zzuuzz:
101 ; ALL-NEXT: vmovups 8(%rdi), %xmm0
104 ; X86-AVX512F-LABEL: merge_8f64_f64_12zzuuzz:
105 ; X86-AVX512F: # %bb.0:
106 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
107 ; X86-AVX512F-NEXT: vmovups 8(%eax), %xmm0
108 ; X86-AVX512F-NEXT: retl
109 %ptr0 = getelementptr inbounds double, ptr %ptr, i64 1
110 %ptr1 = getelementptr inbounds double, ptr %ptr, i64 2
111 %val0 = load double, ptr %ptr0
112 %val1 = load double, ptr %ptr1
113 %res0 = insertelement <8 x double> undef, double %val0, i32 0
114 %res1 = insertelement <8 x double> %res0, double %val1, i32 1
115 %res2 = insertelement <8 x double> %res1, double 0.0, i32 2
116 %res3 = insertelement <8 x double> %res2, double 0.0, i32 3
117 %res6 = insertelement <8 x double> %res3, double 0.0, i32 6
118 %res7 = insertelement <8 x double> %res6, double 0.0, i32 7
119 ret <8 x double> %res7
; Inserts doubles loaded from indices 1, 3, 5 and 8 of %ptr into lanes 0, 2, 4
; and 7, and constant 0.0 into lane 5; lanes 1, 3 and 6 stay undef.
; The checks expect a zmm load at byte offset 8 followed by an AND with a
; constant-pool operand.
122 define <8 x double> @merge_8f64_f64_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ssp {
123 ; ALL-LABEL: merge_8f64_f64_1u3u5zu8:
125 ; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
126 ; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
129 ; X86-AVX512F-LABEL: merge_8f64_f64_1u3u5zu8:
130 ; X86-AVX512F: # %bb.0:
131 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
132 ; X86-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
133 ; X86-AVX512F-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
134 ; X86-AVX512F-NEXT: retl
135 %ptr0 = getelementptr inbounds double, ptr %ptr, i64 1
136 %ptr2 = getelementptr inbounds double, ptr %ptr, i64 3
137 %ptr4 = getelementptr inbounds double, ptr %ptr, i64 5
138 %ptr7 = getelementptr inbounds double, ptr %ptr, i64 8
139 %val0 = load double, ptr %ptr0
140 %val2 = load double, ptr %ptr2
141 %val4 = load double, ptr %ptr4
142 %val7 = load double, ptr %ptr7
143 %res0 = insertelement <8 x double> undef, double %val0, i32 0
144 %res2 = insertelement <8 x double> %res0, double %val2, i32 2
145 %res4 = insertelement <8 x double> %res2, double %val4, i32 4
146 %res5 = insertelement <8 x double> %res4, double 0.0, i32 5
147 %res7 = insertelement <8 x double> %res5, double %val7, i32 7
148 ret <8 x double> %res7
; Concatenates a zero <4 x i64> (low half) with a <4 x i64> loaded from vector
; index 3 of %ptr (high half).
; The checks expect a register zeroing followed by a vinsertf64x4 that takes
; the upper half straight from memory at byte offset 96.
151 define <8 x i64> @merge_8i64_4i64_z3(ptr %ptr) nounwind uwtable noinline ssp {
152 ; ALL-LABEL: merge_8i64_4i64_z3:
154 ; ALL-NEXT: vxorps %xmm0, %xmm0, %xmm0
155 ; ALL-NEXT: vinsertf64x4 $1, 96(%rdi), %zmm0, %zmm0
158 ; X86-AVX512F-LABEL: merge_8i64_4i64_z3:
159 ; X86-AVX512F: # %bb.0:
160 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
161 ; X86-AVX512F-NEXT: vxorps %xmm0, %xmm0, %xmm0
162 ; X86-AVX512F-NEXT: vinsertf64x4 $1, 96(%eax), %zmm0, %zmm0
163 ; X86-AVX512F-NEXT: retl
164 %ptr1 = getelementptr inbounds <4 x i64>, ptr %ptr, i64 3
165 %val1 = load <4 x i64>, ptr %ptr1
166 %res = shufflevector <4 x i64> zeroinitializer, <4 x i64> %val1, <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
; Inserts i64 values loaded from indices 5, 6 and 9 of %ptr into lanes 0, 1 and
; 4, and constant 0 into lanes 2, 3, 6 and 7; lane 5 stays undef.
; The checks expect an xmm load at byte offset 40 for the low part, a
; zero-extending scalar load for the high part, and a vinsertf64x4 joining them.
170 define <8 x i64> @merge_8i64_i64_56zz9uzz(ptr %ptr) nounwind uwtable noinline ssp {
171 ; ALL-LABEL: merge_8i64_i64_56zz9uzz:
173 ; ALL-NEXT: vmovups 40(%rdi), %xmm0
174 ; ALL-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
175 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
178 ; X86-AVX512F-LABEL: merge_8i64_i64_56zz9uzz:
179 ; X86-AVX512F: # %bb.0:
180 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
181 ; X86-AVX512F-NEXT: vmovups 40(%eax), %xmm0
182 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm1 = mem[0],zero
183 ; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
184 ; X86-AVX512F-NEXT: retl
185 %ptr0 = getelementptr inbounds i64, ptr %ptr, i64 5
186 %ptr1 = getelementptr inbounds i64, ptr %ptr, i64 6
187 %ptr4 = getelementptr inbounds i64, ptr %ptr, i64 9
188 %val0 = load i64, ptr %ptr0
189 %val1 = load i64, ptr %ptr1
190 %val4 = load i64, ptr %ptr4
191 %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
192 %res1 = insertelement <8 x i64> %res0, i64 %val1, i32 1
193 %res2 = insertelement <8 x i64> %res1, i64 0, i32 2
194 %res3 = insertelement <8 x i64> %res2, i64 0, i32 3
195 %res4 = insertelement <8 x i64> %res3, i64 %val4, i32 4
196 %res6 = insertelement <8 x i64> %res4, i64 0, i32 6
197 %res7 = insertelement <8 x i64> %res6, i64 0, i32 7
; Inserts i64 values loaded from indices 1, 3, 5 and 8 of %ptr into lanes 0, 2,
; 4 and 7, and constant 0 into lane 5; lanes 1, 3 and 6 stay undef.
; The checks expect a zmm load at byte offset 8 followed by an AND with a
; constant-pool operand. The x86-64 runs check vpandq while the i686 run
; checks vpandd; this difference is present in the autogenerated assertions.
201 define <8 x i64> @merge_8i64_i64_1u3u5zu8(ptr %ptr) nounwind uwtable noinline ssp {
202 ; ALL-LABEL: merge_8i64_i64_1u3u5zu8:
204 ; ALL-NEXT: vmovdqu64 8(%rdi), %zmm0
205 ; ALL-NEXT: vpandq {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
208 ; X86-AVX512F-LABEL: merge_8i64_i64_1u3u5zu8:
209 ; X86-AVX512F: # %bb.0:
210 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
211 ; X86-AVX512F-NEXT: vmovdqu64 8(%eax), %zmm0
212 ; X86-AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
213 ; X86-AVX512F-NEXT: retl
214 %ptr0 = getelementptr inbounds i64, ptr %ptr, i64 1
215 %ptr2 = getelementptr inbounds i64, ptr %ptr, i64 3
216 %ptr4 = getelementptr inbounds i64, ptr %ptr, i64 5
217 %ptr7 = getelementptr inbounds i64, ptr %ptr, i64 8
218 %val0 = load i64, ptr %ptr0
219 %val2 = load i64, ptr %ptr2
220 %val4 = load i64, ptr %ptr4
221 %val7 = load i64, ptr %ptr7
222 %res0 = insertelement <8 x i64> undef, i64 %val0, i32 0
223 %res2 = insertelement <8 x i64> %res0, i64 %val2, i32 2
224 %res4 = insertelement <8 x i64> %res2, i64 %val4, i32 4
225 %res5 = insertelement <8 x i64> %res4, i64 0, i32 5
226 %res7 = insertelement <8 x i64> %res5, i64 %val7, i32 7
; Inserts floats loaded from indices 8 and 9 of %ptr into lanes 0 and 1, and
; constant 0.0 into lanes 2, 3, 4 and 15; every other lane stays undef.
; The checks expect a single zero-extending vmovsd load (two floats at once).
230 define <16 x float> @merge_16f32_f32_89zzzuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp {
231 ; ALL-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
233 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
236 ; X86-AVX512F-LABEL: merge_16f32_f32_89zzzuuuuuuuuuuuz:
237 ; X86-AVX512F: # %bb.0:
238 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
239 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
240 ; X86-AVX512F-NEXT: retl
241 %ptr0 = getelementptr inbounds float, ptr %ptr, i64 8
242 %ptr1 = getelementptr inbounds float, ptr %ptr, i64 9
243 %val0 = load float, ptr %ptr0
244 %val1 = load float, ptr %ptr1
245 %res0 = insertelement <16 x float> undef, float %val0, i32 0
246 %res1 = insertelement <16 x float> %res0, float %val1, i32 1
247 %res2 = insertelement <16 x float> %res1, float 0.0, i32 2
248 %res3 = insertelement <16 x float> %res2, float 0.0, i32 3
249 %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
250 %resF = insertelement <16 x float> %res4, float 0.0, i32 15
251 ret <16 x float> %resF
; Inserts floats loaded from indices 4, 5 and 7 of %ptr into lanes 0, 1 and 3
; of an otherwise-undef <16 x float> and returns it.
; The checks expect one unaligned xmm load at byte offset 16.
254 define <16 x float> @merge_16f32_f32_45u7uuuuuuuuuuuu(ptr %ptr) nounwind uwtable noinline ssp {
255 ; ALL-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
257 ; ALL-NEXT: vmovups 16(%rdi), %xmm0
260 ; X86-AVX512F-LABEL: merge_16f32_f32_45u7uuuuuuuuuuuu:
261 ; X86-AVX512F: # %bb.0:
262 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
263 ; X86-AVX512F-NEXT: vmovups 16(%eax), %xmm0
264 ; X86-AVX512F-NEXT: retl
265 %ptr0 = getelementptr inbounds float, ptr %ptr, i64 4
266 %ptr1 = getelementptr inbounds float, ptr %ptr, i64 5
267 %ptr3 = getelementptr inbounds float, ptr %ptr, i64 7
268 %val0 = load float, ptr %ptr0
269 %val1 = load float, ptr %ptr1
270 %val3 = load float, ptr %ptr3
271 %res0 = insertelement <16 x float> undef, float %val0, i32 0
272 %res1 = insertelement <16 x float> %res0, float %val1, i32 1
273 %res3 = insertelement <16 x float> %res1, float %val3, i32 3
274 ret <16 x float> %res3
; Inserts floats loaded from indices 0, 3, 12, 14 and 15 of %ptr into the
; same-numbered lanes of an otherwise-undef <16 x float> and returns it.
; The checks expect one unaligned zmm load from %ptr itself.
277 define <16 x float> @merge_16f32_f32_0uu3uuuuuuuuCuEF(ptr %ptr) nounwind uwtable noinline ssp {
278 ; ALL-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
280 ; ALL-NEXT: vmovups (%rdi), %zmm0
283 ; X86-AVX512F-LABEL: merge_16f32_f32_0uu3uuuuuuuuCuEF:
284 ; X86-AVX512F: # %bb.0:
285 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
286 ; X86-AVX512F-NEXT: vmovups (%eax), %zmm0
287 ; X86-AVX512F-NEXT: retl
288 %ptr3 = getelementptr inbounds float, ptr %ptr, i64 3
289 %ptrC = getelementptr inbounds float, ptr %ptr, i64 12
290 %ptrE = getelementptr inbounds float, ptr %ptr, i64 14
291 %ptrF = getelementptr inbounds float, ptr %ptr, i64 15
292 %val0 = load float, ptr %ptr
293 %val3 = load float, ptr %ptr3
294 %valC = load float, ptr %ptrC
295 %valE = load float, ptr %ptrE
296 %valF = load float, ptr %ptrF
297 %res0 = insertelement <16 x float> undef, float %val0, i32 0
298 %res3 = insertelement <16 x float> %res0, float %val3, i32 3
299 %resC = insertelement <16 x float> %res3, float %valC, i32 12
300 %resE = insertelement <16 x float> %resC, float %valE, i32 14
301 %resF = insertelement <16 x float> %resE, float %valF, i32 15
302 ret <16 x float> %resF
; Inserts floats loaded from indices 0, 3, 12, 14 and 15 of %ptr into the
; same-numbered lanes, and constant 0.0 into lanes 4, 5 and 13.
; The checks expect a zmm load from %ptr followed by an AND with a
; constant-pool operand.
; NOTE(review): the name's lane pattern has 'z' at lane 11 and 'u' at lane 13,
; but the IR zeroes lane 13 and leaves lane 11 undef - confirm which is intended.
305 define <16 x float> @merge_16f32_f32_0uu3zzuuuuuzCuEF(ptr %ptr) nounwind uwtable noinline ssp {
306 ; ALL-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
308 ; ALL-NEXT: vmovdqu64 (%rdi), %zmm0
309 ; ALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
312 ; X86-AVX512F-LABEL: merge_16f32_f32_0uu3zzuuuuuzCuEF:
313 ; X86-AVX512F: # %bb.0:
314 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
315 ; X86-AVX512F-NEXT: vmovdqu64 (%eax), %zmm0
316 ; X86-AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
317 ; X86-AVX512F-NEXT: retl
318 %ptr3 = getelementptr inbounds float, ptr %ptr, i64 3
319 %ptrC = getelementptr inbounds float, ptr %ptr, i64 12
320 %ptrE = getelementptr inbounds float, ptr %ptr, i64 14
321 %ptrF = getelementptr inbounds float, ptr %ptr, i64 15
322 %val0 = load float, ptr %ptr
323 %val3 = load float, ptr %ptr3
324 %valC = load float, ptr %ptrC
325 %valE = load float, ptr %ptrE
326 %valF = load float, ptr %ptrF
327 %res0 = insertelement <16 x float> undef, float %val0, i32 0
328 %res3 = insertelement <16 x float> %res0, float %val3, i32 3
329 %res4 = insertelement <16 x float> %res3, float 0.0, i32 4
330 %res5 = insertelement <16 x float> %res4, float 0.0, i32 5
331 %resC = insertelement <16 x float> %res5, float %valC, i32 12
332 %resD = insertelement <16 x float> %resC, float 0.0, i32 13
333 %resE = insertelement <16 x float> %resD, float %valE, i32 14
334 %resF = insertelement <16 x float> %resE, float %valF, i32 15
335 ret <16 x float> %resF
; Inserts i32 values loaded from indices 1 and 2 of %ptr into lanes 0 and 1,
; and constant 0 into lanes 2, 3, 4 and 15; every other lane stays undef.
; The checks expect a single zero-extending vmovsd load (two i32s at once).
338 define <16 x i32> @merge_16i32_i32_12zzzuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp {
339 ; ALL-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
341 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
344 ; X86-AVX512F-LABEL: merge_16i32_i32_12zzzuuuuuuuuuuuz:
345 ; X86-AVX512F: # %bb.0:
346 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
347 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
348 ; X86-AVX512F-NEXT: retl
349 %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 1
350 %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 2
351 %val0 = load i32, ptr %ptr0
352 %val1 = load i32, ptr %ptr1
353 %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
354 %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
355 %res2 = insertelement <16 x i32> %res1, i32 0, i32 2
356 %res3 = insertelement <16 x i32> %res2, i32 0, i32 3
357 %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
358 %resF = insertelement <16 x i32> %res4, i32 0, i32 15
; Inserts i32 values loaded from indices 2, 3 and 5 of %ptr into lanes 0, 1 and
; 3 of an otherwise-undef <16 x i32>.
; The checks expect one unaligned xmm load at byte offset 8.
362 define <16 x i32> @merge_16i32_i32_23u5uuuuuuuuuuuu(ptr %ptr) nounwind uwtable noinline ssp {
363 ; ALL-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
365 ; ALL-NEXT: vmovups 8(%rdi), %xmm0
368 ; X86-AVX512F-LABEL: merge_16i32_i32_23u5uuuuuuuuuuuu:
369 ; X86-AVX512F: # %bb.0:
370 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
371 ; X86-AVX512F-NEXT: vmovups 8(%eax), %xmm0
372 ; X86-AVX512F-NEXT: retl
373 %ptr0 = getelementptr inbounds i32, ptr %ptr, i64 2
374 %ptr1 = getelementptr inbounds i32, ptr %ptr, i64 3
375 %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 5
376 %val0 = load i32, ptr %ptr0
377 %val1 = load i32, ptr %ptr1
378 %val3 = load i32, ptr %ptr3
379 %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
380 %res1 = insertelement <16 x i32> %res0, i32 %val1, i32 1
381 %res3 = insertelement <16 x i32> %res1, i32 %val3, i32 3
; Inserts i32 values loaded from indices 0, 3, 12, 14 and 15 of %ptr into the
; same-numbered lanes of an otherwise-undef <16 x i32>.
; The checks expect one unaligned zmm load from %ptr itself.
385 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF(ptr %ptr) nounwind uwtable noinline ssp {
386 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
388 ; ALL-NEXT: vmovups (%rdi), %zmm0
391 ; X86-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF:
392 ; X86-AVX512F: # %bb.0:
393 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
394 ; X86-AVX512F-NEXT: vmovups (%eax), %zmm0
395 ; X86-AVX512F-NEXT: retl
396 %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 3
397 %ptrC = getelementptr inbounds i32, ptr %ptr, i64 12
398 %ptrE = getelementptr inbounds i32, ptr %ptr, i64 14
399 %ptrF = getelementptr inbounds i32, ptr %ptr, i64 15
400 %val0 = load i32, ptr %ptr
401 %val3 = load i32, ptr %ptr3
402 %valC = load i32, ptr %ptrC
403 %valE = load i32, ptr %ptrE
404 %valF = load i32, ptr %ptrF
405 %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
406 %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
407 %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
408 %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
409 %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
; Inserts i32 values loaded from indices 0, 3, 12, 14 and 15 of %ptr into the
; same-numbered lanes, and constant 0 into lanes 4, 5 and 13.
; The checks expect a zmm load from %ptr followed by an AND with a
; constant-pool operand.
; NOTE(review): the name's lane pattern has 'z' at lane 11 and 'u' at lane 13,
; but the IR zeroes lane 13 and leaves lane 11 undef - confirm which is intended.
413 define <16 x i32> @merge_16i32_i32_0uu3zzuuuuuzCuEF(ptr %ptr) nounwind uwtable noinline ssp {
414 ; ALL-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
416 ; ALL-NEXT: vmovdqu64 (%rdi), %zmm0
417 ; ALL-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %zmm0, %zmm0
420 ; X86-AVX512F-LABEL: merge_16i32_i32_0uu3zzuuuuuzCuEF:
421 ; X86-AVX512F: # %bb.0:
422 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
423 ; X86-AVX512F-NEXT: vmovdqu64 (%eax), %zmm0
424 ; X86-AVX512F-NEXT: vpandd {{\.?LCPI[0-9]+_[0-9]+}}, %zmm0, %zmm0
425 ; X86-AVX512F-NEXT: retl
426 %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 3
427 %ptrC = getelementptr inbounds i32, ptr %ptr, i64 12
428 %ptrE = getelementptr inbounds i32, ptr %ptr, i64 14
429 %ptrF = getelementptr inbounds i32, ptr %ptr, i64 15
430 %val0 = load i32, ptr %ptr
431 %val3 = load i32, ptr %ptr3
432 %valC = load i32, ptr %ptrC
433 %valE = load i32, ptr %ptrE
434 %valF = load i32, ptr %ptrF
435 %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
436 %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
437 %res4 = insertelement <16 x i32> %res3, i32 0, i32 4
438 %res5 = insertelement <16 x i32> %res4, i32 0, i32 5
439 %resC = insertelement <16 x i32> %res5, i32 %valC, i32 12
440 %resD = insertelement <16 x i32> %resC, i32 0, i32 13
441 %resE = insertelement <16 x i32> %resD, i32 %valE, i32 14
442 %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15
; Inserts i16 values loaded from indices 1, 2 and 4 of %ptr into lanes 0, 1 and
; 3, and constant 0 into lanes 30 and 31; every other lane stays undef.
; The checks expect a single zero-extending vmovsd load.
446 define <32 x i16> @merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz(ptr %ptr) nounwind uwtable noinline ssp {
447 ; ALL-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
449 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
452 ; X86-AVX512F-LABEL: merge_32i16_i16_12u4uuuuuuuuuuuuuuuuuuuuuuuuuuzz:
453 ; X86-AVX512F: # %bb.0:
454 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
455 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
456 ; X86-AVX512F-NEXT: retl
457 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 1
458 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 2
459 %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 4
460 %val0 = load i16, ptr %ptr0
461 %val1 = load i16, ptr %ptr1
462 %val3 = load i16, ptr %ptr3
463 %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
464 %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
465 %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
466 %res30 = insertelement <32 x i16> %res3, i16 0, i16 30
467 %res31 = insertelement <32 x i16> %res30, i16 0, i16 31
468 ret <32 x i16> %res31
; Inserts i16 values loaded from indices 4, 5 and 7 of %ptr into lanes 0, 1 and
; 3 of an otherwise-undef <32 x i16>.
; The checks expect a single zero-extending vmovsd load.
471 define <32 x i16> @merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu(ptr %ptr) nounwind uwtable noinline ssp {
472 ; ALL-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
474 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
477 ; X86-AVX512F-LABEL: merge_32i16_i16_45u7uuuuuuuuuuuuuuuuuuuuuuuuuuuu:
478 ; X86-AVX512F: # %bb.0:
479 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
480 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
481 ; X86-AVX512F-NEXT: retl
482 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 4
483 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 5
484 %ptr3 = getelementptr inbounds i16, ptr %ptr, i64 7
485 %val0 = load i16, ptr %ptr0
486 %val1 = load i16, ptr %ptr1
487 %val3 = load i16, ptr %ptr3
488 %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
489 %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
490 %res3 = insertelement <32 x i16> %res1, i16 %val3, i16 3
; Inserts i16 values loaded from indices 2 and 3 of %ptr into lanes 0 and 1,
; and constant 0 into lanes 3, 14, 15, 16 and 17; other lanes stay undef.
; The checks expect a single zero-extending vmovss load (two i16s at once).
494 define <32 x i16> @merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu(ptr %ptr) nounwind uwtable noinline ssp {
495 ; ALL-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
497 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
500 ; X86-AVX512F-LABEL: merge_32i16_i16_23uzuuuuuuuuuuzzzzuuuuuuuuuuuuuu:
501 ; X86-AVX512F: # %bb.0:
502 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
503 ; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
504 ; X86-AVX512F-NEXT: retl
505 %ptr0 = getelementptr inbounds i16, ptr %ptr, i64 2
506 %ptr1 = getelementptr inbounds i16, ptr %ptr, i64 3
507 %val0 = load i16, ptr %ptr0
508 %val1 = load i16, ptr %ptr1
509 %res0 = insertelement <32 x i16> undef, i16 %val0, i16 0
510 %res1 = insertelement <32 x i16> %res0, i16 %val1, i16 1
511 %res3 = insertelement <32 x i16> %res1, i16 0, i16 3
512 %resE = insertelement <32 x i16> %res3, i16 0, i16 14
513 %resF = insertelement <32 x i16> %resE, i16 0, i16 15
514 %resG = insertelement <32 x i16> %resF, i16 0, i16 16
515 %resH = insertelement <32 x i16> %resG, i16 0, i16 17
; Inserts i8 values loaded from indices 1, 2, 4 and 8 of %ptr into lanes 0, 1,
; 3 and 7, and constant 0 into lanes 14, 15, 16, 17 and 63; other lanes stay
; undef.
; The checks expect a single zero-extending vmovsd load.
519 define <64 x i8> @merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp {
520 ; ALL-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
522 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
525 ; X86-AVX512F-LABEL: merge_64i8_i8_12u4uuu8uuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
526 ; X86-AVX512F: # %bb.0:
527 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
528 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
529 ; X86-AVX512F-NEXT: retl
530 %ptr0 = getelementptr inbounds i8, ptr %ptr, i64 1
531 %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 2
532 %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 4
533 %ptr7 = getelementptr inbounds i8, ptr %ptr, i64 8
534 %val0 = load i8, ptr %ptr0
535 %val1 = load i8, ptr %ptr1
536 %val3 = load i8, ptr %ptr3
537 %val7 = load i8, ptr %ptr7
538 %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
539 %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
540 %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
541 %res7 = insertelement <64 x i8> %res3, i8 %val7, i8 7
542 %res14 = insertelement <64 x i8> %res7, i8 0, i8 14
543 %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
544 %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
545 %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
546 %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
; Inserts i8 values loaded from indices 1, 2 and 4 of %ptr into lanes 0, 1 and
; 3, and constant 0 into lanes 14, 15, 16, 17 and 63; other lanes stay undef.
; The checks expect a single zero-extending vmovss load.
550 define <64 x i8> @merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz(ptr %ptr) nounwind uwtable noinline ssp {
551 ; ALL-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
553 ; ALL-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
556 ; X86-AVX512F-LABEL: merge_64i8_i8_12u4uuuuuuuuuuzzzzuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuuz:
557 ; X86-AVX512F: # %bb.0:
558 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
559 ; X86-AVX512F-NEXT: vmovss {{.*#+}} xmm0 = mem[0],zero,zero,zero
560 ; X86-AVX512F-NEXT: retl
561 %ptr0 = getelementptr inbounds i8, ptr %ptr, i64 1
562 %ptr1 = getelementptr inbounds i8, ptr %ptr, i64 2
563 %ptr3 = getelementptr inbounds i8, ptr %ptr, i64 4
564 %val0 = load i8, ptr %ptr0
565 %val1 = load i8, ptr %ptr1
566 %val3 = load i8, ptr %ptr3
567 %res0 = insertelement <64 x i8> undef, i8 %val0, i8 0
568 %res1 = insertelement <64 x i8> %res0, i8 %val1, i8 1
569 %res3 = insertelement <64 x i8> %res1, i8 %val3, i8 3
570 %res14 = insertelement <64 x i8> %res3, i8 0, i8 14
571 %res15 = insertelement <64 x i8> %res14, i8 0, i8 15
572 %res16 = insertelement <64 x i8> %res15, i8 0, i8 16
573 %res17 = insertelement <64 x i8> %res16, i8 0, i8 17
574 %res63 = insertelement <64 x i8> %res17, i8 0, i8 63
579 ; Consecutive loads must not be combined when any of them is volatile.
; Inserts doubles loaded from indices 2, 3 and 9 of %ptr into lanes 0, 1 and 7
; of an otherwise-undef <8 x double>; only the first load is volatile.
; The checks expect separate loads (vmovsd, vmovhps, vbroadcastsd at byte
; offset 72) joined by vinsertf64x4 rather than one wide load.
582 define <8 x double> @merge_8f64_f64_23uuuuu9_volatile(ptr %ptr) nounwind uwtable noinline ssp {
583 ; ALL-LABEL: merge_8f64_f64_23uuuuu9_volatile:
585 ; ALL-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
586 ; ALL-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
587 ; ALL-NEXT: vbroadcastsd 72(%rdi), %ymm1
588 ; ALL-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
591 ; X86-AVX512F-LABEL: merge_8f64_f64_23uuuuu9_volatile:
592 ; X86-AVX512F: # %bb.0:
593 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
594 ; X86-AVX512F-NEXT: vmovsd {{.*#+}} xmm0 = mem[0],zero
595 ; X86-AVX512F-NEXT: vmovhps {{.*#+}} xmm0 = xmm0[0,1],mem[0,1]
596 ; X86-AVX512F-NEXT: vbroadcastsd 72(%eax), %ymm1
597 ; X86-AVX512F-NEXT: vinsertf64x4 $1, %ymm1, %zmm0, %zmm0
598 ; X86-AVX512F-NEXT: retl
599 %ptr0 = getelementptr inbounds double, ptr %ptr, i64 2
600 %ptr1 = getelementptr inbounds double, ptr %ptr, i64 3
601 %ptr7 = getelementptr inbounds double, ptr %ptr, i64 9
602 %val0 = load volatile double, ptr %ptr0
603 %val1 = load double, ptr %ptr1
604 %val7 = load double, ptr %ptr7
605 %res0 = insertelement <8 x double> undef, double %val0, i32 0
606 %res1 = insertelement <8 x double> %res0, double %val1, i32 1
607 %res7 = insertelement <8 x double> %res1, double %val7, i32 7
608 ret <8 x double> %res7
; Inserts i32 values loaded from indices 0, 3, 12, 14 and 15 of %ptr into the
; same-numbered lanes of an otherwise-undef <16 x i32>; all five loads are
; volatile.
; The checks expect each element to be loaded individually (vmovd / vpinsrd)
; and the pieces assembled with vinserti128 and vinserti64x4, not one wide load.
611 define <16 x i32> @merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile(ptr %ptr) nounwind uwtable noinline ssp {
612 ; ALL-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
614 ; ALL-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
615 ; ALL-NEXT: vpinsrd $3, 12(%rdi), %xmm0, %xmm0
616 ; ALL-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
617 ; ALL-NEXT: vpinsrd $2, 56(%rdi), %xmm1, %xmm1
618 ; ALL-NEXT: vpinsrd $3, 60(%rdi), %xmm1, %xmm1
619 ; ALL-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
620 ; ALL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
623 ; X86-AVX512F-LABEL: merge_16i32_i32_0uu3uuuuuuuuCuEF_volatile:
624 ; X86-AVX512F: # %bb.0:
625 ; X86-AVX512F-NEXT: movl {{[0-9]+}}(%esp), %eax
626 ; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm0 = mem[0],zero,zero,zero
627 ; X86-AVX512F-NEXT: vpinsrd $3, 12(%eax), %xmm0, %xmm0
628 ; X86-AVX512F-NEXT: vmovd {{.*#+}} xmm1 = mem[0],zero,zero,zero
629 ; X86-AVX512F-NEXT: vpinsrd $2, 56(%eax), %xmm1, %xmm1
630 ; X86-AVX512F-NEXT: vpinsrd $3, 60(%eax), %xmm1, %xmm1
631 ; X86-AVX512F-NEXT: vinserti128 $1, %xmm1, %ymm0, %ymm1
632 ; X86-AVX512F-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
633 ; X86-AVX512F-NEXT: retl
634 %ptr3 = getelementptr inbounds i32, ptr %ptr, i64 3
635 %ptrC = getelementptr inbounds i32, ptr %ptr, i64 12
636 %ptrE = getelementptr inbounds i32, ptr %ptr, i64 14
637 %ptrF = getelementptr inbounds i32, ptr %ptr, i64 15
638 %val0 = load volatile i32, ptr %ptr
639 %val3 = load volatile i32, ptr %ptr3
640 %valC = load volatile i32, ptr %ptrC
641 %valE = load volatile i32, ptr %ptrE
642 %valF = load volatile i32, ptr %ptrF
643 %res0 = insertelement <16 x i32> undef, i32 %val0, i32 0
644 %res3 = insertelement <16 x i32> %res0, i32 %val3, i32 3
645 %resC = insertelement <16 x i32> %res3, i32 %valC, i32 12
646 %resE = insertelement <16 x i32> %resC, i32 %valE, i32 14
647 %resF = insertelement <16 x i32> %resE, i32 %valF, i32 15