; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx | FileCheck %s --check-prefix=CHECK --check-prefix=AVX
; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2 | FileCheck %s --check-prefix=CHECK --check-prefix=AVX2

; Check constant loads of every 128-bit and 256-bit vector type
; for size optimization using splat ops available with AVX and AVX2.
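
; Size optimization is requested three ways below: attribute #0 (optsize),
; attribute #1 (minsize), and the _pgso variants, which carry !prof !14 (a zero
; function_entry_count) so that profile-guided size optimization treats them as
; cold.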

; There is no AVX broadcast from double to a 128-bit vector because movddup has been around since SSE3 (grrr).
define <2 x double> @splat_v2f64(<2 x double> %x) #0 {
; CHECK-LABEL: splat_v2f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <2 x double> @splat_v2f64_pgso(<2 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v2f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vmovddup {{.*#+}} xmm1 = [1.0E+0,1.0E+0]
; CHECK-NEXT:    # xmm1 = mem[0,0]
; CHECK-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <2 x double> %x, <double 1.0, double 1.0>
  ret <2 x double> %add
}

define <4 x double> @splat_v4f64(<4 x double> %x) #1 {
; CHECK-LABEL: splat_v4f64:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x double> @splat_v4f64_pgso(<4 x double> %x) !prof !14 {
; CHECK-LABEL: splat_v4f64_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <4 x double> %x, <double 1.0, double 1.0, double 1.0, double 1.0>
  ret <4 x double> %add
}

define <4 x float> @splat_v4f32(<4 x float> %x) #0 {
; CHECK-LABEL: splat_v4f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <4 x float> @splat_v4f32_pgso(<4 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v4f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} xmm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %xmm1, %xmm0, %xmm0
; CHECK-NEXT:    retq
  %add = fadd <4 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0>
  ret <4 x float> %add
}

define <8 x float> @splat_v8f32(<8 x float> %x) #1 {
; CHECK-LABEL: splat_v8f32:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

define <8 x float> @splat_v8f32_pgso(<8 x float> %x) !prof !14 {
; CHECK-LABEL: splat_v8f32_pgso:
; CHECK:       # %bb.0:
; CHECK-NEXT:    vbroadcastss {{.*#+}} ymm1 = [1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0,1.0E+0]
; CHECK-NEXT:    vaddps %ymm1, %ymm0, %ymm0
; CHECK-NEXT:    retq
  %add = fadd <8 x float> %x, <float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0, float 1.0>
  ret <8 x float> %add
}

; AVX can't do integer splats, so fake it: use vmovddup to splat a 64-bit value.
; vmovddup is one byte smaller than vpbroadcastq, although the regenerated AVX2
; checks below now select vpbroadcastq.
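; (Encoding note: vmovddup's opcode is in the VEX 0F map, so it can use the
; 2-byte VEX prefix, while vpbroadcastq is in the 0F38 map, which always needs
; the 3-byte VEX prefix, hence the one-byte difference.)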
define <2 x i64> @splat_v2i64(<2 x i64> %x) #1 {
; AVX-LABEL: splat_v2i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

define <2 x i64> @splat_v2i64_pgso(<2 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v2i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vmovddup {{.*#+}} xmm1 = [2,2]
; AVX-NEXT:    # xmm1 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v2i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} xmm1 = [2,2]
; AVX2-NEXT:    vpaddq %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <2 x i64> %x, <i64 2, i64 2>
  ret <2 x i64> %add
}

; AVX can't do 256-bit integer ops, so we split this into two 128-bit vectors,
; and then we fake it: use vmovddup to splat the 64-bit value.
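; (AVX1 has only the FP-domain vextractf128/vinsertf128 for 128-bit lane moves;
; their integer counterparts, vextracti128/vinserti128, require AVX2.)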
define <4 x i64> @splat_v4i64(<4 x i64> %x) #0 {
; AVX-LABEL: splat_v4i64:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

define <4 x i64> @splat_v4i64_pgso(<4 x i64> %x) !prof !14 {
; AVX-LABEL: splat_v4i64_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vmovddup {{.*#+}} xmm2 = [2,2]
; AVX-NEXT:    # xmm2 = mem[0,0]
; AVX-NEXT:    vpaddq %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddq %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i64_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastq {{.*#+}} ymm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddq %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <4 x i64> %x, <i64 2, i64 2, i64 2, i64 2>
  ret <4 x i64> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat a 32-bit value.
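; (Using the FP-domain vbroadcastss for integer data may incur a domain-crossing
; penalty on some microarchitectures; that trade-off is acceptable when
; optimizing for size.)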
define <4 x i32> @splat_v4i32(<4 x i32> %x) #1 {
; AVX-LABEL: splat_v4i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

define <4 x i32> @splat_v4i32_pgso(<4 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v4i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm1 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v4i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} xmm1 = [2,2,2,2]
; AVX2-NEXT:    vpaddd %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <4 x i32> %x, <i32 2, i32 2, i32 2, i32 2>
  ret <4 x i32> %add
}

; AVX can't do integer splats, so fake it: use vbroadcastss to splat a 32-bit value.
define <8 x i32> @splat_v8i32(<8 x i32> %x) #0 {
; AVX-LABEL: splat_v8i32:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

define <8 x i32> @splat_v8i32_pgso(<8 x i32> %x) !prof !14 {
; AVX-LABEL: splat_v8i32_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2]
; AVX-NEXT:    vpaddd %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddd %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i32_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastd {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddd %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <8 x i32> %x, <i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2, i32 2>
  ret <8 x i32> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
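; (AVX1's element broadcasts, vbroadcastss and vbroadcastsd, only handle 32- and
; 64-bit elements, so the 128-bit 16-bit splat falls back to a constant-pool
; memory operand below.)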
define <8 x i16> @splat_v8i16(<8 x i16> %x) #1 {
; AVX-LABEL: splat_v8i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

define <8 x i16> @splat_v8i16_pgso(<8 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v8i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddw {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v8i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <8 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <8 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 16-bit. Could use pshuflw, etc?
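; (Two i16 2s do form the repeating 32-bit pattern 0x00020002, though, so the
; AVX path below can still use vbroadcastss for this 256-bit case.)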
define <16 x i16> @splat_v16i16(<16 x i16> %x) #0 {
; AVX-LABEL: splat_v16i16:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

define <16 x i16> @splat_v16i16_pgso(<16 x i16> %x) !prof !14 {
; AVX-LABEL: splat_v16i16_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddw %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddw %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i16_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastw {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddw %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <16 x i16> %x, <i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2, i16 2>
  ret <16 x i16> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
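; (Likewise for bytes: vpbroadcastb requires AVX2, so the 128-bit AVX case below
; uses a constant-pool operand, while the 256-bit case reuses the vbroadcastss
; trick because four i8 2s also form a repeating 32-bit pattern.)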
define <16 x i8> @splat_v16i8(<16 x i8> %x) #1 {
; AVX-LABEL: splat_v16i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

define <16 x i8> @splat_v16i8_pgso(<16 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v16i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vpaddb {{\.?LCPI[0-9]+_[0-9]+}}(%rip), %xmm0, %xmm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v16i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} xmm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %xmm1, %xmm0, %xmm0
; AVX2-NEXT:    retq
  %add = add <16 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <16 x i8> %add
}

; AVX can't do integer splats, and there's no broadcast fakery for 8-bit. Could use pshufb, etc?
define <32 x i8> @splat_v32i8(<32 x i8> %x) #0 {
; AVX-LABEL: splat_v32i8:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

define <32 x i8> @splat_v32i8_pgso(<32 x i8> %x) !prof !14 {
; AVX-LABEL: splat_v32i8_pgso:
; AVX:       # %bb.0:
; AVX-NEXT:    vextractf128 $1, %ymm0, %xmm1
; AVX-NEXT:    vbroadcastss {{.*#+}} xmm2 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX-NEXT:    vpaddb %xmm2, %xmm1, %xmm1
; AVX-NEXT:    vpaddb %xmm2, %xmm0, %xmm0
; AVX-NEXT:    vinsertf128 $1, %xmm1, %ymm0, %ymm0
; AVX-NEXT:    retq
;
; AVX2-LABEL: splat_v32i8_pgso:
; AVX2:       # %bb.0:
; AVX2-NEXT:    vpbroadcastb {{.*#+}} ymm1 = [2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2]
; AVX2-NEXT:    vpaddb %ymm1, %ymm0, %ymm0
; AVX2-NEXT:    retq
  %add = add <32 x i8> %x, <i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2, i8 2>
  ret <32 x i8> %add
}

; PR23259: Verify that ISel doesn't crash with a 'fatal error in backend'
; due to a missing AVX pattern to select a v2i64 X86ISD::BROADCAST of a
; loadi64 with multiple uses.

@A = common dso_local global <3 x i64> zeroinitializer, align 32

define <8 x i64> @pr23259() #1 {
; AVX-LABEL: pr23259:
; AVX:       # %bb.0: # %entry
; AVX-NEXT:    vmovaps A+16(%rip), %xmm0
; AVX-NEXT:    vblendps {{.*#+}} xmm0 = xmm0[0,1],mem[2,3]
; AVX-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1,2,3],mem[4,5,6,7]
; AVX-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX-NEXT:    retq
;
; AVX2-LABEL: pr23259:
; AVX2:       # %bb.0: # %entry
; AVX2-NEXT:    vmovaps A+16(%rip), %xmm0
; AVX2-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0,1],mem[2,3,4,5,6,7]
; AVX2-NEXT:    vbroadcastsd {{.*#+}} ymm1 = [1,1,1,1]
; AVX2-NEXT:    retq
entry:
  %0 = load <4 x i64>, ptr @A, align 32
  %1 = shufflevector <4 x i64> %0, <4 x i64> undef, <3 x i32> <i32 undef, i32 undef, i32 2>
  %shuffle = shufflevector <3 x i64> <i64 1, i64 undef, i64 undef>, <3 x i64> %1, <8 x i32> <i32 5, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0, i32 0>
  ret <8 x i64> %shuffle
}

attributes #0 = { nounwind optsize }
attributes #1 = { nounwind minsize }

!llvm.module.flags = !{!0}
!0 = !{i32 1, !"ProfileSummary", !1}
!1 = !{!2, !3, !4, !5, !6, !7, !8, !9}
!2 = !{!"ProfileFormat", !"InstrProf"}
!3 = !{!"TotalCount", i64 10000}
!4 = !{!"MaxCount", i64 10}
!5 = !{!"MaxInternalCount", i64 1}
!6 = !{!"MaxFunctionCount", i64 1000}
!7 = !{!"NumCounts", i64 3}
!8 = !{!"NumFunctions", i64 3}
!9 = !{!"DetailedSummary", !10}
!10 = !{!11, !12, !13}
!11 = !{i32 10000, i64 100, i32 1}
!12 = !{i32 999000, i64 100, i32 1}
!13 = !{i32 999999, i64 1, i32 2}
!14 = !{!"function_entry_count", i64 0}