1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+neon | FileCheck %s --check-prefix=CHECK
4 ; Same as vecreduce-fadd-legalization.ll, but without fast-math flags (fmf).
; Declarations of the llvm.vector.reduce.fadd overloads exercised below.
; Each takes a scalar start value followed by the vector to reduce.
; Single-element vectors, one per floating-point type (half, float, double, fp128):
6 declare half @llvm.vector.reduce.fadd.f16.v1f16(half, <1 x half>)
7 declare float @llvm.vector.reduce.fadd.f32.v1f32(float, <1 x float>)
8 declare double @llvm.vector.reduce.fadd.f64.v1f64(double, <1 x double>)
9 declare fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128, <1 x fp128>)
; Multi-element vectors: 3 and 5 floats (not a power of two), 2 x fp128, and
; 16 floats (512 bits of data in total).
11 declare float @llvm.vector.reduce.fadd.f32.v3f32(float, <3 x float>)
12 declare float @llvm.vector.reduce.fadd.f32.v5f32(float, <5 x float>)
13 declare fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128, <2 x fp128>)
14 declare float @llvm.vector.reduce.fadd.f32.v16f32(float, <16 x float>)
; <1 x half> reduction with a variable start value %s.
; The expected code converts both half operands to single precision, performs
; one scalar fadd, and converts the sum back to half.
16 define half @test_v1f16(<1 x half> %a, half %s) nounwind {
17 ; CHECK-LABEL: test_v1f16:
19 ; CHECK-NEXT: fcvt s0, h0
20 ; CHECK-NEXT: fcvt s1, h1
21 ; CHECK-NEXT: fadd s0, s1, s0
22 ; CHECK-NEXT: fcvt h0, s0
24 %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half %s, <1 x half> %a)
; <1 x half> reduction with the constant start value -0.0 (the identity for
; floating-point addition). No conversion or add instruction appears among the
; assertions visible for this function.
28 define half @test_v1f16_neutral(<1 x half> %a) nounwind {
29 ; CHECK-LABEL: test_v1f16_neutral:
32 %b = call half @llvm.vector.reduce.fadd.f16.v1f16(half -0.0, <1 x half> %a)
; <1 x float> reduction with a variable start value %s.
; The expected code is a single scalar fadd of s1 and s0.
36 define float @test_v1f32(<1 x float> %a, float %s) nounwind {
37 ; CHECK-LABEL: test_v1f32:
39 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
40 ; CHECK-NEXT: fadd s0, s1, s0
42 %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float %s, <1 x float> %a)
; <1 x float> reduction with the constant start value -0.0 (the identity for
; floating-point addition). The visible assertions contain only register
; "kill" annotations and no fadd.
46 define float @test_v1f32_neutral(<1 x float> %a) nounwind {
47 ; CHECK-LABEL: test_v1f32_neutral:
49 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
50 ; CHECK-NEXT: // kill: def $s0 killed $s0 killed $q0
52 %b = call float @llvm.vector.reduce.fadd.f32.v1f32(float -0.0, <1 x float> %a)
; <1 x double> reduction with a variable start value %s.
; The expected code is a single scalar fadd of d1 and d0.
56 define double @test_v1f64(<1 x double> %a, double %s) nounwind {
57 ; CHECK-LABEL: test_v1f64:
59 ; CHECK-NEXT: fadd d0, d1, d0
61 %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double %s, <1 x double> %a)
; <1 x double> reduction with the constant start value -0.0 (the identity for
; floating-point addition). No add instruction appears among the assertions
; visible for this function.
65 define double @test_v1f64_neutral(<1 x double> %a) nounwind {
66 ; CHECK-LABEL: test_v1f64_neutral:
69 %b = call double @llvm.vector.reduce.fadd.f64.v1f64(double -0.0, <1 x double> %a)
; <1 x fp128> reduction with a variable start value %s.
; The expected code swaps v0 and v1 (using v2 as a temporary) and then
; branches to the __addtf3 routine with a plain "b" instead of "bl".
73 define fp128 @test_v1f128(<1 x fp128> %a, fp128 %s) nounwind {
74 ; CHECK-LABEL: test_v1f128:
76 ; CHECK-NEXT: mov v2.16b, v0.16b
77 ; CHECK-NEXT: mov v0.16b, v1.16b
78 ; CHECK-NEXT: mov v1.16b, v2.16b
79 ; CHECK-NEXT: b __addtf3
80 %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 %s, <1 x fp128> %a)
; <1 x fp128> reduction with a constant start value. The literal
; 0xL00000000000000008000000000000000 has only the sign bit set, i.e. it is
; fp128 -0.0. No reference to __addtf3 appears among the assertions visible
; for this function.
84 define fp128 @test_v1f128_neutral(<1 x fp128> %a) nounwind {
85 ; CHECK-LABEL: test_v1f128_neutral:
88 %b = call fp128 @llvm.vector.reduce.fadd.f128.v1f128(fp128 0xL00000000000000008000000000000000, <1 x fp128> %a)
; <3 x float> reduction with a variable start value %s.
; The expected code is a chain of three scalar fadds: s1 is first added to
; lane 0 of v0, then lanes 1 and 2 are extracted and added in that order.
92 define float @test_v3f32(<3 x float> %a, float %s) nounwind {
93 ; CHECK-LABEL: test_v3f32:
95 ; CHECK-NEXT: fadd s1, s1, s0
96 ; CHECK-NEXT: mov s2, v0.s[1]
97 ; CHECK-NEXT: mov s0, v0.s[2]
98 ; CHECK-NEXT: fadd s1, s1, s2
99 ; CHECK-NEXT: fadd s0, s1, s0
101 %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float %s, <3 x float> %a)
; <3 x float> reduction with the constant start value -0.0 (the identity for
; floating-point addition). The expected code contains no add for the start
; value: a pairwise faddp sums lanes 0 and 1 of v0, then one scalar fadd adds
; lane 2.
105 define float @test_v3f32_neutral(<3 x float> %a) nounwind {
106 ; CHECK-LABEL: test_v3f32_neutral:
108 ; CHECK-NEXT: mov s1, v0.s[2]
109 ; CHECK-NEXT: faddp s0, v0.2s
110 ; CHECK-NEXT: fadd s0, s0, s1
112 %b = call float @llvm.vector.reduce.fadd.f32.v3f32(float -0.0, <3 x float> %a)
; <5 x float> reduction with a variable start value %s.
; The expected code is a chain of five scalar fadds: s5 (presumably the start
; value %s; confirm against the calling convention) is added to s0, then s1
; through s4 are accumulated in order.
116 define float @test_v5f32(<5 x float> %a, float %s) nounwind {
117 ; CHECK-LABEL: test_v5f32:
119 ; CHECK-NEXT: fadd s0, s5, s0
120 ; CHECK-NEXT: fadd s0, s0, s1
121 ; CHECK-NEXT: fadd s0, s0, s2
122 ; CHECK-NEXT: fadd s0, s0, s3
123 ; CHECK-NEXT: fadd s0, s0, s4
125 %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float %s, <5 x float> %a)
; <5 x float> reduction with the constant start value -0.0 (the identity for
; floating-point addition). The expected code is a chain of four scalar fadds
; that accumulates s1 through s4 onto s0, with no add for the start value.
129 define float @test_v5f32_neutral(<5 x float> %a) nounwind {
130 ; CHECK-LABEL: test_v5f32_neutral:
132 ; CHECK-NEXT: fadd s0, s0, s1
133 ; CHECK-NEXT: fadd s0, s0, s2
134 ; CHECK-NEXT: fadd s0, s0, s3
135 ; CHECK-NEXT: fadd s0, s0, s4
137 %b = call float @llvm.vector.reduce.fadd.f32.v5f32(float -0.0, <5 x float> %a)
; <2 x fp128> reduction with a variable start value %s.
; The expected code reaches __addtf3 twice. q1 and x30 are spilled to a
; 32-byte stack area around the first call ("bl"); after the reload and stack
; restore, the second addition is reached with a plain branch ("b").
141 define fp128 @test_v2f128(<2 x fp128> %a, fp128 %s) nounwind {
142 ; CHECK-LABEL: test_v2f128:
144 ; CHECK-NEXT: sub sp, sp, #32
145 ; CHECK-NEXT: str q1, [sp] // 16-byte Folded Spill
146 ; CHECK-NEXT: mov v1.16b, v0.16b
147 ; CHECK-NEXT: mov v0.16b, v2.16b
148 ; CHECK-NEXT: str x30, [sp, #16] // 8-byte Folded Spill
149 ; CHECK-NEXT: bl __addtf3
150 ; CHECK-NEXT: ldr q1, [sp] // 16-byte Folded Reload
151 ; CHECK-NEXT: ldr x30, [sp, #16] // 8-byte Folded Reload
152 ; CHECK-NEXT: add sp, sp, #32
153 ; CHECK-NEXT: b __addtf3
154 %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 %s, <2 x fp128> %a)
; <2 x fp128> reduction with the constant start value fp128 -0.0 (only the
; sign bit of the literal is set). The only instruction among the visible
; assertions is a single plain branch to __addtf3.
158 define fp128 @test_v2f128_neutral(<2 x fp128> %a) nounwind {
159 ; CHECK-LABEL: test_v2f128_neutral:
161 ; CHECK-NEXT: b __addtf3
162 %b = call fp128 @llvm.vector.reduce.fadd.f128.v2f128(fp128 0xL00000000000000008000000000000000, <2 x fp128> %a)
; <16 x float> reduction with a variable start value %s.
; The expected code reads the vector from v0-v3 and contains sixteen scalar
; fadds forming one dependency chain: s4 (presumably the start value %s;
; confirm against the calling convention) plus lane 0 of v0 first, then the
; remaining lanes of v0, v1, v2 and v3 in ascending order. The lane-extracting
; "mov" instructions are interleaved with the adds.
166 define float @test_v16f32(<16 x float> %a, float %s) nounwind {
167 ; CHECK-LABEL: test_v16f32:
169 ; CHECK-NEXT: mov s6, v0.s[1]
170 ; CHECK-NEXT: fadd s4, s4, s0
171 ; CHECK-NEXT: mov s7, v0.s[2]
172 ; CHECK-NEXT: mov s0, v0.s[3]
173 ; CHECK-NEXT: mov s5, v2.s[1]
174 ; CHECK-NEXT: fadd s4, s4, s6
175 ; CHECK-NEXT: mov s6, v1.s[2]
176 ; CHECK-NEXT: fadd s4, s4, s7
177 ; CHECK-NEXT: fadd s0, s4, s0
178 ; CHECK-NEXT: mov s4, v1.s[1]
179 ; CHECK-NEXT: fadd s0, s0, s1
180 ; CHECK-NEXT: mov s1, v1.s[3]
181 ; CHECK-NEXT: fadd s0, s0, s4
182 ; CHECK-NEXT: fadd s0, s0, s6
183 ; CHECK-NEXT: fadd s0, s0, s1
184 ; CHECK-NEXT: mov s1, v2.s[2]
185 ; CHECK-NEXT: fadd s0, s0, s2
186 ; CHECK-NEXT: mov s2, v2.s[3]
187 ; CHECK-NEXT: fadd s0, s0, s5
188 ; CHECK-NEXT: fadd s0, s0, s1
189 ; CHECK-NEXT: mov s1, v3.s[1]
190 ; CHECK-NEXT: fadd s0, s0, s2
191 ; CHECK-NEXT: mov s2, v3.s[2]
192 ; CHECK-NEXT: fadd s0, s0, s3
193 ; CHECK-NEXT: fadd s0, s0, s1
194 ; CHECK-NEXT: mov s1, v3.s[3]
195 ; CHECK-NEXT: fadd s0, s0, s2
196 ; CHECK-NEXT: fadd s0, s0, s1
198 %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float %s, <16 x float> %a)
; <16 x float> reduction with the constant start value -0.0 (the identity for
; floating-point addition). The expected code contains no add for the start
; value: a pairwise faddp sums lanes 0 and 1 of v0, and fourteen scalar fadds
; then accumulate the remaining lanes of v0, v1, v2 and v3 in ascending order
; as one dependency chain.
202 define float @test_v16f32_neutral(<16 x float> %a) nounwind {
203 ; CHECK-LABEL: test_v16f32_neutral:
205 ; CHECK-NEXT: mov s5, v0.s[2]
206 ; CHECK-NEXT: faddp s6, v0.2s
207 ; CHECK-NEXT: mov s0, v0.s[3]
208 ; CHECK-NEXT: mov s4, v1.s[1]
209 ; CHECK-NEXT: fadd s5, s6, s5
210 ; CHECK-NEXT: fadd s0, s5, s0
211 ; CHECK-NEXT: mov s5, v1.s[2]
212 ; CHECK-NEXT: fadd s0, s0, s1
213 ; CHECK-NEXT: mov s1, v1.s[3]
214 ; CHECK-NEXT: fadd s0, s0, s4
215 ; CHECK-NEXT: mov s4, v2.s[2]
216 ; CHECK-NEXT: fadd s0, s0, s5
217 ; CHECK-NEXT: fadd s0, s0, s1
218 ; CHECK-NEXT: mov s1, v2.s[1]
219 ; CHECK-NEXT: fadd s0, s0, s2
220 ; CHECK-NEXT: fadd s0, s0, s1
221 ; CHECK-NEXT: mov s1, v2.s[3]
222 ; CHECK-NEXT: mov s2, v3.s[2]
223 ; CHECK-NEXT: fadd s0, s0, s4
224 ; CHECK-NEXT: fadd s0, s0, s1
225 ; CHECK-NEXT: mov s1, v3.s[1]
226 ; CHECK-NEXT: fadd s0, s0, s3
227 ; CHECK-NEXT: fadd s0, s0, s1
228 ; CHECK-NEXT: mov s1, v3.s[3]
229 ; CHECK-NEXT: fadd s0, s0, s2
230 ; CHECK-NEXT: fadd s0, s0, s1
232 %b = call float @llvm.vector.reduce.fadd.f32.v16f32(float -0.0, <16 x float> %a)