1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; fadd reduction of a <2 x float> vector with start value -0.0. The call
; carries no fast-math flags. Both the SelectionDAG and GlobalISel runs are
; expected to lower it to a single pairwise faddp.
7 define float @add_HalfS(<2 x float> %bin.rdx) {
8 ; CHECK-SD-LABEL: add_HalfS:
10 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
11 ; CHECK-SD-NEXT: faddp s0, v0.2s
14 ; CHECK-GI-LABEL: add_HalfS:
16 ; CHECK-GI-NEXT: faddp s0, v0.2s
18 %r = call float @llvm.vector.reduce.fadd.f32.v2f32(float -0.0, <2 x float> %bin.rdx)
; fadd reduction of a <4 x half> vector with start value -0.0 (no fast-math
; flags on the call). In the runs without +fullfp16 every add is expected to
; be done in single precision, with fcvt round trips through half between
; steps. In the runs with +fullfp16 the adds stay in half precision. The
; GlobalISel run without +fullfp16 additionally materializes the -0.0 start
; value (0x8000) through w8 and adds it explicitly before lane 0.
22 define half @add_HalfH(<4 x half> %bin.rdx) {
23 ; CHECK-SD-NOFP16-LABEL: add_HalfH:
24 ; CHECK-SD-NOFP16: // %bb.0:
25 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
26 ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
27 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
28 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
29 ; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
30 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
31 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3]
32 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
33 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
34 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
35 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
36 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
37 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
38 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
39 ; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
40 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
41 ; CHECK-SD-NOFP16-NEXT: ret
43 ; CHECK-SD-FP16-LABEL: add_HalfH:
44 ; CHECK-SD-FP16: // %bb.0:
45 ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
46 ; CHECK-SD-FP16-NEXT: mov h1, v0.h[2]
47 ; CHECK-SD-FP16-NEXT: faddp h2, v0.2h
48 ; CHECK-SD-FP16-NEXT: mov h0, v0.h[3]
49 ; CHECK-SD-FP16-NEXT: fadd h1, h2, h1
50 ; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
51 ; CHECK-SD-FP16-NEXT: ret
53 ; CHECK-GI-NOFP16-LABEL: add_HalfH:
54 ; CHECK-GI-NOFP16: // %bb.0:
55 ; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
56 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
57 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
58 ; CHECK-GI-NOFP16-NEXT: fmov s1, w8
59 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
60 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
61 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
62 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
63 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
64 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
65 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
66 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
67 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
68 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
69 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
70 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
71 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
72 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
73 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
74 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
75 ; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
76 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
77 ; CHECK-GI-NOFP16-NEXT: ret
79 ; CHECK-GI-FP16-LABEL: add_HalfH:
80 ; CHECK-GI-FP16: // %bb.0:
81 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
82 ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
83 ; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
84 ; CHECK-GI-FP16-NEXT: fadd h1, h0, h1
85 ; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
86 ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
87 ; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
88 ; CHECK-GI-FP16-NEXT: ret
89 %r = call half @llvm.vector.reduce.fadd.f16.v4f16(half -0.0, <4 x half> %bin.rdx)
; fadd reduction of a full <8 x half> vector with start value -0.0 (no
; fast-math flags on the call). The expected code adds lanes 0..7 of v0 one
; after another. Without +fullfp16 each step is a single-precision fadd
; wrapped in fcvt conversions; with +fullfp16 the first two lanes are combined
; by faddp and the remaining lanes by half-precision fadd. The GlobalISel run
; without +fullfp16 also adds the materialized -0.0 start value (0x8000)
; before lane 0.
94 define half @add_H(<8 x half> %bin.rdx) {
95 ; CHECK-SD-NOFP16-LABEL: add_H:
96 ; CHECK-SD-NOFP16: // %bb.0:
97 ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
98 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
99 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
100 ; CHECK-SD-NOFP16-NEXT: fadd s1, s2, s1
101 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
102 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
103 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
104 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
105 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
106 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[3]
107 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
108 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
109 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
110 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
111 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[4]
112 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
113 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
114 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
115 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
116 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[5]
117 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
118 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
119 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
120 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
121 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
122 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
123 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
124 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
125 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
126 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
127 ; CHECK-SD-NOFP16-NEXT: fadd s1, s1, s2
128 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
129 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
130 ; CHECK-SD-NOFP16-NEXT: fadd s0, s1, s0
131 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
132 ; CHECK-SD-NOFP16-NEXT: ret
134 ; CHECK-SD-FP16-LABEL: add_H:
135 ; CHECK-SD-FP16: // %bb.0:
136 ; CHECK-SD-FP16-NEXT: mov h1, v0.h[2]
137 ; CHECK-SD-FP16-NEXT: faddp h2, v0.2h
138 ; CHECK-SD-FP16-NEXT: mov h3, v0.h[3]
139 ; CHECK-SD-FP16-NEXT: fadd h1, h2, h1
140 ; CHECK-SD-FP16-NEXT: mov h2, v0.h[4]
141 ; CHECK-SD-FP16-NEXT: fadd h1, h1, h3
142 ; CHECK-SD-FP16-NEXT: mov h3, v0.h[5]
143 ; CHECK-SD-FP16-NEXT: fadd h1, h1, h2
144 ; CHECK-SD-FP16-NEXT: mov h2, v0.h[6]
145 ; CHECK-SD-FP16-NEXT: mov h0, v0.h[7]
146 ; CHECK-SD-FP16-NEXT: fadd h1, h1, h3
147 ; CHECK-SD-FP16-NEXT: fadd h1, h1, h2
148 ; CHECK-SD-FP16-NEXT: fadd h0, h1, h0
149 ; CHECK-SD-FP16-NEXT: ret
151 ; CHECK-GI-NOFP16-LABEL: add_H:
152 ; CHECK-GI-NOFP16: // %bb.0:
153 ; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
154 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
155 ; CHECK-GI-NOFP16-NEXT: fmov s1, w8
156 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
157 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
158 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
159 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
160 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
161 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
162 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
163 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
164 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
165 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
166 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
167 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
168 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[3]
169 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
170 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
171 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
172 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
173 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[4]
174 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
175 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
176 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
177 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
178 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
179 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
180 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
181 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
182 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
183 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
184 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
185 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
186 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
187 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
188 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
189 ; CHECK-GI-NOFP16-NEXT: fadd s1, s1, s2
190 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
191 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
192 ; CHECK-GI-NOFP16-NEXT: fadd s0, s1, s0
193 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
194 ; CHECK-GI-NOFP16-NEXT: ret
196 ; CHECK-GI-FP16-LABEL: add_H:
197 ; CHECK-GI-FP16: // %bb.0:
198 ; CHECK-GI-FP16-NEXT: mov h1, v0.h[2]
199 ; CHECK-GI-FP16-NEXT: faddp h2, v0.2h
200 ; CHECK-GI-FP16-NEXT: mov h3, v0.h[3]
201 ; CHECK-GI-FP16-NEXT: fadd h1, h2, h1
202 ; CHECK-GI-FP16-NEXT: mov h2, v0.h[4]
203 ; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
204 ; CHECK-GI-FP16-NEXT: mov h3, v0.h[5]
205 ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
206 ; CHECK-GI-FP16-NEXT: mov h2, v0.h[6]
207 ; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
208 ; CHECK-GI-FP16-NEXT: fadd h1, h1, h3
209 ; CHECK-GI-FP16-NEXT: fadd h1, h1, h2
210 ; CHECK-GI-FP16-NEXT: fadd h0, h1, h0
211 ; CHECK-GI-FP16-NEXT: ret
212 %r = call half @llvm.vector.reduce.fadd.f16.v8f16(half -0.0, <8 x half> %bin.rdx)
; fadd reduction of a <4 x float> vector with start value -0.0 (no fast-math
; flags on the call). All four runs share one expected sequence: faddp for
; lanes 0 and 1, then scalar fadd of lane 2 and lane 3 in that order.
216 define float @add_S(<4 x float> %bin.rdx) {
217 ; CHECK-LABEL: add_S:
219 ; CHECK-NEXT: mov s1, v0.s[2]
220 ; CHECK-NEXT: faddp s2, v0.2s
221 ; CHECK-NEXT: mov s0, v0.s[3]
222 ; CHECK-NEXT: fadd s1, s2, s1
223 ; CHECK-NEXT: fadd s0, s1, s0
225 %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float -0.0, <4 x float> %bin.rdx)
; fadd reduction of a <2 x double> vector with start value -0.0. All four
; runs are expected to emit a single pairwise faddp.
229 define double @add_D(<2 x double> %bin.rdx) {
230 ; CHECK-LABEL: add_D:
232 ; CHECK-NEXT: faddp d0, v0.2d
234 %r = call double @llvm.vector.reduce.fadd.f64.v2f64(double -0.0, <2 x double> %bin.rdx)
; fadd reduction of a <16 x half> vector with start value -0.0 (no fast-math
; flags on the call). The expected code reads the input from two vector
; registers: the lanes of v0 are accumulated first, followed by the lanes of
; v1. Without +fullfp16 every step is a single-precision fadd wrapped in fcvt
; conversions; with +fullfp16 the adds stay in half precision. The GlobalISel
; run without +fullfp16 also adds the materialized -0.0 start value (0x8000)
; before lane 0.
238 define half @add_2H(<16 x half> %bin.rdx) {
239 ; CHECK-SD-NOFP16-LABEL: add_2H:
240 ; CHECK-SD-NOFP16: // %bb.0:
241 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
242 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
243 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
244 ; CHECK-SD-NOFP16-NEXT: fadd s2, s3, s2
245 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
246 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
247 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
248 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
249 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
250 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[3]
251 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
252 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
253 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
254 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
255 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[4]
256 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
257 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
258 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
259 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
260 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
261 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
262 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
263 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
264 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
265 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[6]
266 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
267 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
268 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
269 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
270 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
271 ; CHECK-SD-NOFP16-NEXT: fadd s2, s2, s3
272 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
273 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
274 ; CHECK-SD-NOFP16-NEXT: fadd s0, s2, s0
275 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h1
276 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
277 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
278 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
279 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[1]
280 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
281 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
282 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
283 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
284 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[2]
285 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
286 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
287 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
288 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
289 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[3]
290 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
291 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
292 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
293 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
294 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[4]
295 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
296 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
297 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
298 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
299 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[5]
300 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
301 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
302 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
303 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
304 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[6]
305 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
306 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
307 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
308 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
309 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
310 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s2
311 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
312 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
313 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
314 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
315 ; CHECK-SD-NOFP16-NEXT: ret
317 ; CHECK-SD-FP16-LABEL: add_2H:
318 ; CHECK-SD-FP16: // %bb.0:
319 ; CHECK-SD-FP16-NEXT: mov h2, v0.h[2]
320 ; CHECK-SD-FP16-NEXT: faddp h3, v0.2h
321 ; CHECK-SD-FP16-NEXT: mov h4, v0.h[3]
322 ; CHECK-SD-FP16-NEXT: fadd h2, h3, h2
323 ; CHECK-SD-FP16-NEXT: mov h3, v0.h[4]
324 ; CHECK-SD-FP16-NEXT: fadd h2, h2, h4
325 ; CHECK-SD-FP16-NEXT: mov h4, v0.h[5]
326 ; CHECK-SD-FP16-NEXT: fadd h2, h2, h3
327 ; CHECK-SD-FP16-NEXT: mov h3, v0.h[6]
328 ; CHECK-SD-FP16-NEXT: mov h0, v0.h[7]
329 ; CHECK-SD-FP16-NEXT: fadd h2, h2, h4
330 ; CHECK-SD-FP16-NEXT: fadd h2, h2, h3
331 ; CHECK-SD-FP16-NEXT: mov h3, v1.h[2]
332 ; CHECK-SD-FP16-NEXT: fadd h0, h2, h0
333 ; CHECK-SD-FP16-NEXT: mov h2, v1.h[1]
334 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h1
335 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h2
336 ; CHECK-SD-FP16-NEXT: mov h2, v1.h[3]
337 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h3
338 ; CHECK-SD-FP16-NEXT: mov h3, v1.h[4]
339 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h2
340 ; CHECK-SD-FP16-NEXT: mov h2, v1.h[5]
341 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h3
342 ; CHECK-SD-FP16-NEXT: mov h3, v1.h[6]
343 ; CHECK-SD-FP16-NEXT: mov h1, v1.h[7]
344 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h2
345 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h3
346 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h1
347 ; CHECK-SD-FP16-NEXT: ret
349 ; CHECK-GI-NOFP16-LABEL: add_2H:
350 ; CHECK-GI-NOFP16: // %bb.0:
351 ; CHECK-GI-NOFP16-NEXT: mov w8, #32768 // =0x8000
352 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
353 ; CHECK-GI-NOFP16-NEXT: fmov s2, w8
354 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
355 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
356 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
357 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
358 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
359 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
360 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
361 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
362 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
363 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
364 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
365 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
366 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[3]
367 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
368 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
369 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
370 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
371 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4]
372 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
373 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
374 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
375 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
376 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
377 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
378 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
379 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
380 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
381 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
382 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
383 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
384 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
385 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
386 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
387 ; CHECK-GI-NOFP16-NEXT: fadd s2, s2, s3
388 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
389 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
390 ; CHECK-GI-NOFP16-NEXT: fadd s0, s2, s0
391 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h1
392 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
393 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
394 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
395 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
396 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
397 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
398 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
399 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
400 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[2]
401 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
402 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
403 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
404 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
405 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[3]
406 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
407 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
408 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
409 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
410 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[4]
411 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
412 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
413 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
414 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
415 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[5]
416 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
417 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
418 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
419 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
420 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[6]
421 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
422 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
423 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
424 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
425 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
426 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s2
427 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
428 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
429 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
430 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
431 ; CHECK-GI-NOFP16-NEXT: ret
433 ; CHECK-GI-FP16-LABEL: add_2H:
434 ; CHECK-GI-FP16: // %bb.0:
435 ; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
436 ; CHECK-GI-FP16-NEXT: faddp h3, v0.2h
437 ; CHECK-GI-FP16-NEXT: mov h4, v0.h[3]
438 ; CHECK-GI-FP16-NEXT: fadd h2, h3, h2
439 ; CHECK-GI-FP16-NEXT: mov h3, v0.h[4]
440 ; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
441 ; CHECK-GI-FP16-NEXT: mov h4, v0.h[5]
442 ; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
443 ; CHECK-GI-FP16-NEXT: mov h3, v0.h[6]
444 ; CHECK-GI-FP16-NEXT: mov h0, v0.h[7]
445 ; CHECK-GI-FP16-NEXT: fadd h2, h2, h4
446 ; CHECK-GI-FP16-NEXT: fadd h2, h2, h3
447 ; CHECK-GI-FP16-NEXT: mov h3, v1.h[2]
448 ; CHECK-GI-FP16-NEXT: fadd h0, h2, h0
449 ; CHECK-GI-FP16-NEXT: mov h2, v1.h[1]
450 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
451 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h2
452 ; CHECK-GI-FP16-NEXT: mov h2, v1.h[3]
453 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h3
454 ; CHECK-GI-FP16-NEXT: mov h3, v1.h[4]
455 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h2
456 ; CHECK-GI-FP16-NEXT: mov h2, v1.h[5]
457 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h3
458 ; CHECK-GI-FP16-NEXT: mov h3, v1.h[6]
459 ; CHECK-GI-FP16-NEXT: mov h1, v1.h[7]
460 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h2
461 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h3
462 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
463 ; CHECK-GI-FP16-NEXT: ret
464 %r = call half @llvm.vector.reduce.fadd.f16.v16f16(half -0.0, <16 x half> %bin.rdx)
; fadd reduction of an <8 x float> vector with start value -0.0 (no fast-math
; flags on the call). The expected code reads the input from v0 and v1: the
; four lanes of v0 are accumulated first (faddp for lanes 0 and 1), then the
; four lanes of v1 with scalar fadd. All four runs share the same checks.
468 define float @add_2S(<8 x float> %bin.rdx) {
469 ; CHECK-LABEL: add_2S:
471 ; CHECK-NEXT: mov s2, v0.s[2]
472 ; CHECK-NEXT: faddp s3, v0.2s
473 ; CHECK-NEXT: mov s0, v0.s[3]
474 ; CHECK-NEXT: fadd s2, s3, s2
475 ; CHECK-NEXT: mov s3, v1.s[2]
476 ; CHECK-NEXT: fadd s0, s2, s0
477 ; CHECK-NEXT: mov s2, v1.s[1]
478 ; CHECK-NEXT: fadd s0, s0, s1
479 ; CHECK-NEXT: mov s1, v1.s[3]
480 ; CHECK-NEXT: fadd s0, s0, s2
481 ; CHECK-NEXT: fadd s0, s0, s3
482 ; CHECK-NEXT: fadd s0, s0, s1
484 %r = call float @llvm.vector.reduce.fadd.f32.v8f32(float -0.0, <8 x float> %bin.rdx)
; fadd reduction of a <4 x double> vector with start value -0.0 (no fast-math
; flags on the call). The expected code combines the two lanes of v0 with
; faddp, then adds lane 0 and lane 1 of v1 with scalar fadd.
488 define double @add_2D(<4 x double> %bin.rdx) {
489 ; CHECK-LABEL: add_2D:
491 ; CHECK-NEXT: faddp d0, v0.2d
492 ; CHECK-NEXT: mov d2, v1.d[1]
493 ; CHECK-NEXT: fadd d0, d0, d1
494 ; CHECK-NEXT: fadd d0, d0, d2
496 %r = call double @llvm.vector.reduce.fadd.f64.v4f64(double -0.0, <4 x double> %bin.rdx)
500 ; Covers a start value other than -0.0: the reduction starts from 42.0.
; The expected code materializes 42.0 (0x42280000) through w8, adds it to
; lane 0 first, and then adds lanes 1, 2 and 3 in order with scalar fadd.
501 define float @add_S_init_42(<4 x float> %bin.rdx) {
502 ; CHECK-LABEL: add_S_init_42:
504 ; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
505 ; CHECK-NEXT: mov s2, v0.s[1]
506 ; CHECK-NEXT: mov s3, v0.s[2]
507 ; CHECK-NEXT: fmov s1, w8
508 ; CHECK-NEXT: fadd s1, s0, s1
509 ; CHECK-NEXT: mov s0, v0.s[3]
510 ; CHECK-NEXT: fadd s1, s1, s2
511 ; CHECK-NEXT: fadd s1, s1, s3
512 ; CHECK-NEXT: fadd s0, s1, s0
514 %r = call float @llvm.vector.reduce.fadd.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
; Declarations of the llvm.vector.reduce.fadd intrinsic overloads called by
; the tests in this file (f16, f32 and f64 element types).
; NOTE(review): no attribute group is attached to the declarations visible
; here, so the "Function Attrs" line below is informational only.
518 ; Function Attrs: nounwind readnone
519 declare half @llvm.vector.reduce.fadd.f16.v4f16(half, <4 x half>)
520 declare half @llvm.vector.reduce.fadd.f16.v8f16(half, <8 x half>)
521 declare half @llvm.vector.reduce.fadd.f16.v16f16(half, <16 x half>)
522 declare float @llvm.vector.reduce.fadd.f32.v2f32(float, <2 x float>)
523 declare float @llvm.vector.reduce.fadd.f32.v4f32(float, <4 x float>)
524 declare float @llvm.vector.reduce.fadd.f32.v8f32(float, <8 x float>)
525 declare double @llvm.vector.reduce.fadd.f64.v2f64(double, <2 x double>)
526 declare double @llvm.vector.reduce.fadd.f64.v4f64(double, <4 x double>)