1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc --mtriple aarch64 -mattr=+fullfp16 < %s | FileCheck %s
; Scalar pairwise add on <2 x float>: the shuffle moves lane 1 of %a into
; lane 0, the fadd sums that with %a, and lane 0 of the sum is extracted.
; The expected assembly is a single scalar pairwise add, `faddp s0, v0.2s`.
4 define float @faddp_2xfloat(<2 x float> %a) {
5 ; CHECK-LABEL: faddp_2xfloat:
6 ; CHECK: // %bb.0: // %entry
7 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
8 ; CHECK-NEXT: faddp s0, v0.2s
11 %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
12 %0 = fadd <2 x float> %a, %shift
13 %1 = extractelement <2 x float> %0, i32 0
; Same lane-1-plus-lane-0 pattern as the <2 x float> case, but on a
; <4 x float> input: only lanes 0 and 1 contribute to the extracted result
; (the other shuffle lanes are undef). Expected assembly: `faddp s0, v0.2s`.
17 define float @faddp_4xfloat(<4 x float> %a) {
18 ; CHECK-LABEL: faddp_4xfloat:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: faddp s0, v0.2s
23 %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
24 %0 = fadd <4 x float> %a, %shift
25 %1 = extractelement <4 x float> %0, i32 0
; Commuted variant of the <4 x float> test: the shuffled vector is the first
; fadd operand and %a the second. The expected assembly is unchanged,
; `faddp s0, v0.2s`.
29 define float @faddp_4xfloat_commute(<4 x float> %a) {
30 ; CHECK-LABEL: faddp_4xfloat_commute:
31 ; CHECK: // %bb.0: // %entry
32 ; CHECK-NEXT: faddp s0, v0.2s
35 %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
36 %0 = fadd <4 x float> %shift, %a
37 %1 = extractelement <4 x float> %0, i32 0
; Commuted variant of the <2 x float> test: the shuffled vector is the first
; fadd operand and %a the second. The expected assembly is still
; `faddp s0, v0.2s`.
41 define float @faddp_2xfloat_commute(<2 x float> %a) {
42 ; CHECK-LABEL: faddp_2xfloat_commute:
43 ; CHECK: // %bb.0: // %entry
44 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
45 ; CHECK-NEXT: faddp s0, v0.2s
48 %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
49 %0 = fadd <2 x float> %shift, %a
50 %1 = extractelement <2 x float> %0, i32 0
; Double-precision version of the pattern: lane 1 of %a is shuffled into
; lane 0, added to %a, and lane 0 of the sum is extracted.
; Expected assembly: `faddp d0, v0.2d`.
54 define double @faddp_2xdouble(<2 x double> %a) {
55 ; CHECK-LABEL: faddp_2xdouble:
56 ; CHECK: // %bb.0: // %entry
57 ; CHECK-NEXT: faddp d0, v0.2d
60 %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
61 %0 = fadd <2 x double> %a, %shift
62 %1 = extractelement <2 x double> %0, i32 0
; Commuted variant of the <2 x double> test (shuffled vector as the first
; fadd operand). Expected assembly is unchanged: `faddp d0, v0.2d`.
66 define double @faddp_2xdouble_commute(<2 x double> %a) {
67 ; CHECK-LABEL: faddp_2xdouble_commute:
68 ; CHECK: // %bb.0: // %entry
69 ; CHECK-NEXT: faddp d0, v0.2d
72 %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
73 %0 = fadd <2 x double> %shift, %a
74 %1 = extractelement <2 x double> %0, i32 0
; Integer counterpart of the pattern on <2 x i64>: lane 1 is shuffled into
; lane 0, added to %a with an integer add, and lane 0 is extracted.
; Expected assembly: `addp d0, v0.2d` followed by `fmov x0, d0`.
78 define i64 @addp_2xi64(<2 x i64> %a) {
79 ; CHECK-LABEL: addp_2xi64:
80 ; CHECK: // %bb.0: // %entry
81 ; CHECK-NEXT: addp d0, v0.2d
82 ; CHECK-NEXT: fmov x0, d0
85 %shift = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
86 %0 = add <2 x i64> %a, %shift
87 %1 = extractelement <2 x i64> %0, i32 0
; Commuted variant of the <2 x i64> test (shuffled vector as the first add
; operand). Expected assembly is unchanged: `addp d0, v0.2d` then
; `fmov x0, d0`.
91 define i64 @addp_2xi64_commute(<2 x i64> %a) {
92 ; CHECK-LABEL: addp_2xi64_commute:
93 ; CHECK: // %bb.0: // %entry
94 ; CHECK-NEXT: addp d0, v0.2d
95 ; CHECK-NEXT: fmov x0, d0
98 %shift = shufflevector <2 x i64> %a, <2 x i64> undef, <2 x i32> <i32 1, i32 undef>
99 %0 = add <2 x i64> %shift, %a
100 %1 = extractelement <2 x i64> %0, i32 0
; Strict-FP version of the <2 x float> test: the add is the constrained fadd
; intrinsic with "round.tonearest" / "fpexcept.strict", and the function
; carries attribute group #0. The expected assembly is still
; `faddp s0, v0.2s`.
104 define float @faddp_2xfloat_strict(<2 x float> %a) #0 {
105 ; CHECK-LABEL: faddp_2xfloat_strict:
106 ; CHECK: // %bb.0: // %entry
107 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
108 ; CHECK-NEXT: faddp s0, v0.2s
111 %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
112 %0 = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %a, <2 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
113 %1 = extractelement <2 x float> %0, i32 0
; Strict-FP version of the <4 x float> test: lane 1 is shuffled into lane 0
; and added to %a through the constrained fadd intrinsic
; ("round.tonearest", "fpexcept.strict"). Expected assembly:
; `faddp s0, v0.2s`.
117 define float @faddp_4xfloat_strict(<4 x float> %a) #0 {
118 ; CHECK-LABEL: faddp_4xfloat_strict:
119 ; CHECK: // %bb.0: // %entry
120 ; CHECK-NEXT: faddp s0, v0.2s
123 %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
124 %0 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %a, <4 x float> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
125 %1 = extractelement <4 x float> %0, i32 0
; Commuted strict-FP <4 x float> test: the shuffled vector is the first
; operand of the constrained fadd and %a the second. Expected assembly is
; unchanged: `faddp s0, v0.2s`.
129 define float @faddp_4xfloat_commute_strict(<4 x float> %a) #0 {
130 ; CHECK-LABEL: faddp_4xfloat_commute_strict:
131 ; CHECK: // %bb.0: // %entry
132 ; CHECK-NEXT: faddp s0, v0.2s
135 %shift = shufflevector <4 x float> %a, <4 x float> undef, <4 x i32> <i32 1, i32 undef, i32 undef, i32 undef>
136 %0 = call <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float> %shift, <4 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
137 %1 = extractelement <4 x float> %0, i32 0
; Commuted strict-FP <2 x float> test: the shuffled vector is the first
; operand of the constrained fadd and %a the second. Expected assembly is
; unchanged: `faddp s0, v0.2s`.
141 define float @faddp_2xfloat_commute_strict(<2 x float> %a) #0 {
142 ; CHECK-LABEL: faddp_2xfloat_commute_strict:
143 ; CHECK: // %bb.0: // %entry
144 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
145 ; CHECK-NEXT: faddp s0, v0.2s
148 %shift = shufflevector <2 x float> %a, <2 x float> undef, <2 x i32> <i32 1, i32 undef>
149 %0 = call <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float> %shift, <2 x float> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
150 %1 = extractelement <2 x float> %0, i32 0
; Strict-FP version of the <2 x double> test, using the constrained fadd
; intrinsic ("round.tonearest", "fpexcept.strict").
; Expected assembly: `faddp d0, v0.2d`.
154 define double @faddp_2xdouble_strict(<2 x double> %a) #0 {
155 ; CHECK-LABEL: faddp_2xdouble_strict:
156 ; CHECK: // %bb.0: // %entry
157 ; CHECK-NEXT: faddp d0, v0.2d
160 %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
161 %0 = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %a, <2 x double> %shift, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
162 %1 = extractelement <2 x double> %0, i32 0
; Commuted strict-FP <2 x double> test: the shuffled vector is the first
; operand of the constrained fadd. Expected assembly is unchanged:
; `faddp d0, v0.2d`.
166 define double @faddp_2xdouble_commute_strict(<2 x double> %a) #0 {
167 ; CHECK-LABEL: faddp_2xdouble_commute_strict:
168 ; CHECK: // %bb.0: // %entry
169 ; CHECK-NEXT: faddp d0, v0.2d
172 %shift = shufflevector <2 x double> %a, <2 x double> undef, <2 x i32> <i32 1, i32 undef>
173 %0 = call <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double> %shift, <2 x double> %a, metadata !"round.tonearest", metadata !"fpexcept.strict") #0
174 %1 = extractelement <2 x double> %0, i32 0
; Whole-vector case on <2 x double>: the shuffle swaps the two lanes
; (mask <1, 0>) and the swapped vector is added to %a with `fadd reassoc`.
; The expected assembly here is an `ext` lane swap followed by a plain
; vector `fadd`, not a pairwise add.
179 define <2 x double> @addp_v2f64(<2 x double> %a) {
180 ; CHECK-LABEL: addp_v2f64:
181 ; CHECK: // %bb.0: // %entry
182 ; CHECK-NEXT: ext v1.16b, v0.16b, v0.16b, #8
183 ; CHECK-NEXT: fadd v0.2d, v1.2d, v0.2d
186 %s = shufflevector <2 x double> %a, <2 x double> poison, <2 x i32> <i32 1, i32 0>
187 %b = fadd reassoc <2 x double> %s, %a
; <4 x double> case: the shuffle swaps the lanes within each adjacent pair
; (mask <1, 0, 3, 2>) and the result is added to %a with `fadd reassoc`.
; Expected assembly: one vector `faddp` over the two input registers, then a
; `dup` of each faddp lane to fill the two result registers.
191 define <4 x double> @addp_v4f64(<4 x double> %a) {
192 ; CHECK-LABEL: addp_v4f64:
193 ; CHECK: // %bb.0: // %entry
194 ; CHECK-NEXT: faddp v1.2d, v0.2d, v1.2d
195 ; CHECK-NEXT: dup v0.2d, v1.d[0]
196 ; CHECK-NEXT: dup v1.2d, v1.d[1]
199 %s = shufflevector <4 x double> %a, <4 x double> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
200 %b = fadd reassoc <4 x double> %s, %a
; <4 x float> case: the shuffle swaps the lanes within each adjacent pair
; (mask <1, 0, 3, 2>) and the result is added to %a with `fadd reassoc`.
; Expected assembly: `rev64` to perform the swap, then a plain vector `fadd`
; (no pairwise add for this single-register case).
204 define <4 x float> @addp_v4f32(<4 x float> %a) {
205 ; CHECK-LABEL: addp_v4f32:
206 ; CHECK: // %bb.0: // %entry
207 ; CHECK-NEXT: rev64 v1.4s, v0.4s
208 ; CHECK-NEXT: fadd v0.4s, v1.4s, v0.4s
211 %s = shufflevector <4 x float> %a, <4 x float> poison, <4 x i32> <i32 1, i32 0, i32 3, i32 2>
212 %b = fadd reassoc <4 x float> %s, %a
; <8 x float> case: the shuffle swaps the lanes within each adjacent pair and
; the result is added to %a with a plain `fadd` (no fast-math flags).
; Expected assembly: a `rev64` + vector `fadd` per 128-bit half, with no
; pairwise add.
; NOTE(review): this is the only addp_v* test whose fadd lacks `reassoc`,
; while the sibling named addp_v8f32_slow does carry `reassoc` and expects
; faddp. Confirm the two function names are not swapped.
216 define <8 x float> @addp_v8f32(<8 x float> %a) {
217 ; CHECK-LABEL: addp_v8f32:
218 ; CHECK: // %bb.0: // %entry
219 ; CHECK-NEXT: rev64 v2.4s, v1.4s
220 ; CHECK-NEXT: rev64 v3.4s, v0.4s
221 ; CHECK-NEXT: fadd v0.4s, v3.4s, v0.4s
222 ; CHECK-NEXT: fadd v1.4s, v2.4s, v1.4s
225 %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
226 %b = fadd <8 x float> %s, %a
; <8 x float> case with `fadd reassoc`: the shuffle swaps the lanes within
; each adjacent pair and the result is added to %a.
; Expected assembly: one vector `faddp` over the two input registers, then
; `zip1`/`zip2` of the faddp result with itself to duplicate each pair sum
; into both lanes of its pair.
; NOTE(review): despite the `_slow` suffix, this is the variant that carries
; `reassoc` and expects faddp; addp_v8f32 is the one without the flag.
; Confirm the naming is intentional.
230 define <8 x float> @addp_v8f32_slow(<8 x float> %a) {
231 ; CHECK-LABEL: addp_v8f32_slow:
232 ; CHECK: // %bb.0: // %entry
233 ; CHECK-NEXT: faddp v1.4s, v0.4s, v1.4s
234 ; CHECK-NEXT: zip1 v0.4s, v1.4s, v1.4s
235 ; CHECK-NEXT: zip2 v1.4s, v1.4s, v1.4s
238 %s = shufflevector <8 x float> %a, <8 x float> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
239 %b = fadd reassoc <8 x float> %s, %a
; <16 x float> case with `fadd reassoc`: the shuffle swaps the lanes within
; each adjacent pair across all 16 lanes and the result is added to %a.
; Expected assembly: two vector `faddp` instructions (one per pair of input
; registers), each followed by `zip1`/`zip2` of its result with itself to
; fill the four result registers.
243 define <16 x float> @addp_v16f32(<16 x float> %a) {
244 ; CHECK-LABEL: addp_v16f32:
245 ; CHECK: // %bb.0: // %entry
246 ; CHECK-NEXT: faddp v3.4s, v2.4s, v3.4s
247 ; CHECK-NEXT: faddp v1.4s, v0.4s, v1.4s
248 ; CHECK-NEXT: zip1 v2.4s, v3.4s, v3.4s
249 ; CHECK-NEXT: zip1 v0.4s, v1.4s, v1.4s
250 ; CHECK-NEXT: zip2 v1.4s, v1.4s, v1.4s
251 ; CHECK-NEXT: zip2 v3.4s, v3.4s, v3.4s
254 %s = shufflevector <16 x float> %a, <16 x float> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
255 %b = fadd reassoc <16 x float> %s, %a
; Reduction built from the NEON faddp intrinsic: %a and %b are added, the sum
; is split into its low (lanes 0-1) and high (lanes 2-3) halves, and the
; halves are passed to llvm.aarch64.neon.faddp.v2f32. Lane 1 of that result
; is then shuffled into lane 0, added to it, and lane 0 is extracted.
; Expected assembly: a vector `fadd`, a vector `faddp` with v0 as both
; operands, and a final scalar `faddp s0, v0.2s`.
259 define float @faddp_v4f32(<4 x float> %a, <4 x float> %b) {
260 ; CHECK-LABEL: faddp_v4f32:
262 ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
263 ; CHECK-NEXT: faddp v0.4s, v0.4s, v0.4s
264 ; CHECK-NEXT: faddp s0, v0.2s
266 %1 = fadd <4 x float> %a, %b
267 %2 = shufflevector <4 x float> %1, <4 x float> poison, <2 x i32> <i32 0, i32 1>
268 %3 = shufflevector <4 x float> %1, <4 x float> poison, <2 x i32> <i32 2, i32 3>
269 %4 = tail call <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float> %2, <2 x float> %3)
270 %5 = shufflevector <2 x float> %4, <2 x float> poison, <2 x i32> <i32 1, i32 poison>
271 %6 = fadd <2 x float> %4, %5
272 %7 = extractelement <2 x float> %6, i64 0
; Half-precision variant: %a and %b are added as <8 x half>, the sum is split
; into its low (lanes 0-3) and high (lanes 4-7) halves, and the halves are
; passed to llvm.aarch64.neon.faddp.v4f16.
; Expected assembly: a vector `fadd` followed by one 128-bit `faddp` with v0
; as both operands. The run line at the top of the file enables +fullfp16.
276 define <4 x half> @faddp_v8f16(<8 x half> %a, <8 x half> %b) {
277 ; CHECK-LABEL: faddp_v8f16:
279 ; CHECK-NEXT: fadd v0.8h, v0.8h, v1.8h
280 ; CHECK-NEXT: faddp v0.8h, v0.8h, v0.8h
281 ; CHECK-NEXT: // kill: def $d0 killed $d0 killed $q0
283 %1 = fadd <8 x half> %a, %b
284 %2 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
285 %3 = shufflevector <8 x half> %1, <8 x half> poison, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
286 %4 = tail call <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half> %2, <4 x half> %3)
; NEON pairwise-add intrinsics called by faddp_v4f32 and faddp_v8f16.
290 declare <2 x float> @llvm.aarch64.neon.faddp.v2f32(<2 x float>, <2 x float>)
291 declare <4 x half> @llvm.aarch64.neon.faddp.v4f16(<4 x half>, <4 x half>)
; Attribute group applied to the *_strict functions and to their constrained
; fadd call sites.
293 attributes #0 = { strictfp }
; Constrained fadd intrinsics used by the *_strict tests; the two metadata
; operands are the rounding mode and the exception behavior.
295 declare <2 x float> @llvm.experimental.constrained.fadd.v2f32(<2 x float>, <2 x float>, metadata, metadata)
296 declare <4 x float> @llvm.experimental.constrained.fadd.v4f32(<4 x float>, <4 x float>, metadata, metadata)
297 declare <2 x double> @llvm.experimental.constrained.fadd.v2f64(<2 x double>, <2 x double>, metadata, metadata)