1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; Scalar double add. All four llc configurations share the common check
; prefix here and expect a single `fadd d0, d0, d1`.
7 define double @fadd_f64(double %a, double %b) {
8 ; CHECK-LABEL: fadd_f64:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fadd d0, d0, d1
13 %c = fadd double %a, %b
; Scalar float add. All four llc configurations expect a single
; `fadd s0, s0, s1`.
17 define float @fadd_f32(float %a, float %b) {
18 ; CHECK-LABEL: fadd_f32:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: fadd s0, s0, s1
23 %c = fadd float %a, %b
; Scalar half add.
;  - Without +fullfp16: both the default selector (SD checks) and
;    -global-isel (GI checks) widen each operand to single precision with
;    fcvt, add in single precision, and narrow the result back to half. The
;    two selectors differ only in which operand is converted first.
;  - With +fullfp16: both selectors expect a single `fadd h0, h0, h1`.
27 define half @fadd_f16(half %a, half %b) {
28 ; CHECK-SD-NOFP16-LABEL: fadd_f16:
29 ; CHECK-SD-NOFP16: // %bb.0: // %entry
30 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
31 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
32 ; CHECK-SD-NOFP16-NEXT: fadd s0, s0, s1
33 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
34 ; CHECK-SD-NOFP16-NEXT: ret
36 ; CHECK-SD-FP16-LABEL: fadd_f16:
37 ; CHECK-SD-FP16: // %bb.0: // %entry
38 ; CHECK-SD-FP16-NEXT: fadd h0, h0, h1
39 ; CHECK-SD-FP16-NEXT: ret
41 ; CHECK-GI-NOFP16-LABEL: fadd_f16:
42 ; CHECK-GI-NOFP16: // %bb.0: // %entry
43 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
44 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
45 ; CHECK-GI-NOFP16-NEXT: fadd s0, s0, s1
46 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
47 ; CHECK-GI-NOFP16-NEXT: ret
49 ; CHECK-GI-FP16-LABEL: fadd_f16:
50 ; CHECK-GI-FP16: // %bb.0: // %entry
51 ; CHECK-GI-FP16-NEXT: fadd h0, h0, h1
52 ; CHECK-GI-FP16-NEXT: ret
; <2 x double> add. All four llc configurations expect one vector fadd on
; the .2d arrangement.
58 define <2 x double> @fadd_v2f64(<2 x double> %a, <2 x double> %b) {
59 ; CHECK-LABEL: fadd_v2f64:
60 ; CHECK: // %bb.0: // %entry
61 ; CHECK-NEXT: fadd v0.2d, v0.2d, v1.2d
64 %c = fadd <2 x double> %a, %b
; <3 x double> add. The expected code works on d0-d2 and d3-d5 and is the
; same with and without +fullfp16; only the selector matters.
;  - Both selectors pack d0/d1 into v0 and d3/d4 into v3 with lane moves and
;    add those with one .2d fadd.
;  - Default selector (SD checks): the third element is added with a .2d
;    vector fadd on v2/v5, and the high lane of the packed result is moved
;    into d1 with `ext ... #8`.
;  - -global-isel (GI checks): the third element is added with a scalar
;    `fadd d2, d2, d5`, and the high lane is extracted with
;    `mov d1, v0.d[1]`.
68 define <3 x double> @fadd_v3f64(<3 x double> %a, <3 x double> %b) {
69 ; CHECK-SD-LABEL: fadd_v3f64:
70 ; CHECK-SD: // %bb.0: // %entry
71 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
72 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
73 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
74 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
75 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
76 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
77 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
78 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
79 ; CHECK-SD-NEXT: fadd v2.2d, v2.2d, v5.2d
80 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
81 ; CHECK-SD-NEXT: fadd v0.2d, v0.2d, v3.2d
82 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
83 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
84 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
87 ; CHECK-GI-LABEL: fadd_v3f64:
88 ; CHECK-GI: // %bb.0: // %entry
89 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
90 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
91 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
92 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
93 ; CHECK-GI-NEXT: fadd d2, d2, d5
94 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
95 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
96 ; CHECK-GI-NEXT: fadd v0.2d, v0.2d, v3.2d
97 ; CHECK-GI-NEXT: mov d1, v0.d[1]
98 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
101 %c = fadd <3 x double> %a, %b
; <4 x double> add. The expected code is two .2d fadds (v0 with v2, v1 with
; v3). The default selector emits the v1 add first; -global-isel emits the
; v0 add first.
105 define <4 x double> @fadd_v4f64(<4 x double> %a, <4 x double> %b) {
106 ; CHECK-SD-LABEL: fadd_v4f64:
107 ; CHECK-SD: // %bb.0: // %entry
108 ; CHECK-SD-NEXT: fadd v1.2d, v1.2d, v3.2d
109 ; CHECK-SD-NEXT: fadd v0.2d, v0.2d, v2.2d
112 ; CHECK-GI-LABEL: fadd_v4f64:
113 ; CHECK-GI: // %bb.0: // %entry
114 ; CHECK-GI-NEXT: fadd v0.2d, v0.2d, v2.2d
115 ; CHECK-GI-NEXT: fadd v1.2d, v1.2d, v3.2d
118 %c = fadd <4 x double> %a, %b
; <2 x float> add. All four llc configurations expect one vector fadd on
; the .2s arrangement.
122 define <2 x float> @fadd_v2f32(<2 x float> %a, <2 x float> %b) {
123 ; CHECK-LABEL: fadd_v2f32:
124 ; CHECK: // %bb.0: // %entry
125 ; CHECK-NEXT: fadd v0.2s, v0.2s, v1.2s
128 %c = fadd <2 x float> %a, %b
; <3 x float> add. All four llc configurations expect the three-element
; vector to be handled with a single four-lane .4s fadd.
132 define <3 x float> @fadd_v3f32(<3 x float> %a, <3 x float> %b) {
133 ; CHECK-LABEL: fadd_v3f32:
134 ; CHECK: // %bb.0: // %entry
135 ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
138 %c = fadd <3 x float> %a, %b
; <4 x float> add. All four llc configurations expect one vector fadd on
; the .4s arrangement.
142 define <4 x float> @fadd_v4f32(<4 x float> %a, <4 x float> %b) {
143 ; CHECK-LABEL: fadd_v4f32:
144 ; CHECK: // %bb.0: // %entry
145 ; CHECK-NEXT: fadd v0.4s, v0.4s, v1.4s
148 %c = fadd <4 x float> %a, %b
; <8 x float> add. The expected code is two .4s fadds (v0 with v2, v1 with
; v3). The default selector emits the v1 add first; -global-isel emits the
; v0 add first.
152 define <8 x float> @fadd_v8f32(<8 x float> %a, <8 x float> %b) {
153 ; CHECK-SD-LABEL: fadd_v8f32:
154 ; CHECK-SD: // %bb.0: // %entry
155 ; CHECK-SD-NEXT: fadd v1.4s, v1.4s, v3.4s
156 ; CHECK-SD-NEXT: fadd v0.4s, v0.4s, v2.4s
159 ; CHECK-GI-LABEL: fadd_v8f32:
160 ; CHECK-GI: // %bb.0: // %entry
161 ; CHECK-GI-NEXT: fadd v0.4s, v0.4s, v2.4s
162 ; CHECK-GI-NEXT: fadd v1.4s, v1.4s, v3.4s
165 %c = fadd <8 x float> %a, %b
; <7 x half> add.
;  - Default selector without +fullfp16: treated like an eight-lane vector.
;    Low and high halves of both operands are widened to .4s with
;    fcvtl/fcvtl2, added with two .4s fadds, and narrowed back with
;    fcvtn/fcvtn2.
;  - -global-isel without +fullfp16: the low four lanes are widened with
;    fcvtl and added. Lanes 4-6 of each operand are first copied lane by lane
;    into v4/v3, then widened and added. The seven result lanes are inserted
;    into v0 one at a time with `mov v0.h[N], ...`.
;  - With +fullfp16: both selectors expect a single .8h fadd.
169 define <7 x half> @fadd_v7f16(<7 x half> %a, <7 x half> %b) {
170 ; CHECK-SD-NOFP16-LABEL: fadd_v7f16:
171 ; CHECK-SD-NOFP16: // %bb.0: // %entry
172 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
173 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
174 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
175 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
176 ; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
177 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v0.4s, v1.4s
178 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
179 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
180 ; CHECK-SD-NOFP16-NEXT: ret
182 ; CHECK-SD-FP16-LABEL: fadd_v7f16:
183 ; CHECK-SD-FP16: // %bb.0: // %entry
184 ; CHECK-SD-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h
185 ; CHECK-SD-FP16-NEXT: ret
187 ; CHECK-GI-NOFP16-LABEL: fadd_v7f16:
188 ; CHECK-GI-NOFP16: // %bb.0: // %entry
189 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
190 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
191 ; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v0.h[4]
192 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v3.4s
193 ; CHECK-GI-NOFP16-NEXT: mov v3.h[0], v1.h[4]
194 ; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v0.h[5]
195 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v1.h[5]
196 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
197 ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6]
198 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6]
199 ; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0]
200 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v4.4h
201 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
202 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1]
203 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v1.4s, v3.4s
204 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[2]
205 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
206 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v2.h[3]
207 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
208 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
209 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
210 ; CHECK-GI-NOFP16-NEXT: ret
212 ; CHECK-GI-FP16-LABEL: fadd_v7f16:
213 ; CHECK-GI-FP16: // %bb.0: // %entry
214 ; CHECK-GI-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h
215 ; CHECK-GI-FP16-NEXT: ret
217 %c = fadd <7 x half> %a, %b
; <4 x half> add.
;  - Without +fullfp16: both selectors widen the operands to .4s with fcvtl,
;    add in single precision, and narrow back with fcvtn. They differ only
;    in which operand is converted first.
;  - With +fullfp16: both selectors expect a single .4h fadd.
221 define <4 x half> @fadd_v4f16(<4 x half> %a, <4 x half> %b) {
222 ; CHECK-SD-NOFP16-LABEL: fadd_v4f16:
223 ; CHECK-SD-NOFP16: // %bb.0: // %entry
224 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
225 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
226 ; CHECK-SD-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
227 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
228 ; CHECK-SD-NOFP16-NEXT: ret
230 ; CHECK-SD-FP16-LABEL: fadd_v4f16:
231 ; CHECK-SD-FP16: // %bb.0: // %entry
232 ; CHECK-SD-FP16-NEXT: fadd v0.4h, v0.4h, v1.4h
233 ; CHECK-SD-FP16-NEXT: ret
235 ; CHECK-GI-NOFP16-LABEL: fadd_v4f16:
236 ; CHECK-GI-NOFP16: // %bb.0: // %entry
237 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
238 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
239 ; CHECK-GI-NOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
240 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
241 ; CHECK-GI-NOFP16-NEXT: ret
243 ; CHECK-GI-FP16-LABEL: fadd_v4f16:
244 ; CHECK-GI-FP16: // %bb.0: // %entry
245 ; CHECK-GI-FP16-NEXT: fadd v0.4h, v0.4h, v1.4h
246 ; CHECK-GI-FP16-NEXT: ret
248 %c = fadd <4 x half> %a, %b
; <8 x half> add.
;  - Without +fullfp16: both selectors widen the low halves with fcvtl and
;    the high halves with fcvtl2, do two .4s fadds, and recombine the result
;    with fcvtn/fcvtn2. Only conversion order and register choice differ.
;  - With +fullfp16: both selectors expect a single .8h fadd.
252 define <8 x half> @fadd_v8f16(<8 x half> %a, <8 x half> %b) {
253 ; CHECK-SD-NOFP16-LABEL: fadd_v8f16:
254 ; CHECK-SD-NOFP16: // %bb.0: // %entry
255 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
256 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
257 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
258 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
259 ; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
260 ; CHECK-SD-NOFP16-NEXT: fadd v1.4s, v0.4s, v1.4s
261 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
262 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
263 ; CHECK-SD-NOFP16-NEXT: ret
265 ; CHECK-SD-FP16-LABEL: fadd_v8f16:
266 ; CHECK-SD-FP16: // %bb.0: // %entry
267 ; CHECK-SD-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h
268 ; CHECK-SD-FP16-NEXT: ret
270 ; CHECK-GI-NOFP16-LABEL: fadd_v8f16:
271 ; CHECK-GI-NOFP16: // %bb.0: // %entry
272 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
273 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
274 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
275 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
276 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v2.4s, v3.4s
277 ; CHECK-GI-NOFP16-NEXT: fadd v1.4s, v0.4s, v1.4s
278 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
279 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
280 ; CHECK-GI-NOFP16-NEXT: ret
282 ; CHECK-GI-FP16-LABEL: fadd_v8f16:
283 ; CHECK-GI-FP16: // %bb.0: // %entry
284 ; CHECK-GI-FP16-NEXT: fadd v0.8h, v0.8h, v1.8h
285 ; CHECK-GI-FP16-NEXT: ret
287 %c = fadd <8 x half> %a, %b
; <16 x half> add; the expected code operates on v0/v1 and v2/v3.
;  - Without +fullfp16: both selectors widen all four input registers with
;    fcvtl (low halves) and fcvtl2 (high halves), do four .4s fadds, and
;    rebuild the two result registers with fcvtn/fcvtn2. Only conversion
;    order and register choice differ.
;  - With +fullfp16: two .8h fadds. The default selector emits the v1 add
;    first; -global-isel emits the v0 add first.
291 define <16 x half> @fadd_v16f16(<16 x half> %a, <16 x half> %b) {
292 ; CHECK-SD-NOFP16-LABEL: fadd_v16f16:
293 ; CHECK-SD-NOFP16: // %bb.0: // %entry
294 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v2.4h
295 ; CHECK-SD-NOFP16-NEXT: fcvtl v5.4s, v0.4h
296 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v3.4h
297 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
298 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
299 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
300 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
301 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
302 ; CHECK-SD-NOFP16-NEXT: fadd v4.4s, v5.4s, v4.4s
303 ; CHECK-SD-NOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
304 ; CHECK-SD-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
305 ; CHECK-SD-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
306 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v4.4s
307 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
308 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
309 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
310 ; CHECK-SD-NOFP16-NEXT: ret
312 ; CHECK-SD-FP16-LABEL: fadd_v16f16:
313 ; CHECK-SD-FP16: // %bb.0: // %entry
314 ; CHECK-SD-FP16-NEXT: fadd v1.8h, v1.8h, v3.8h
315 ; CHECK-SD-FP16-NEXT: fadd v0.8h, v0.8h, v2.8h
316 ; CHECK-SD-FP16-NEXT: ret
318 ; CHECK-GI-NOFP16-LABEL: fadd_v16f16:
319 ; CHECK-GI-NOFP16: // %bb.0: // %entry
320 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v0.4h
321 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v1.4h
322 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h
323 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v3.4h
324 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
325 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
326 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
327 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
328 ; CHECK-GI-NOFP16-NEXT: fadd v4.4s, v4.4s, v6.4s
329 ; CHECK-GI-NOFP16-NEXT: fadd v5.4s, v5.4s, v7.4s
330 ; CHECK-GI-NOFP16-NEXT: fadd v2.4s, v0.4s, v2.4s
331 ; CHECK-GI-NOFP16-NEXT: fadd v3.4s, v1.4s, v3.4s
332 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s
333 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v5.4s
334 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
335 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
336 ; CHECK-GI-NOFP16-NEXT: ret
338 ; CHECK-GI-FP16-LABEL: fadd_v16f16:
339 ; CHECK-GI-FP16: // %bb.0: // %entry
340 ; CHECK-GI-FP16-NEXT: fadd v0.8h, v0.8h, v2.8h
341 ; CHECK-GI-FP16-NEXT: fadd v1.8h, v1.8h, v3.8h
342 ; CHECK-GI-FP16-NEXT: ret
344 %c = fadd <16 x half> %a, %b
; Scalar double subtract. All four llc configurations share the common
; check prefix here and expect a single `fsub d0, d0, d1`.
348 define double @fsub_f64(double %a, double %b) {
349 ; CHECK-LABEL: fsub_f64:
350 ; CHECK: // %bb.0: // %entry
351 ; CHECK-NEXT: fsub d0, d0, d1
354 %c = fsub double %a, %b
; Scalar float subtract. All four llc configurations expect a single
; `fsub s0, s0, s1`.
358 define float @fsub_f32(float %a, float %b) {
359 ; CHECK-LABEL: fsub_f32:
360 ; CHECK: // %bb.0: // %entry
361 ; CHECK-NEXT: fsub s0, s0, s1
364 %c = fsub float %a, %b
; Scalar half subtract.
;  - Without +fullfp16: both the default selector (SD checks) and
;    -global-isel (GI checks) widen each operand to single precision with
;    fcvt, subtract in single precision, and narrow the result back to half.
;    The two selectors differ only in which operand is converted first.
;  - With +fullfp16: both selectors expect a single `fsub h0, h0, h1`.
368 define half @fsub_f16(half %a, half %b) {
369 ; CHECK-SD-NOFP16-LABEL: fsub_f16:
370 ; CHECK-SD-NOFP16: // %bb.0: // %entry
371 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
372 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
373 ; CHECK-SD-NOFP16-NEXT: fsub s0, s0, s1
374 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
375 ; CHECK-SD-NOFP16-NEXT: ret
377 ; CHECK-SD-FP16-LABEL: fsub_f16:
378 ; CHECK-SD-FP16: // %bb.0: // %entry
379 ; CHECK-SD-FP16-NEXT: fsub h0, h0, h1
380 ; CHECK-SD-FP16-NEXT: ret
382 ; CHECK-GI-NOFP16-LABEL: fsub_f16:
383 ; CHECK-GI-NOFP16: // %bb.0: // %entry
384 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
385 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
386 ; CHECK-GI-NOFP16-NEXT: fsub s0, s0, s1
387 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
388 ; CHECK-GI-NOFP16-NEXT: ret
390 ; CHECK-GI-FP16-LABEL: fsub_f16:
391 ; CHECK-GI-FP16: // %bb.0: // %entry
392 ; CHECK-GI-FP16-NEXT: fsub h0, h0, h1
393 ; CHECK-GI-FP16-NEXT: ret
395 %c = fsub half %a, %b
; <2 x double> subtract. All four llc configurations expect one vector fsub
; on the .2d arrangement.
399 define <2 x double> @fsub_v2f64(<2 x double> %a, <2 x double> %b) {
400 ; CHECK-LABEL: fsub_v2f64:
401 ; CHECK: // %bb.0: // %entry
402 ; CHECK-NEXT: fsub v0.2d, v0.2d, v1.2d
405 %c = fsub <2 x double> %a, %b
; <3 x double> subtract. The expected code works on d0-d2 and d3-d5 and is
; the same with and without +fullfp16; only the selector matters.
;  - Both selectors pack d0/d1 into v0 and d3/d4 into v3 with lane moves and
;    subtract those with one .2d fsub.
;  - Default selector (SD checks): the third element is subtracted with a
;    .2d vector fsub on v2/v5, and the high lane of the packed result is
;    moved into d1 with `ext ... #8`.
;  - -global-isel (GI checks): the third element is subtracted with a scalar
;    `fsub d2, d2, d5`, and the high lane is extracted with
;    `mov d1, v0.d[1]`.
409 define <3 x double> @fsub_v3f64(<3 x double> %a, <3 x double> %b) {
410 ; CHECK-SD-LABEL: fsub_v3f64:
411 ; CHECK-SD: // %bb.0: // %entry
412 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
413 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
414 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
415 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
416 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
417 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
418 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
419 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
420 ; CHECK-SD-NEXT: fsub v2.2d, v2.2d, v5.2d
421 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
422 ; CHECK-SD-NEXT: fsub v0.2d, v0.2d, v3.2d
423 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
424 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
425 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
428 ; CHECK-GI-LABEL: fsub_v3f64:
429 ; CHECK-GI: // %bb.0: // %entry
430 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
431 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
432 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
433 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
434 ; CHECK-GI-NEXT: fsub d2, d2, d5
435 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
436 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
437 ; CHECK-GI-NEXT: fsub v0.2d, v0.2d, v3.2d
438 ; CHECK-GI-NEXT: mov d1, v0.d[1]
439 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
442 %c = fsub <3 x double> %a, %b
; <4 x double> subtract. The expected code is two .2d fsubs (v0 minus v2,
; v1 minus v3). The default selector emits the v1 subtract first;
; -global-isel emits the v0 subtract first.
446 define <4 x double> @fsub_v4f64(<4 x double> %a, <4 x double> %b) {
447 ; CHECK-SD-LABEL: fsub_v4f64:
448 ; CHECK-SD: // %bb.0: // %entry
449 ; CHECK-SD-NEXT: fsub v1.2d, v1.2d, v3.2d
450 ; CHECK-SD-NEXT: fsub v0.2d, v0.2d, v2.2d
453 ; CHECK-GI-LABEL: fsub_v4f64:
454 ; CHECK-GI: // %bb.0: // %entry
455 ; CHECK-GI-NEXT: fsub v0.2d, v0.2d, v2.2d
456 ; CHECK-GI-NEXT: fsub v1.2d, v1.2d, v3.2d
459 %c = fsub <4 x double> %a, %b
; <2 x float> subtract. All four llc configurations expect one vector fsub
; on the .2s arrangement.
463 define <2 x float> @fsub_v2f32(<2 x float> %a, <2 x float> %b) {
464 ; CHECK-LABEL: fsub_v2f32:
465 ; CHECK: // %bb.0: // %entry
466 ; CHECK-NEXT: fsub v0.2s, v0.2s, v1.2s
469 %c = fsub <2 x float> %a, %b
; <3 x float> subtract. All four llc configurations expect the three-element
; vector to be handled with a single four-lane .4s fsub.
473 define <3 x float> @fsub_v3f32(<3 x float> %a, <3 x float> %b) {
474 ; CHECK-LABEL: fsub_v3f32:
475 ; CHECK: // %bb.0: // %entry
476 ; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s
479 %c = fsub <3 x float> %a, %b
; <4 x float> subtract. All four llc configurations expect one vector fsub
; on the .4s arrangement.
483 define <4 x float> @fsub_v4f32(<4 x float> %a, <4 x float> %b) {
484 ; CHECK-LABEL: fsub_v4f32:
485 ; CHECK: // %bb.0: // %entry
486 ; CHECK-NEXT: fsub v0.4s, v0.4s, v1.4s
489 %c = fsub <4 x float> %a, %b
; <8 x float> subtract. The expected code is two .4s fsubs (v0 minus v2,
; v1 minus v3). The default selector emits the v1 subtract first;
; -global-isel emits the v0 subtract first.
493 define <8 x float> @fsub_v8f32(<8 x float> %a, <8 x float> %b) {
494 ; CHECK-SD-LABEL: fsub_v8f32:
495 ; CHECK-SD: // %bb.0: // %entry
496 ; CHECK-SD-NEXT: fsub v1.4s, v1.4s, v3.4s
497 ; CHECK-SD-NEXT: fsub v0.4s, v0.4s, v2.4s
500 ; CHECK-GI-LABEL: fsub_v8f32:
501 ; CHECK-GI: // %bb.0: // %entry
502 ; CHECK-GI-NEXT: fsub v0.4s, v0.4s, v2.4s
503 ; CHECK-GI-NEXT: fsub v1.4s, v1.4s, v3.4s
506 %c = fsub <8 x float> %a, %b
; <7 x half> subtract.
;  - Default selector without +fullfp16: treated like an eight-lane vector.
;    Low and high halves of both operands are widened to .4s with
;    fcvtl/fcvtl2, subtracted with two .4s fsubs, and narrowed back with
;    fcvtn/fcvtn2.
;  - -global-isel without +fullfp16: the low four lanes are widened with
;    fcvtl and subtracted. Lanes 4-6 of each operand are first copied lane by
;    lane into v4/v3, then widened and subtracted. The seven result lanes are
;    inserted into v0 one at a time with `mov v0.h[N], ...`.
;  - With +fullfp16: both selectors expect a single .8h fsub.
510 define <7 x half> @fsub_v7f16(<7 x half> %a, <7 x half> %b) {
511 ; CHECK-SD-NOFP16-LABEL: fsub_v7f16:
512 ; CHECK-SD-NOFP16: // %bb.0: // %entry
513 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
514 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
515 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
516 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
517 ; CHECK-SD-NOFP16-NEXT: fsub v2.4s, v3.4s, v2.4s
518 ; CHECK-SD-NOFP16-NEXT: fsub v1.4s, v0.4s, v1.4s
519 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
520 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
521 ; CHECK-SD-NOFP16-NEXT: ret
523 ; CHECK-SD-FP16-LABEL: fsub_v7f16:
524 ; CHECK-SD-FP16: // %bb.0: // %entry
525 ; CHECK-SD-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h
526 ; CHECK-SD-FP16-NEXT: ret
528 ; CHECK-GI-NOFP16-LABEL: fsub_v7f16:
529 ; CHECK-GI-NOFP16: // %bb.0: // %entry
530 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
531 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
532 ; CHECK-GI-NOFP16-NEXT: mov v4.h[0], v0.h[4]
533 ; CHECK-GI-NOFP16-NEXT: fsub v2.4s, v2.4s, v3.4s
534 ; CHECK-GI-NOFP16-NEXT: mov v3.h[0], v1.h[4]
535 ; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v0.h[5]
536 ; CHECK-GI-NOFP16-NEXT: mov v3.h[1], v1.h[5]
537 ; CHECK-GI-NOFP16-NEXT: fcvtn v2.4h, v2.4s
538 ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v0.h[6]
539 ; CHECK-GI-NOFP16-NEXT: mov v3.h[2], v1.h[6]
540 ; CHECK-GI-NOFP16-NEXT: mov v0.h[0], v2.h[0]
541 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v4.4h
542 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v3.4h
543 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v2.h[1]
544 ; CHECK-GI-NOFP16-NEXT: fsub v1.4s, v1.4s, v3.4s
545 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v2.h[2]
546 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
547 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v2.h[3]
548 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
549 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v1.h[1]
550 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[2]
551 ; CHECK-GI-NOFP16-NEXT: ret
553 ; CHECK-GI-FP16-LABEL: fsub_v7f16:
554 ; CHECK-GI-FP16: // %bb.0: // %entry
555 ; CHECK-GI-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h
556 ; CHECK-GI-FP16-NEXT: ret
558 %c = fsub <7 x half> %a, %b
; <4 x half> subtract.
;  - Without +fullfp16: both selectors widen the operands to .4s with fcvtl,
;    subtract in single precision, and narrow back with fcvtn. They differ
;    only in which operand is converted first.
;  - With +fullfp16: both selectors expect a single .4h fsub.
562 define <4 x half> @fsub_v4f16(<4 x half> %a, <4 x half> %b) {
563 ; CHECK-SD-NOFP16-LABEL: fsub_v4f16:
564 ; CHECK-SD-NOFP16: // %bb.0: // %entry
565 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
566 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
567 ; CHECK-SD-NOFP16-NEXT: fsub v0.4s, v0.4s, v1.4s
568 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
569 ; CHECK-SD-NOFP16-NEXT: ret
571 ; CHECK-SD-FP16-LABEL: fsub_v4f16:
572 ; CHECK-SD-FP16: // %bb.0: // %entry
573 ; CHECK-SD-FP16-NEXT: fsub v0.4h, v0.4h, v1.4h
574 ; CHECK-SD-FP16-NEXT: ret
576 ; CHECK-GI-NOFP16-LABEL: fsub_v4f16:
577 ; CHECK-GI-NOFP16: // %bb.0: // %entry
578 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
579 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
580 ; CHECK-GI-NOFP16-NEXT: fsub v0.4s, v0.4s, v1.4s
581 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
582 ; CHECK-GI-NOFP16-NEXT: ret
584 ; CHECK-GI-FP16-LABEL: fsub_v4f16:
585 ; CHECK-GI-FP16: // %bb.0: // %entry
586 ; CHECK-GI-FP16-NEXT: fsub v0.4h, v0.4h, v1.4h
587 ; CHECK-GI-FP16-NEXT: ret
589 %c = fsub <4 x half> %a, %b
; <8 x half> subtract.
;  - Without +fullfp16: both selectors widen the low halves with fcvtl and
;    the high halves with fcvtl2, do two .4s fsubs, and recombine the result
;    with fcvtn/fcvtn2. Only conversion order and register choice differ.
;  - With +fullfp16: both selectors expect a single .8h fsub.
593 define <8 x half> @fsub_v8f16(<8 x half> %a, <8 x half> %b) {
594 ; CHECK-SD-NOFP16-LABEL: fsub_v8f16:
595 ; CHECK-SD-NOFP16: // %bb.0: // %entry
596 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
597 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
598 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
599 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
600 ; CHECK-SD-NOFP16-NEXT: fsub v2.4s, v3.4s, v2.4s
601 ; CHECK-SD-NOFP16-NEXT: fsub v1.4s, v0.4s, v1.4s
602 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
603 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
604 ; CHECK-SD-NOFP16-NEXT: ret
606 ; CHECK-SD-FP16-LABEL: fsub_v8f16:
607 ; CHECK-SD-FP16: // %bb.0: // %entry
608 ; CHECK-SD-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h
609 ; CHECK-SD-FP16-NEXT: ret
611 ; CHECK-GI-NOFP16-LABEL: fsub_v8f16:
612 ; CHECK-GI-NOFP16: // %bb.0: // %entry
613 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
614 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
615 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
616 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
617 ; CHECK-GI-NOFP16-NEXT: fsub v2.4s, v2.4s, v3.4s
618 ; CHECK-GI-NOFP16-NEXT: fsub v1.4s, v0.4s, v1.4s
619 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
620 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
621 ; CHECK-GI-NOFP16-NEXT: ret
623 ; CHECK-GI-FP16-LABEL: fsub_v8f16:
624 ; CHECK-GI-FP16: // %bb.0: // %entry
625 ; CHECK-GI-FP16-NEXT: fsub v0.8h, v0.8h, v1.8h
626 ; CHECK-GI-FP16-NEXT: ret
628 %c = fsub <8 x half> %a, %b
632 define <16 x half> @fsub_v16f16(<16 x half> %a, <16 x half> %b) {
633 ; CHECK-SD-NOFP16-LABEL: fsub_v16f16:
634 ; CHECK-SD-NOFP16: // %bb.0: // %entry
635 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v2.4h
636 ; CHECK-SD-NOFP16-NEXT: fcvtl v5.4s, v0.4h
637 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v3.4h
638 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
639 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
640 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
641 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
642 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
643 ; CHECK-SD-NOFP16-NEXT: fsub v4.4s, v5.4s, v4.4s
644 ; CHECK-SD-NOFP16-NEXT: fsub v5.4s, v7.4s, v6.4s
645 ; CHECK-SD-NOFP16-NEXT: fsub v2.4s, v0.4s, v2.4s
646 ; CHECK-SD-NOFP16-NEXT: fsub v3.4s, v1.4s, v3.4s
647 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v4.4s
648 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
649 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
650 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
651 ; CHECK-SD-NOFP16-NEXT: ret
653 ; CHECK-SD-FP16-LABEL: fsub_v16f16:
654 ; CHECK-SD-FP16: // %bb.0: // %entry
655 ; CHECK-SD-FP16-NEXT: fsub v1.8h, v1.8h, v3.8h
656 ; CHECK-SD-FP16-NEXT: fsub v0.8h, v0.8h, v2.8h
657 ; CHECK-SD-FP16-NEXT: ret
659 ; CHECK-GI-NOFP16-LABEL: fsub_v16f16:
660 ; CHECK-GI-NOFP16: // %bb.0: // %entry
661 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v0.4h
662 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v1.4h
663 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h
664 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v3.4h
665 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
666 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
667 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
668 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
669 ; CHECK-GI-NOFP16-NEXT: fsub v4.4s, v4.4s, v6.4s
670 ; CHECK-GI-NOFP16-NEXT: fsub v5.4s, v5.4s, v7.4s
671 ; CHECK-GI-NOFP16-NEXT: fsub v2.4s, v0.4s, v2.4s
672 ; CHECK-GI-NOFP16-NEXT: fsub v3.4s, v1.4s, v3.4s
673 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s
674 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v5.4s
675 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
676 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
677 ; CHECK-GI-NOFP16-NEXT: ret
679 ; CHECK-GI-FP16-LABEL: fsub_v16f16:
680 ; CHECK-GI-FP16: // %bb.0: // %entry
681 ; CHECK-GI-FP16-NEXT: fsub v0.8h, v0.8h, v2.8h
682 ; CHECK-GI-FP16-NEXT: fsub v1.8h, v1.8h, v3.8h
683 ; CHECK-GI-FP16-NEXT: ret
685 %c = fsub <16 x half> %a, %b