1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc --mtriple=aarch64 -mattr=+fullfp16 < %s | FileCheck %s
3 ; RUN: llc --mtriple=aarch64 < %s | FileCheck %s --check-prefix=CHECKNOFP16
; Sums lane 0 and lane 1 of a <2 x half>: the shuffle moves lane 1 into lane 0,
; the vector fadd adds it to %a, and lane 0 of the sum is extracted.
; With +fullfp16 the expected code is a single pairwise "faddp h0, v0.2h";
; without it the operands are widened to f32 (fcvtl), added, and narrowed (fcvtn).
5 define half @faddp_2xhalf(<2 x half> %a) {
6 ; CHECK-LABEL: faddp_2xhalf:
7 ; CHECK: // %bb.0: // %entry
8 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
9 ; CHECK-NEXT: faddp h0, v0.2h
12 ; CHECKNOFP16-LABEL: faddp_2xhalf:
13 ; CHECKNOFP16: // %bb.0: // %entry
14 ; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
15 ; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
16 ; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
17 ; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
18 ; CHECKNOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
19 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
20 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
21 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is only a placeholder (mask index 1 selects lane 1
; of %a), so it and the don't-care mask lanes use poison instead of the
; deprecated undef, matching the addp_* tests in this file.
23 %shift = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
24 %0 = fadd <2 x half> %a, %shift
25 %1 = extractelement <2 x half> %0, i32 0
; Same lane0 + lane1 sum of a <2 x half> as the non-commuted variant, but with
; the fadd operands swapped (%shift first). With +fullfp16 the expected code is
; still "faddp h0, v0.2h"; without it the f32 fadd is expected with its source
; operands in the swapped order.
29 define half @faddp_2xhalf_commute(<2 x half> %a) {
30 ; CHECK-LABEL: faddp_2xhalf_commute:
31 ; CHECK: // %bb.0: // %entry
32 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
33 ; CHECK-NEXT: faddp h0, v0.2h
36 ; CHECKNOFP16-LABEL: faddp_2xhalf_commute:
37 ; CHECKNOFP16: // %bb.0: // %entry
38 ; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
39 ; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
40 ; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
41 ; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
42 ; CHECKNOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
43 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
44 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
45 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is never selected by the mask, so it and the
; don't-care mask lanes use poison instead of the deprecated undef.
47 %shift = shufflevector <2 x half> %a, <2 x half> poison, <2 x i32> <i32 1, i32 poison>
48 %0 = fadd <2 x half> %shift, %a
49 %1 = extractelement <2 x half> %0, i32 0
; Sums lane 0 and lane 1 of a <4 x half>: only lane 0 of the shuffle is defined
; (it takes lane 1 of %a), and only lane 0 of the fadd result is extracted.
; With +fullfp16 the expected code is "faddp h0, v0.2h"; without it the
; operands are widened to f32 (fcvtl), added, and narrowed (fcvtn).
53 define half @faddp_4xhalf(<4 x half> %a) {
54 ; CHECK-LABEL: faddp_4xhalf:
55 ; CHECK: // %bb.0: // %entry
56 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
57 ; CHECK-NEXT: faddp h0, v0.2h
60 ; CHECKNOFP16-LABEL: faddp_4xhalf:
61 ; CHECKNOFP16: // %bb.0: // %entry
62 ; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
63 ; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
64 ; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
65 ; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
66 ; CHECKNOFP16-NEXT: fadd v0.4s, v0.4s, v1.4s
67 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
68 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
69 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is never selected by the mask, so it and the
; don't-care mask lanes use poison instead of the deprecated undef.
71 %shift = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
72 %0 = fadd <4 x half> %a, %shift
73 %1 = extractelement <4 x half> %0, i32 0
; Same lane0 + lane1 sum of a <4 x half> as the non-commuted variant, but with
; the fadd operands swapped (%shift first). With +fullfp16 the expected code is
; still "faddp h0, v0.2h"; without it the f32 fadd is expected with its source
; operands in the swapped order.
77 define half @faddp_4xhalf_commute(<4 x half> %a) {
78 ; CHECK-LABEL: faddp_4xhalf_commute:
79 ; CHECK: // %bb.0: // %entry
80 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
81 ; CHECK-NEXT: faddp h0, v0.2h
84 ; CHECKNOFP16-LABEL: faddp_4xhalf_commute:
85 ; CHECKNOFP16: // %bb.0: // %entry
86 ; CHECKNOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
87 ; CHECKNOFP16-NEXT: dup v1.4h, v0.h[1]
88 ; CHECKNOFP16-NEXT: fcvtl v0.4s, v0.4h
89 ; CHECKNOFP16-NEXT: fcvtl v1.4s, v1.4h
90 ; CHECKNOFP16-NEXT: fadd v0.4s, v1.4s, v0.4s
91 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v0.4s
92 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
93 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is never selected by the mask, so it and the
; don't-care mask lanes use poison instead of the deprecated undef.
95 %shift = shufflevector <4 x half> %a, <4 x half> poison, <4 x i32> <i32 1, i32 poison, i32 poison, i32 poison>
96 %0 = fadd <4 x half> %shift, %a
97 %1 = extractelement <4 x half> %0, i32 0
; Sums lane 0 and lane 1 of an <8 x half>: only lane 0 of the shuffle is defined
; (it takes lane 1 of %a), and only lane 0 of the fadd result is extracted.
; With +fullfp16 the expected code is "faddp h0, v0.2h"; without it both halves
; of each operand are widened to f32 (fcvtl/fcvtl2), added, and narrowed back
; (fcvtn/fcvtn2).
101 define half @faddp_8xhalf(<8 x half> %a) {
102 ; CHECK-LABEL: faddp_8xhalf:
103 ; CHECK: // %bb.0: // %entry
104 ; CHECK-NEXT: faddp h0, v0.2h
107 ; CHECKNOFP16-LABEL: faddp_8xhalf:
108 ; CHECKNOFP16: // %bb.0: // %entry
109 ; CHECKNOFP16-NEXT: dup v1.8h, v0.h[1]
110 ; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
111 ; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
112 ; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
113 ; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
114 ; CHECKNOFP16-NEXT: fadd v2.4s, v2.4s, v3.4s
115 ; CHECKNOFP16-NEXT: fadd v1.4s, v0.4s, v1.4s
116 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
117 ; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
118 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
119 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is never selected by the mask, so it and the
; don't-care mask lanes use poison instead of the deprecated undef.
121 %shift = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
122 %0 = fadd <8 x half> %a, %shift
123 %1 = extractelement <8 x half> %0, i32 0
; Same lane0 + lane1 sum of an <8 x half> as the non-commuted variant, but with
; the fadd operands swapped (%shift first). With +fullfp16 the expected code is
; still "faddp h0, v0.2h"; without it both f32 fadds are expected with their
; source operands in the swapped order.
127 define half @faddp_8xhalf_commute(<8 x half> %a) {
128 ; CHECK-LABEL: faddp_8xhalf_commute:
129 ; CHECK: // %bb.0: // %entry
130 ; CHECK-NEXT: faddp h0, v0.2h
133 ; CHECKNOFP16-LABEL: faddp_8xhalf_commute:
134 ; CHECKNOFP16: // %bb.0: // %entry
135 ; CHECKNOFP16-NEXT: dup v1.8h, v0.h[1]
136 ; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
137 ; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
138 ; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
139 ; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
140 ; CHECKNOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
141 ; CHECKNOFP16-NEXT: fadd v1.4s, v1.4s, v0.4s
142 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
143 ; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
144 ; CHECKNOFP16-NEXT: // kill: def $h0 killed $h0 killed $q0
145 ; CHECKNOFP16-NEXT: ret
; The second shuffle operand is never selected by the mask, so it and the
; don't-care mask lanes use poison instead of the deprecated undef.
147 %shift = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison, i32 poison>
148 %0 = fadd <8 x half> %shift, %a
149 %1 = extractelement <8 x half> %0, i32 0
; Full-width variant: the shuffle swaps every adjacent pair of lanes of an
; <8 x half> (mask 1,0,3,2,...) and the result is added to %a with a
; "reassoc" fadd.
; With +fullfp16 the expected code is rev32 followed by one v8f16 fadd; without
; it the rev32 is followed by widening both halves of each operand to f32
; (fcvtl/fcvtl2), two f32 fadds, and narrowing back (fcvtn/fcvtn2).
153 define <8 x half> @addp_v8f16(<8 x half> %a) {
154 ; CHECK-LABEL: addp_v8f16:
155 ; CHECK: // %bb.0: // %entry
156 ; CHECK-NEXT: rev32 v1.8h, v0.8h
157 ; CHECK-NEXT: fadd v0.8h, v1.8h, v0.8h
160 ; CHECKNOFP16-LABEL: addp_v8f16:
161 ; CHECKNOFP16: // %bb.0: // %entry
162 ; CHECKNOFP16-NEXT: rev32 v1.8h, v0.8h
163 ; CHECKNOFP16-NEXT: fcvtl v2.4s, v0.4h
164 ; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
165 ; CHECKNOFP16-NEXT: fcvtl v3.4s, v1.4h
166 ; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
167 ; CHECKNOFP16-NEXT: fadd v2.4s, v3.4s, v2.4s
168 ; CHECKNOFP16-NEXT: fadd v1.4s, v1.4s, v0.4s
169 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v2.4s
170 ; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v1.4s
171 ; CHECKNOFP16-NEXT: ret
173 %s = shufflevector <8 x half> %a, <8 x half> poison, <8 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6>
174 %b = fadd reassoc <8 x half> %s, %a
; 16-lane variant: the shuffle swaps every adjacent pair of lanes of a
; <16 x half> and the result is added to %a with a "reassoc" fadd.
; With +fullfp16 the expected code is one pairwise faddp over both input
; registers, followed by zip1/zip2 of that result with itself to fill the two
; output registers. Without it each register is rev32'd, both halves are
; widened to f32 (fcvtl/fcvtl2), added, and narrowed back (fcvtn/fcvtn2).
178 define <16 x half> @addp_v16f16(<16 x half> %a) {
179 ; CHECK-LABEL: addp_v16f16:
180 ; CHECK: // %bb.0: // %entry
181 ; CHECK-NEXT: faddp v1.8h, v0.8h, v1.8h
182 ; CHECK-NEXT: zip1 v0.8h, v1.8h, v1.8h
183 ; CHECK-NEXT: zip2 v1.8h, v1.8h, v1.8h
186 ; CHECKNOFP16-LABEL: addp_v16f16:
187 ; CHECKNOFP16: // %bb.0: // %entry
188 ; CHECKNOFP16-NEXT: rev32 v2.8h, v0.8h
189 ; CHECKNOFP16-NEXT: rev32 v3.8h, v1.8h
190 ; CHECKNOFP16-NEXT: fcvtl v4.4s, v0.4h
191 ; CHECKNOFP16-NEXT: fcvtl v6.4s, v1.4h
192 ; CHECKNOFP16-NEXT: fcvtl2 v0.4s, v0.8h
193 ; CHECKNOFP16-NEXT: fcvtl2 v1.4s, v1.8h
194 ; CHECKNOFP16-NEXT: fcvtl v5.4s, v2.4h
195 ; CHECKNOFP16-NEXT: fcvtl v7.4s, v3.4h
196 ; CHECKNOFP16-NEXT: fcvtl2 v2.4s, v2.8h
197 ; CHECKNOFP16-NEXT: fcvtl2 v3.4s, v3.8h
198 ; CHECKNOFP16-NEXT: fadd v4.4s, v5.4s, v4.4s
199 ; CHECKNOFP16-NEXT: fadd v5.4s, v7.4s, v6.4s
200 ; CHECKNOFP16-NEXT: fadd v2.4s, v2.4s, v0.4s
201 ; CHECKNOFP16-NEXT: fadd v3.4s, v3.4s, v1.4s
202 ; CHECKNOFP16-NEXT: fcvtn v0.4h, v4.4s
203 ; CHECKNOFP16-NEXT: fcvtn v1.4h, v5.4s
204 ; CHECKNOFP16-NEXT: fcvtn2 v0.8h, v2.4s
205 ; CHECKNOFP16-NEXT: fcvtn2 v1.8h, v3.4s
206 ; CHECKNOFP16-NEXT: ret
208 %s = shufflevector <16 x half> %a, <16 x half> poison, <16 x i32> <i32 1, i32 0, i32 3, i32 2, i32 5, i32 4, i32 7, i32 6, i32 9, i32 8, i32 11, i32 10, i32 13, i32 12, i32 15, i32 14>
209 %b = fadd reassoc <16 x half> %s, %a