1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; Scalar double multiply. All four llc configurations at the top of the file
; share a single expectation here: one `fmul d0, d0, d1`.
7 define double @fmul_f64(double %a, double %b) {
8 ; CHECK-LABEL: fmul_f64:
9 ; CHECK: // %bb.0: // %entry
10 ; CHECK-NEXT: fmul d0, d0, d1
13 %c = fmul double %a, %b
; Scalar float multiply. All four llc configurations share a single
; expectation: one `fmul s0, s0, s1`.
17 define float @fmul_f32(float %a, float %b) {
18 ; CHECK-LABEL: fmul_f32:
19 ; CHECK: // %bb.0: // %entry
20 ; CHECK-NEXT: fmul s0, s0, s1
23 %c = fmul float %a, %b
; Scalar half multiply, checked separately for each of the four llc
; configurations.
;  * Without +fullfp16: both operands are converted to single precision with
;    fcvt, multiplied as `fmul s0, s0, s1`, and the result converted back to
;    half. The runs with and without -global-isel differ only in the order of
;    the two input fcvt instructions.
;  * With +fullfp16: a single `fmul h0, h0, h1` is expected in both runs.
27 define half @fmul_f16(half %a, half %b) {
28 ; CHECK-SD-NOFP16-LABEL: fmul_f16:
29 ; CHECK-SD-NOFP16: // %bb.0: // %entry
30 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
31 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
32 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
33 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
34 ; CHECK-SD-NOFP16-NEXT: ret
36 ; CHECK-SD-FP16-LABEL: fmul_f16:
37 ; CHECK-SD-FP16: // %bb.0: // %entry
38 ; CHECK-SD-FP16-NEXT: fmul h0, h0, h1
39 ; CHECK-SD-FP16-NEXT: ret
41 ; CHECK-GI-NOFP16-LABEL: fmul_f16:
42 ; CHECK-GI-NOFP16: // %bb.0: // %entry
43 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
44 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
45 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
46 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
47 ; CHECK-GI-NOFP16-NEXT: ret
49 ; CHECK-GI-FP16-LABEL: fmul_f16:
50 ; CHECK-GI-FP16: // %bb.0: // %entry
51 ; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
52 ; CHECK-GI-FP16-NEXT: ret
; <2 x double> multiply. All four llc configurations share a single
; expectation: one `fmul v0.2d, v0.2d, v1.2d`.
58 define <2 x double> @fmul_v2f64(<2 x double> %a, <2 x double> %b) {
59 ; CHECK-LABEL: fmul_v2f64:
60 ; CHECK: // %bb.0: // %entry
61 ; CHECK-NEXT: fmul v0.2d, v0.2d, v1.2d
64 %c = fmul <2 x double> %a, %b
; <3 x double> multiply. Expectations are split by instruction selector
; (with / without -global-isel) but not by +fullfp16.
;  * Without -global-isel: d1 is inserted into lane 1 of v0 and d4 into lane 1
;    of v3, then two vector `fmul .2d` instructions are expected (one on
;    v2/v5, one on v0/v3); lane 1 of the v0 result is moved into v1 with `ext`.
;  * With -global-isel: the d2/d5 pair is multiplied with a scalar `fmul`, the
;    remaining lanes use one vector `fmul .2d`, and lane 1 of the result is
;    extracted with `mov d1, v0.d[1]`.
68 define <3 x double> @fmul_v3f64(<3 x double> %a, <3 x double> %b) {
69 ; CHECK-SD-LABEL: fmul_v3f64:
70 ; CHECK-SD: // %bb.0: // %entry
71 ; CHECK-SD-NEXT: // kill: def $d3 killed $d3 def $q3
72 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
73 ; CHECK-SD-NEXT: // kill: def $d4 killed $d4 def $q4
74 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 def $q1
75 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 def $q2
76 ; CHECK-SD-NEXT: // kill: def $d5 killed $d5 def $q5
77 ; CHECK-SD-NEXT: mov v3.d[1], v4.d[0]
78 ; CHECK-SD-NEXT: mov v0.d[1], v1.d[0]
79 ; CHECK-SD-NEXT: fmul v2.2d, v2.2d, v5.2d
80 ; CHECK-SD-NEXT: // kill: def $d2 killed $d2 killed $q2
81 ; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v3.2d
82 ; CHECK-SD-NEXT: ext v1.16b, v0.16b, v0.16b, #8
83 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 killed $q0
84 ; CHECK-SD-NEXT: // kill: def $d1 killed $d1 killed $q1
87 ; CHECK-GI-LABEL: fmul_v3f64:
88 ; CHECK-GI: // %bb.0: // %entry
89 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
90 ; CHECK-GI-NEXT: // kill: def $d3 killed $d3 def $q3
91 ; CHECK-GI-NEXT: // kill: def $d1 killed $d1 def $q1
92 ; CHECK-GI-NEXT: // kill: def $d4 killed $d4 def $q4
93 ; CHECK-GI-NEXT: fmul d2, d2, d5
94 ; CHECK-GI-NEXT: mov v0.d[1], v1.d[0]
95 ; CHECK-GI-NEXT: mov v3.d[1], v4.d[0]
96 ; CHECK-GI-NEXT: fmul v0.2d, v0.2d, v3.2d
97 ; CHECK-GI-NEXT: mov d1, v0.d[1]
98 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 killed $q0
101 %c = fmul <3 x double> %a, %b
; <4 x double> multiply. Both selectors are expected to emit two
; `fmul .2d` instructions (v0*v2 and v1*v3); the expectations differ only in
; the order in which the two instructions appear.
105 define <4 x double> @fmul_v4f64(<4 x double> %a, <4 x double> %b) {
106 ; CHECK-SD-LABEL: fmul_v4f64:
107 ; CHECK-SD: // %bb.0: // %entry
108 ; CHECK-SD-NEXT: fmul v1.2d, v1.2d, v3.2d
109 ; CHECK-SD-NEXT: fmul v0.2d, v0.2d, v2.2d
112 ; CHECK-GI-LABEL: fmul_v4f64:
113 ; CHECK-GI: // %bb.0: // %entry
114 ; CHECK-GI-NEXT: fmul v0.2d, v0.2d, v2.2d
115 ; CHECK-GI-NEXT: fmul v1.2d, v1.2d, v3.2d
118 %c = fmul <4 x double> %a, %b
; <2 x float> multiply. All four llc configurations share a single
; expectation: one `fmul v0.2s, v0.2s, v1.2s`.
122 define <2 x float> @fmul_v2f32(<2 x float> %a, <2 x float> %b) {
123 ; CHECK-LABEL: fmul_v2f32:
124 ; CHECK: // %bb.0: // %entry
125 ; CHECK-NEXT: fmul v0.2s, v0.2s, v1.2s
128 %c = fmul <2 x float> %a, %b
; <3 x float> multiply. All four llc configurations expect the same output as
; the four-lane case: one `fmul v0.4s, v0.4s, v1.4s`, even though the IR type
; has only three elements.
132 define <3 x float> @fmul_v3f32(<3 x float> %a, <3 x float> %b) {
133 ; CHECK-LABEL: fmul_v3f32:
134 ; CHECK: // %bb.0: // %entry
135 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
138 %c = fmul <3 x float> %a, %b
; <4 x float> multiply. All four llc configurations share a single
; expectation: one `fmul v0.4s, v0.4s, v1.4s`.
142 define <4 x float> @fmul_v4f32(<4 x float> %a, <4 x float> %b) {
143 ; CHECK-LABEL: fmul_v4f32:
144 ; CHECK: // %bb.0: // %entry
145 ; CHECK-NEXT: fmul v0.4s, v0.4s, v1.4s
148 %c = fmul <4 x float> %a, %b
; <8 x float> multiply. Both selectors are expected to emit two
; `fmul .4s` instructions (v0*v2 and v1*v3); the expectations differ only in
; the order in which the two instructions appear.
152 define <8 x float> @fmul_v8f32(<8 x float> %a, <8 x float> %b) {
153 ; CHECK-SD-LABEL: fmul_v8f32:
154 ; CHECK-SD: // %bb.0: // %entry
155 ; CHECK-SD-NEXT: fmul v1.4s, v1.4s, v3.4s
156 ; CHECK-SD-NEXT: fmul v0.4s, v0.4s, v2.4s
159 ; CHECK-GI-LABEL: fmul_v8f32:
160 ; CHECK-GI: // %bb.0: // %entry
161 ; CHECK-GI-NEXT: fmul v0.4s, v0.4s, v2.4s
162 ; CHECK-GI-NEXT: fmul v1.4s, v1.4s, v3.4s
165 %c = fmul <8 x float> %a, %b
; <7 x half> multiply, checked separately for each of the four llc
; configurations.
;  * With +fullfp16 (either selector): a single `fmul v0.8h, v0.8h, v1.8h`.
;  * Without +fullfp16, without -global-isel: both operands are widened to
;    single precision with fcvtl / fcvtl2, multiplied with two `fmul .4s`
;    instructions, and narrowed back with fcvtn / fcvtn2.
;  * Without +fullfp16, with -global-isel: the low four lanes go through
;    fcvtl / fmul / fcvtn directly, while lanes 4-6 of each operand are first
;    moved out one element at a time, rebuilt into temporaries, widened,
;    multiplied and narrowed. The final value is then reassembled in v0 with
;    per-lane `mov` inserts for lanes 1 through 6.
169 define <7 x half> @fmul_v7f16(<7 x half> %a, <7 x half> %b) {
170 ; CHECK-SD-NOFP16-LABEL: fmul_v7f16:
171 ; CHECK-SD-NOFP16: // %bb.0: // %entry
172 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
173 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
174 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
175 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
176 ; CHECK-SD-NOFP16-NEXT: fmul v2.4s, v3.4s, v2.4s
177 ; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v0.4s, v1.4s
178 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
179 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
180 ; CHECK-SD-NOFP16-NEXT: ret
182 ; CHECK-SD-FP16-LABEL: fmul_v7f16:
183 ; CHECK-SD-FP16: // %bb.0: // %entry
184 ; CHECK-SD-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
185 ; CHECK-SD-FP16-NEXT: ret
187 ; CHECK-GI-NOFP16-LABEL: fmul_v7f16:
188 ; CHECK-GI-NOFP16: // %bb.0: // %entry
189 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
190 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
191 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[4]
192 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[5]
193 ; CHECK-GI-NOFP16-NEXT: mov h6, v1.h[4]
194 ; CHECK-GI-NOFP16-NEXT: mov h7, v1.h[5]
195 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[6]
196 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v2.4s, v3.4s
197 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
198 ; CHECK-GI-NOFP16-NEXT: mov v4.h[1], v5.h[0]
199 ; CHECK-GI-NOFP16-NEXT: mov v6.h[1], v7.h[0]
200 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
201 ; CHECK-GI-NOFP16-NEXT: mov v4.h[2], v3.h[0]
202 ; CHECK-GI-NOFP16-NEXT: mov v6.h[2], v1.h[0]
203 ; CHECK-GI-NOFP16-NEXT: mov h1, v0.h[1]
204 ; CHECK-GI-NOFP16-NEXT: mov h5, v0.h[3]
205 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v4.4h
206 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v6.4h
207 ; CHECK-GI-NOFP16-NEXT: mov h4, v0.h[2]
208 ; CHECK-GI-NOFP16-NEXT: mov v0.h[1], v1.h[0]
209 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v2.4s, v3.4s
210 ; CHECK-GI-NOFP16-NEXT: mov v0.h[2], v4.h[0]
211 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v1.4s
212 ; CHECK-GI-NOFP16-NEXT: mov v0.h[3], v5.h[0]
213 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
214 ; CHECK-GI-NOFP16-NEXT: mov v0.h[4], v1.h[0]
215 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[2]
216 ; CHECK-GI-NOFP16-NEXT: mov v0.h[5], v2.h[0]
217 ; CHECK-GI-NOFP16-NEXT: mov v0.h[6], v1.h[0]
218 ; CHECK-GI-NOFP16-NEXT: ret
220 ; CHECK-GI-FP16-LABEL: fmul_v7f16:
221 ; CHECK-GI-FP16: // %bb.0: // %entry
222 ; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
223 ; CHECK-GI-FP16-NEXT: ret
225 %c = fmul <7 x half> %a, %b
; <4 x half> multiply, checked separately for each of the four llc
; configurations.
;  * Without +fullfp16: both operands are widened with fcvtl, multiplied with
;    one `fmul .4s`, and narrowed back with fcvtn. The runs with and without
;    -global-isel differ only in the order of the two fcvtl instructions.
;  * With +fullfp16: a single `fmul v0.4h, v0.4h, v1.4h` in both runs.
229 define <4 x half> @fmul_v4f16(<4 x half> %a, <4 x half> %b) {
230 ; CHECK-SD-NOFP16-LABEL: fmul_v4f16:
231 ; CHECK-SD-NOFP16: // %bb.0: // %entry
232 ; CHECK-SD-NOFP16-NEXT: fcvtl v1.4s, v1.4h
233 ; CHECK-SD-NOFP16-NEXT: fcvtl v0.4s, v0.4h
234 ; CHECK-SD-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
235 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v0.4s
236 ; CHECK-SD-NOFP16-NEXT: ret
238 ; CHECK-SD-FP16-LABEL: fmul_v4f16:
239 ; CHECK-SD-FP16: // %bb.0: // %entry
240 ; CHECK-SD-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
241 ; CHECK-SD-FP16-NEXT: ret
243 ; CHECK-GI-NOFP16-LABEL: fmul_v4f16:
244 ; CHECK-GI-NOFP16: // %bb.0: // %entry
245 ; CHECK-GI-NOFP16-NEXT: fcvtl v0.4s, v0.4h
246 ; CHECK-GI-NOFP16-NEXT: fcvtl v1.4s, v1.4h
247 ; CHECK-GI-NOFP16-NEXT: fmul v0.4s, v0.4s, v1.4s
248 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v0.4s
249 ; CHECK-GI-NOFP16-NEXT: ret
251 ; CHECK-GI-FP16-LABEL: fmul_v4f16:
252 ; CHECK-GI-FP16: // %bb.0: // %entry
253 ; CHECK-GI-FP16-NEXT: fmul v0.4h, v0.4h, v1.4h
254 ; CHECK-GI-FP16-NEXT: ret
256 %c = fmul <4 x half> %a, %b
; <8 x half> multiply, checked separately for each of the four llc
; configurations.
;  * Without +fullfp16: the low and high halves of both operands are widened
;    with fcvtl / fcvtl2, multiplied with two `fmul .4s` instructions, and
;    narrowed back into v0 with fcvtn / fcvtn2. The runs with and without
;    -global-isel differ in the order and register assignment of the
;    widening instructions.
;  * With +fullfp16: a single `fmul v0.8h, v0.8h, v1.8h` in both runs.
260 define <8 x half> @fmul_v8f16(<8 x half> %a, <8 x half> %b) {
261 ; CHECK-SD-NOFP16-LABEL: fmul_v8f16:
262 ; CHECK-SD-NOFP16: // %bb.0: // %entry
263 ; CHECK-SD-NOFP16-NEXT: fcvtl v2.4s, v1.4h
264 ; CHECK-SD-NOFP16-NEXT: fcvtl v3.4s, v0.4h
265 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
266 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
267 ; CHECK-SD-NOFP16-NEXT: fmul v2.4s, v3.4s, v2.4s
268 ; CHECK-SD-NOFP16-NEXT: fmul v1.4s, v0.4s, v1.4s
269 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v2.4s
270 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
271 ; CHECK-SD-NOFP16-NEXT: ret
273 ; CHECK-SD-FP16-LABEL: fmul_v8f16:
274 ; CHECK-SD-FP16: // %bb.0: // %entry
275 ; CHECK-SD-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
276 ; CHECK-SD-FP16-NEXT: ret
278 ; CHECK-GI-NOFP16-LABEL: fmul_v8f16:
279 ; CHECK-GI-NOFP16: // %bb.0: // %entry
280 ; CHECK-GI-NOFP16-NEXT: fcvtl v2.4s, v0.4h
281 ; CHECK-GI-NOFP16-NEXT: fcvtl v3.4s, v1.4h
282 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
283 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
284 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v2.4s, v3.4s
285 ; CHECK-GI-NOFP16-NEXT: fmul v1.4s, v0.4s, v1.4s
286 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v2.4s
287 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v1.4s
288 ; CHECK-GI-NOFP16-NEXT: ret
290 ; CHECK-GI-FP16-LABEL: fmul_v8f16:
291 ; CHECK-GI-FP16: // %bb.0: // %entry
292 ; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v1.8h
293 ; CHECK-GI-FP16-NEXT: ret
295 %c = fmul <8 x half> %a, %b
; <16 x half> multiply, checked separately for each of the four llc
; configurations.
;  * Without +fullfp16: each of the four input registers is widened with
;    fcvtl and fcvtl2, four `fmul .4s` instructions do the multiplication,
;    and the results are narrowed back into v0 and v1 with fcvtn / fcvtn2.
;    The runs with and without -global-isel differ in instruction order and
;    register assignment.
;  * With +fullfp16: two `fmul .8h` instructions (v0*v2 and v1*v3); the two
;    selectors differ only in the order in which they appear.
299 define <16 x half> @fmul_v16f16(<16 x half> %a, <16 x half> %b) {
300 ; CHECK-SD-NOFP16-LABEL: fmul_v16f16:
301 ; CHECK-SD-NOFP16: // %bb.0: // %entry
302 ; CHECK-SD-NOFP16-NEXT: fcvtl v4.4s, v2.4h
303 ; CHECK-SD-NOFP16-NEXT: fcvtl v5.4s, v0.4h
304 ; CHECK-SD-NOFP16-NEXT: fcvtl v6.4s, v3.4h
305 ; CHECK-SD-NOFP16-NEXT: fcvtl v7.4s, v1.4h
306 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
307 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
308 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
309 ; CHECK-SD-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
310 ; CHECK-SD-NOFP16-NEXT: fmul v4.4s, v5.4s, v4.4s
311 ; CHECK-SD-NOFP16-NEXT: fmul v5.4s, v7.4s, v6.4s
312 ; CHECK-SD-NOFP16-NEXT: fmul v2.4s, v0.4s, v2.4s
313 ; CHECK-SD-NOFP16-NEXT: fmul v3.4s, v1.4s, v3.4s
314 ; CHECK-SD-NOFP16-NEXT: fcvtn v0.4h, v4.4s
315 ; CHECK-SD-NOFP16-NEXT: fcvtn v1.4h, v5.4s
316 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
317 ; CHECK-SD-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
318 ; CHECK-SD-NOFP16-NEXT: ret
320 ; CHECK-SD-FP16-LABEL: fmul_v16f16:
321 ; CHECK-SD-FP16: // %bb.0: // %entry
322 ; CHECK-SD-FP16-NEXT: fmul v1.8h, v1.8h, v3.8h
323 ; CHECK-SD-FP16-NEXT: fmul v0.8h, v0.8h, v2.8h
324 ; CHECK-SD-FP16-NEXT: ret
326 ; CHECK-GI-NOFP16-LABEL: fmul_v16f16:
327 ; CHECK-GI-NOFP16: // %bb.0: // %entry
328 ; CHECK-GI-NOFP16-NEXT: fcvtl v4.4s, v0.4h
329 ; CHECK-GI-NOFP16-NEXT: fcvtl v5.4s, v1.4h
330 ; CHECK-GI-NOFP16-NEXT: fcvtl v6.4s, v2.4h
331 ; CHECK-GI-NOFP16-NEXT: fcvtl v7.4s, v3.4h
332 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v0.4s, v0.8h
333 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v1.4s, v1.8h
334 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v2.4s, v2.8h
335 ; CHECK-GI-NOFP16-NEXT: fcvtl2 v3.4s, v3.8h
336 ; CHECK-GI-NOFP16-NEXT: fmul v4.4s, v4.4s, v6.4s
337 ; CHECK-GI-NOFP16-NEXT: fmul v5.4s, v5.4s, v7.4s
338 ; CHECK-GI-NOFP16-NEXT: fmul v2.4s, v0.4s, v2.4s
339 ; CHECK-GI-NOFP16-NEXT: fmul v3.4s, v1.4s, v3.4s
340 ; CHECK-GI-NOFP16-NEXT: fcvtn v0.4h, v4.4s
341 ; CHECK-GI-NOFP16-NEXT: fcvtn v1.4h, v5.4s
342 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v0.8h, v2.4s
343 ; CHECK-GI-NOFP16-NEXT: fcvtn2 v1.8h, v3.4s
344 ; CHECK-GI-NOFP16-NEXT: ret
346 ; CHECK-GI-FP16-LABEL: fmul_v16f16:
347 ; CHECK-GI-FP16: // %bb.0: // %entry
348 ; CHECK-GI-FP16-NEXT: fmul v0.8h, v0.8h, v2.8h
349 ; CHECK-GI-FP16-NEXT: fmul v1.8h, v1.8h, v3.8h
350 ; CHECK-GI-FP16-NEXT: ret
352 %c = fmul <16 x half> %a, %b