1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=aarch64 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-NOFP16
3 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-SD,CHECK-SD-FP16
4 ; RUN: llc -mtriple=aarch64 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-NOFP16
5 ; RUN: llc -mtriple=aarch64 -mattr=+fullfp16 -global-isel -verify-machineinstrs %s -o - | FileCheck %s --check-prefixes=CHECK,CHECK-GI,CHECK-GI-FP16
; mul_HalfS: fmul reduction of a <2 x float> argument with start value 1.0.
; The call carries no fast-math flags. Both check sets expect one scalar fmul
; of lane 0 by lane 1; the -global-isel output (CHECK-GI) first copies lane 1
; into s1 with a separate mov, while CHECK-SD uses the by-element fmul form.
7 define float @mul_HalfS(<2 x float> %bin.rdx) {
8 ; CHECK-SD-LABEL: mul_HalfS:
10 ; CHECK-SD-NEXT: // kill: def $d0 killed $d0 def $q0
11 ; CHECK-SD-NEXT: fmul s0, s0, v0.s[1]
14 ; CHECK-GI-LABEL: mul_HalfS:
16 ; CHECK-GI-NEXT: // kill: def $d0 killed $d0 def $q0
17 ; CHECK-GI-NEXT: mov s1, v0.s[1]
18 ; CHECK-GI-NEXT: fmul s0, s0, s1
20 %r = call float @llvm.vector.reduce.fmul.f32.v2f32(float 1.0, <2 x float> %bin.rdx)
; mul_HalfH: fmul reduction of a <4 x half> argument with start value 1.0.
; The call carries no fast-math flags; every check set multiplies the lanes
; in order, lane 0 first.
;  * NOFP16 runs (no +fullfp16): each step converts the operands to single
;    precision (fcvt s, h), multiplies in s registers and converts the product
;    back to half (fcvt h, s) before the next step.
;  * FP16 runs (+fullfp16): a chain of fmul instructions on h registers.
;  * CHECK-GI-NOFP16 additionally materialises 0x3c00 (the half 1.0 start
;    value) and multiplies lane 0 by it; the other three check sets contain no
;    such multiply.
24 define half @mul_HalfH(<4 x half> %bin.rdx) {
25 ; CHECK-SD-NOFP16-LABEL: mul_HalfH:
26 ; CHECK-SD-NOFP16: // %bb.0:
27 ; CHECK-SD-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
28 ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
29 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
30 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
31 ; CHECK-SD-NOFP16-NEXT: fmul s1, s2, s1
32 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
33 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[3]
34 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
35 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
36 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
37 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
38 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
39 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
40 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
41 ; CHECK-SD-NOFP16-NEXT: fmul s0, s1, s0
42 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
43 ; CHECK-SD-NOFP16-NEXT: ret
45 ; CHECK-SD-FP16-LABEL: mul_HalfH:
46 ; CHECK-SD-FP16: // %bb.0:
47 ; CHECK-SD-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
48 ; CHECK-SD-FP16-NEXT: fmul h1, h0, v0.h[1]
49 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
50 ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[3]
51 ; CHECK-SD-FP16-NEXT: ret
53 ; CHECK-GI-NOFP16-LABEL: mul_HalfH:
54 ; CHECK-GI-NOFP16: // %bb.0:
55 ; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
56 ; CHECK-GI-NOFP16-NEXT: // kill: def $d0 killed $d0 def $q0
57 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
58 ; CHECK-GI-NOFP16-NEXT: fmov s1, w8
59 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
60 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
61 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
62 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
63 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
64 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
65 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
66 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
67 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[3]
68 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
69 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
70 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
71 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
72 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
73 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
74 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
75 ; CHECK-GI-NOFP16-NEXT: fmul s0, s1, s0
76 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
77 ; CHECK-GI-NOFP16-NEXT: ret
79 ; CHECK-GI-FP16-LABEL: mul_HalfH:
80 ; CHECK-GI-FP16: // %bb.0:
81 ; CHECK-GI-FP16-NEXT: // kill: def $d0 killed $d0 def $q0
82 ; CHECK-GI-FP16-NEXT: mov h1, v0.h[1]
83 ; CHECK-GI-FP16-NEXT: mov h2, v0.h[2]
84 ; CHECK-GI-FP16-NEXT: fmul h1, h0, h1
85 ; CHECK-GI-FP16-NEXT: mov h0, v0.h[3]
86 ; CHECK-GI-FP16-NEXT: fmul h1, h1, h2
87 ; CHECK-GI-FP16-NEXT: fmul h0, h1, h0
88 ; CHECK-GI-FP16-NEXT: ret
89 %r = call half @llvm.vector.reduce.fmul.f16.v4f16(half 1.0, <4 x half> %bin.rdx)
; mul_H: fmul reduction of an <8 x half> argument with start value 1.0.
; The call carries no fast-math flags; every check set multiplies lanes 0..7
; of v0 in order.
;  * NOFP16 runs: each step is widened to single precision (fcvt s, h), the
;    fmul is done in s registers and the product is narrowed back to half
;    (fcvt h, s) before it feeds the next step.
;  * FP16 runs: seven by-element fmul instructions on h registers; the SD and
;    GI expectations are the same instruction sequence.
;  * CHECK-GI-NOFP16 additionally materialises 0x3c00 (the half 1.0 start
;    value) and multiplies lane 0 by it first.
94 define half @mul_H(<8 x half> %bin.rdx) {
95 ; CHECK-SD-NOFP16-LABEL: mul_H:
96 ; CHECK-SD-NOFP16: // %bb.0:
97 ; CHECK-SD-NOFP16-NEXT: mov h1, v0.h[1]
98 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h0
99 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
100 ; CHECK-SD-NOFP16-NEXT: fmul s1, s2, s1
101 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[2]
102 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
103 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
104 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
105 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
106 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[3]
107 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
108 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
109 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
110 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
111 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[4]
112 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
113 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
114 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
115 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
116 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[5]
117 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
118 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
119 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
120 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
121 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[6]
122 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
123 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
124 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
125 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
126 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
127 ; CHECK-SD-NOFP16-NEXT: fmul s1, s1, s2
128 ; CHECK-SD-NOFP16-NEXT: fcvt h1, s1
129 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
130 ; CHECK-SD-NOFP16-NEXT: fmul s0, s1, s0
131 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
132 ; CHECK-SD-NOFP16-NEXT: ret
134 ; CHECK-SD-FP16-LABEL: mul_H:
135 ; CHECK-SD-FP16: // %bb.0:
136 ; CHECK-SD-FP16-NEXT: fmul h1, h0, v0.h[1]
137 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[2]
138 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[3]
139 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[4]
140 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[5]
141 ; CHECK-SD-FP16-NEXT: fmul h1, h1, v0.h[6]
142 ; CHECK-SD-FP16-NEXT: fmul h0, h1, v0.h[7]
143 ; CHECK-SD-FP16-NEXT: ret
145 ; CHECK-GI-NOFP16-LABEL: mul_H:
146 ; CHECK-GI-NOFP16: // %bb.0:
147 ; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
148 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h0
149 ; CHECK-GI-NOFP16-NEXT: fmov s1, w8
150 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
151 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
152 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[1]
153 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
154 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
155 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
156 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
157 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[2]
158 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
159 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
160 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
161 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
162 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[3]
163 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
164 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
165 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
166 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
167 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[4]
168 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
169 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
170 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
171 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
172 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[5]
173 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
174 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
175 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
176 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
177 ; CHECK-GI-NOFP16-NEXT: mov h2, v0.h[6]
178 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
179 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
180 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
181 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
182 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
183 ; CHECK-GI-NOFP16-NEXT: fmul s1, s1, s2
184 ; CHECK-GI-NOFP16-NEXT: fcvt h1, s1
185 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
186 ; CHECK-GI-NOFP16-NEXT: fmul s0, s1, s0
187 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
188 ; CHECK-GI-NOFP16-NEXT: ret
190 ; CHECK-GI-FP16-LABEL: mul_H:
191 ; CHECK-GI-FP16: // %bb.0:
192 ; CHECK-GI-FP16-NEXT: fmul h1, h0, v0.h[1]
193 ; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[2]
194 ; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[3]
195 ; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[4]
196 ; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[5]
197 ; CHECK-GI-FP16-NEXT: fmul h1, h1, v0.h[6]
198 ; CHECK-GI-FP16-NEXT: fmul h0, h1, v0.h[7]
199 ; CHECK-GI-FP16-NEXT: ret
200 %r = call half @llvm.vector.reduce.fmul.f16.v8f16(half 1.0, <8 x half> %bin.rdx)
; mul_S: fmul reduction of a <4 x float> argument with start value 1.0.
; The call carries no fast-math flags. All four RUN lines share the plain
; CHECK prefix here: three by-element fmul instructions that fold lanes 1, 2
; and 3 of v0 into lane 0 in order, with no multiply by the start value.
204 define float @mul_S(<4 x float> %bin.rdx) {
205 ; CHECK-LABEL: mul_S:
207 ; CHECK-NEXT: fmul s1, s0, v0.s[1]
208 ; CHECK-NEXT: fmul s1, s1, v0.s[2]
209 ; CHECK-NEXT: fmul s0, s1, v0.s[3]
211 %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 1.0, <4 x float> %bin.rdx)
; mul_D: fmul reduction of a <2 x double> argument with start value 1.0.
; The call carries no fast-math flags. The shared CHECK prefix expects a
; single by-element fmul of lane 0 by lane 1.
215 define double @mul_D(<2 x double> %bin.rdx) {
216 ; CHECK-LABEL: mul_D:
218 ; CHECK-NEXT: fmul d0, d0, v0.d[1]
220 %r = call double @llvm.vector.reduce.fmul.f64.v2f64(double 1.0, <2 x double> %bin.rdx)
; mul_2H: fmul reduction of a <16 x half> argument with start value 1.0.
; The call carries no fast-math flags. In the expected code the elements sit
; in two vector registers: lanes 0..7 of v0 are multiplied in order first,
; then h1 and lanes 1..7 of v1.
;  * NOFP16 runs: each step is widened to single precision (fcvt s, h), the
;    fmul is done in s registers and the product is narrowed back to half
;    (fcvt h, s) before it feeds the next step.
;  * FP16 runs: fifteen fmul instructions on h registers; the SD and GI
;    expectations are the same instruction sequence.
;  * CHECK-GI-NOFP16 additionally materialises 0x3c00 (the half 1.0 start
;    value) and multiplies lane 0 by it first.
224 define half @mul_2H(<16 x half> %bin.rdx) {
225 ; CHECK-SD-NOFP16-LABEL: mul_2H:
226 ; CHECK-SD-NOFP16: // %bb.0:
227 ; CHECK-SD-NOFP16-NEXT: mov h2, v0.h[1]
228 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h0
229 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
230 ; CHECK-SD-NOFP16-NEXT: fmul s2, s3, s2
231 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[2]
232 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
233 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
234 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
235 ; CHECK-SD-NOFP16-NEXT: fmul s2, s2, s3
236 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[3]
237 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
238 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
239 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
240 ; CHECK-SD-NOFP16-NEXT: fmul s2, s2, s3
241 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[4]
242 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
243 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
244 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
245 ; CHECK-SD-NOFP16-NEXT: fmul s2, s2, s3
246 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[5]
247 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
248 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
249 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
250 ; CHECK-SD-NOFP16-NEXT: fmul s2, s2, s3
251 ; CHECK-SD-NOFP16-NEXT: mov h3, v0.h[6]
252 ; CHECK-SD-NOFP16-NEXT: mov h0, v0.h[7]
253 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
254 ; CHECK-SD-NOFP16-NEXT: fcvt s3, h3
255 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
256 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
257 ; CHECK-SD-NOFP16-NEXT: fmul s2, s2, s3
258 ; CHECK-SD-NOFP16-NEXT: fcvt h2, s2
259 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
260 ; CHECK-SD-NOFP16-NEXT: fmul s0, s2, s0
261 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h1
262 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
263 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
264 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
265 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[1]
266 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
267 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
268 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
269 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
270 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[2]
271 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
272 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
273 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
274 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
275 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[3]
276 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
277 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
278 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
279 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
280 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[4]
281 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
282 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
283 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
284 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
285 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[5]
286 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
287 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
288 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
289 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
290 ; CHECK-SD-NOFP16-NEXT: mov h2, v1.h[6]
291 ; CHECK-SD-NOFP16-NEXT: mov h1, v1.h[7]
292 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
293 ; CHECK-SD-NOFP16-NEXT: fcvt s2, h2
294 ; CHECK-SD-NOFP16-NEXT: fcvt s1, h1
295 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
296 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s2
297 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
298 ; CHECK-SD-NOFP16-NEXT: fcvt s0, h0
299 ; CHECK-SD-NOFP16-NEXT: fmul s0, s0, s1
300 ; CHECK-SD-NOFP16-NEXT: fcvt h0, s0
301 ; CHECK-SD-NOFP16-NEXT: ret
303 ; CHECK-SD-FP16-LABEL: mul_2H:
304 ; CHECK-SD-FP16: // %bb.0:
305 ; CHECK-SD-FP16-NEXT: fmul h2, h0, v0.h[1]
306 ; CHECK-SD-FP16-NEXT: fmul h2, h2, v0.h[2]
307 ; CHECK-SD-FP16-NEXT: fmul h2, h2, v0.h[3]
308 ; CHECK-SD-FP16-NEXT: fmul h2, h2, v0.h[4]
309 ; CHECK-SD-FP16-NEXT: fmul h2, h2, v0.h[5]
310 ; CHECK-SD-FP16-NEXT: fmul h2, h2, v0.h[6]
311 ; CHECK-SD-FP16-NEXT: fmul h0, h2, v0.h[7]
312 ; CHECK-SD-FP16-NEXT: fmul h0, h0, h1
313 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[1]
314 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[2]
315 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[3]
316 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[4]
317 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[5]
318 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[6]
319 ; CHECK-SD-FP16-NEXT: fmul h0, h0, v1.h[7]
320 ; CHECK-SD-FP16-NEXT: ret
322 ; CHECK-GI-NOFP16-LABEL: mul_2H:
323 ; CHECK-GI-NOFP16: // %bb.0:
324 ; CHECK-GI-NOFP16-NEXT: mov w8, #15360 // =0x3c00
325 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h0
326 ; CHECK-GI-NOFP16-NEXT: fmov s2, w8
327 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
328 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
329 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[1]
330 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
331 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
332 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
333 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
334 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[2]
335 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
336 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
337 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
338 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
339 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[3]
340 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
341 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
342 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
343 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
344 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[4]
345 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
346 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
347 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
348 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
349 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[5]
350 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
351 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
352 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
353 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
354 ; CHECK-GI-NOFP16-NEXT: mov h3, v0.h[6]
355 ; CHECK-GI-NOFP16-NEXT: mov h0, v0.h[7]
356 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
357 ; CHECK-GI-NOFP16-NEXT: fcvt s3, h3
358 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
359 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
360 ; CHECK-GI-NOFP16-NEXT: fmul s2, s2, s3
361 ; CHECK-GI-NOFP16-NEXT: fcvt h2, s2
362 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
363 ; CHECK-GI-NOFP16-NEXT: fmul s0, s2, s0
364 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h1
365 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
366 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
367 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
368 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[1]
369 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
370 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
371 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
372 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
373 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[2]
374 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
375 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
376 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
377 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
378 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[3]
379 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
380 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
381 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
382 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
383 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[4]
384 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
385 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
386 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
387 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
388 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[5]
389 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
390 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
391 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
392 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
393 ; CHECK-GI-NOFP16-NEXT: mov h2, v1.h[6]
394 ; CHECK-GI-NOFP16-NEXT: mov h1, v1.h[7]
395 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
396 ; CHECK-GI-NOFP16-NEXT: fcvt s2, h2
397 ; CHECK-GI-NOFP16-NEXT: fcvt s1, h1
398 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
399 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s2
400 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
401 ; CHECK-GI-NOFP16-NEXT: fcvt s0, h0
402 ; CHECK-GI-NOFP16-NEXT: fmul s0, s0, s1
403 ; CHECK-GI-NOFP16-NEXT: fcvt h0, s0
404 ; CHECK-GI-NOFP16-NEXT: ret
406 ; CHECK-GI-FP16-LABEL: mul_2H:
407 ; CHECK-GI-FP16: // %bb.0:
408 ; CHECK-GI-FP16-NEXT: fmul h2, h0, v0.h[1]
409 ; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[2]
410 ; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[3]
411 ; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[4]
412 ; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[5]
413 ; CHECK-GI-FP16-NEXT: fmul h2, h2, v0.h[6]
414 ; CHECK-GI-FP16-NEXT: fmul h0, h2, v0.h[7]
415 ; CHECK-GI-FP16-NEXT: fmul h0, h0, h1
416 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[1]
417 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[2]
418 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[3]
419 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[4]
420 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[5]
421 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[6]
422 ; CHECK-GI-FP16-NEXT: fmul h0, h0, v1.h[7]
423 ; CHECK-GI-FP16-NEXT: ret
424 %r = call half @llvm.vector.reduce.fmul.f16.v16f16(half 1.0, <16 x half> %bin.rdx)
; mul_2S: fmul reduction of an <8 x float> argument with start value 1.0.
; The call carries no fast-math flags. The shared CHECK prefix expects the
; lanes of v0 to be multiplied in order first, then s1 and lanes 1..3 of v1,
; with no multiply by the start value.
428 define float @mul_2S(<8 x float> %bin.rdx) {
429 ; CHECK-LABEL: mul_2S:
431 ; CHECK-NEXT: fmul s2, s0, v0.s[1]
432 ; CHECK-NEXT: fmul s2, s2, v0.s[2]
433 ; CHECK-NEXT: fmul s0, s2, v0.s[3]
434 ; CHECK-NEXT: fmul s0, s0, s1
435 ; CHECK-NEXT: fmul s0, s0, v1.s[1]
436 ; CHECK-NEXT: fmul s0, s0, v1.s[2]
437 ; CHECK-NEXT: fmul s0, s0, v1.s[3]
439 %r = call float @llvm.vector.reduce.fmul.f32.v8f32(float 1.0, <8 x float> %bin.rdx)
; mul_2D: fmul reduction of a <4 x double> argument with start value 1.0.
; The call carries no fast-math flags. The shared CHECK prefix expects three
; fmul instructions in order: lane 0 by lane 1 of v0, then by d1, then by
; lane 1 of v1.
443 define double @mul_2D(<4 x double> %bin.rdx) {
444 ; CHECK-LABEL: mul_2D:
446 ; CHECK-NEXT: fmul d0, d0, v0.d[1]
447 ; CHECK-NEXT: fmul d0, d0, d1
448 ; CHECK-NEXT: fmul d0, d0, v1.d[1]
450 %r = call double @llvm.vector.reduce.fmul.f64.v4f64(double 1.0, <4 x double> %bin.rdx)
454 ; Added at least one test where the start value is not 1.0.
; mul_S_init_42: fmul reduction of a <4 x float> argument with start value
; 42.0. Unlike mul_S, the shared CHECK prefix expects the start value to be
; materialised (0x42280000 is the single-precision encoding of 42.0) and
; multiplied by lane 0 before lanes 1..3 are folded in, in order.
455 define float @mul_S_init_42(<4 x float> %bin.rdx) {
456 ; CHECK-LABEL: mul_S_init_42:
458 ; CHECK-NEXT: mov w8, #1109917696 // =0x42280000
459 ; CHECK-NEXT: fmov s1, w8
460 ; CHECK-NEXT: fmul s1, s1, s0
461 ; CHECK-NEXT: fmul s1, s1, v0.s[1]
462 ; CHECK-NEXT: fmul s1, s1, v0.s[2]
463 ; CHECK-NEXT: fmul s0, s1, v0.s[3]
465 %r = call float @llvm.vector.reduce.fmul.f32.v4f32(float 42.0, <4 x float> %bin.rdx)
; Declarations of the llvm.vector.reduce.fmul intrinsics called by the tests
; in this file. Each takes a scalar start value followed by the vector to
; reduce; there is one declaration per element type / vector width used.
469 ; Function Attrs: nounwind readnone
470 declare half @llvm.vector.reduce.fmul.f16.v4f16(half, <4 x half>)
471 declare half @llvm.vector.reduce.fmul.f16.v8f16(half, <8 x half>)
472 declare half @llvm.vector.reduce.fmul.f16.v16f16(half, <16 x half>)
473 declare float @llvm.vector.reduce.fmul.f32.v2f32(float, <2 x float>)
474 declare float @llvm.vector.reduce.fmul.f32.v4f32(float, <4 x float>)
475 declare float @llvm.vector.reduce.fmul.f32.v8f32(float, <8 x float>)
476 declare double @llvm.vector.reduce.fmul.f64.v2f64(double, <2 x double>)
477 declare double @llvm.vector.reduce.fmul.f64.v4f64(double, <4 x double>)