1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc < %s -mtriple=aarch64 -mattr=+v8.2a,+fullfp16 | FileCheck %s
; Intrinsics exercised by the functions in this file: the AArch64 NEON fmulx
; intrinsic (scalar half, <4 x half>, <8 x half>) and the generic fma intrinsic
; (<4 x half>, <8 x half>, scalar half).
; NOTE(review): the scalar fma declaration references attribute group #1, whose
; definition is not in the visible part of the file - confirm it is defined or
; intentionally left out.
4 declare half @llvm.aarch64.neon.fmulx.f16(half, half)
5 declare <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half>, <4 x half>)
6 declare <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half>, <8 x half>)
7 declare <4 x half> @llvm.fma.v4f16(<4 x half>, <4 x half>, <4 x half>)
8 declare <8 x half> @llvm.fma.v8f16(<8 x half>, <8 x half>, <8 x half>)
9 declare half @llvm.fma.f16(half, half, half) #1
; <4 x half> fma(%b, splat(%c[0]), %a) where %c is a <4 x half> vector: the
; assertions expect one by-element fmla on lane 0 with no separate dup.
; %lane is not used by the visible instructions.
11 define dso_local <4 x half> @t_vfma_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
12 ; CHECK-LABEL: t_vfma_lane_f16:
13 ; CHECK: // %bb.0: // %entry
14 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
15 ; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
18 %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
19 %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %lane1, <4 x half> %a)
; <8 x half> fma(%b, splat(%c[0]), %a) where the splat widens a <4 x half>
; source to eight lanes: the assertions expect one by-element fmla on lane 0.
; %lane is not used by the visible instructions.
23 define dso_local <8 x half> @t_vfmaq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
24 ; CHECK-LABEL: t_vfmaq_lane_f16:
25 ; CHECK: // %bb.0: // %entry
26 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
27 ; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
30 %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
31 %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %lane1, <8 x half> %a)
; <4 x half> fma(splat(%c[0]), %b, %a) where the splat narrows an <8 x half>
; source to four lanes and is the first multiplicand: the assertions still
; expect one by-element fmla on lane 0.
; %lane is not used by the visible instructions.
35 define dso_local <4 x half> @t_vfma_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
36 ; CHECK-LABEL: t_vfma_laneq_f16:
37 ; CHECK: // %bb.0: // %entry
38 ; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
41 %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
42 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %b, <4 x half> %a)
; <8 x half> fma(splat(%c[0]), %b, %a) with an <8 x half> lane source as the
; first multiplicand: the assertions expect one by-element fmla on lane 0.
; %lane is not used by the visible instructions.
46 define dso_local <8 x half> @t_vfmaq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
47 ; CHECK-LABEL: t_vfmaq_laneq_f16:
48 ; CHECK: // %bb.0: // %entry
49 ; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
52 %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
53 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %b, <8 x half> %a)
; <4 x half> fma(%b, splat(scalar %c), %a): the scalar is inserted at element 0
; and broadcast with a zero shuffle mask. The assertions expect the broadcast to
; fold into a by-element fmla reading lane 0 of the register holding %c.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
57 define dso_local <4 x half> @t_vfma_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
58 ; CHECK-LABEL: t_vfma_n_f16:
59 ; CHECK: // %bb.0: // %entry
60 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
61 ; CHECK-NEXT: fmla v0.4h, v1.4h, v2.h[0]
64 %vecinit = insertelement <4 x half> undef, half %c, i32 0
65 %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
66 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %b, <4 x half> %vecinit3, <4 x half> %a) #4
; <8 x half> fma(%b, splat(scalar %c), %a): the scalar is inserted at element 0
; and broadcast with a zero shuffle mask. The assertions expect the broadcast to
; fold into a by-element fmla reading lane 0 of the register holding %c.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
70 define dso_local <8 x half> @t_vfmaq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
71 ; CHECK-LABEL: t_vfmaq_n_f16:
72 ; CHECK: // %bb.0: // %entry
73 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
74 ; CHECK-NEXT: fmla v0.8h, v1.8h, v2.h[0]
77 %vecinit = insertelement <8 x half> undef, half %c, i32 0
78 %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
79 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %b, <8 x half> %vecinit7, <8 x half> %a) #4
; Scalar half fma(%b, %c[0], %a) with %c a <4 x half> vector: because lane 0 is
; directly readable as h2, the assertions expect a plain scalar fmadd rather
; than the by-element form. %lane is not used by the visible instructions.
83 define dso_local half @t_vfmah_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
84 ; CHECK-LABEL: t_vfmah_lane_f16_0:
85 ; CHECK: // %bb.0: // %entry
86 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
87 ; CHECK-NEXT: fmadd h0, h1, h2, h0
90 %extract = extractelement <4 x half> %c, i32 0
91 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Same as t_vfmah_lane_f16_0 but with the multiplicands swapped in the fma call
; (extracted lane first): the assertions expect a scalar fmadd with h2 and h1 in
; the swapped order. %lane is not used by the visible instructions.
95 define dso_local half @t_vfmah_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
96 ; CHECK-LABEL: t_vfmah_lane_f16_0_swap:
97 ; CHECK: // %bb.0: // %entry
98 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
99 ; CHECK-NEXT: fmadd h0, h2, h1, h0
102 %extract = extractelement <4 x half> %c, i32 0
103 %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
; Scalar half fma(%b, %c[3], %a) with %c a <4 x half> vector: the assertions
; expect the scalar by-element fmla reading lane 3 directly, with no separate
; lane extraction. %lane is not used by the visible instructions.
107 define dso_local half @t_vfmah_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
108 ; CHECK-LABEL: t_vfmah_lane_f16_3:
109 ; CHECK: // %bb.0: // %entry
110 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
111 ; CHECK-NEXT: fmla h0, h1, v2.h[3]
114 %extract = extractelement <4 x half> %c, i32 3
115 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Scalar half fma(%b, %c[0], %a) with %c an <8 x half> vector: the assertions
; expect a plain scalar fmadd using h2 for lane 0.
; %lane is not used by the visible instructions.
119 define dso_local half @t_vfmah_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
120 ; CHECK-LABEL: t_vfmah_laneq_f16_0:
121 ; CHECK: // %bb.0: // %entry
122 ; CHECK-NEXT: fmadd h0, h1, h2, h0
125 %extract = extractelement <8 x half> %c, i32 0
126 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Same as t_vfmah_laneq_f16_0 but with the extracted lane as the first fma
; multiplicand: the assertions expect a scalar fmadd with h2 and h1 swapped.
; %lane is not used by the visible instructions.
130 define dso_local half @t_vfmah_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
131 ; CHECK-LABEL: t_vfmah_laneq_f16_0_swap:
132 ; CHECK: // %bb.0: // %entry
133 ; CHECK-NEXT: fmadd h0, h2, h1, h0
136 %extract = extractelement <8 x half> %c, i32 0
137 %0 = tail call half @llvm.fma.f16(half %extract, half %b, half %a)
; Scalar half fma(%b, %c[7], %a) with %c an <8 x half> vector: the assertions
; expect the scalar by-element fmla reading lane 7 directly.
; %lane is not used by the visible instructions.
141 define dso_local half @t_vfmah_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
142 ; CHECK-LABEL: t_vfmah_laneq_f16_7:
143 ; CHECK: // %bb.0: // %entry
144 ; CHECK-NEXT: fmla h0, h1, v2.h[7]
147 %extract = extractelement <8 x half> %c, i32 7
148 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; <4 x half> fma(-%b, splat(%c[0]), %a) with a <4 x half> lane source. 0xH8000 is
; -0.0 in half precision, so %sub is the negation of %b. The assertions expect
; the negation and the splat to fold into one by-element fmls on lane 0.
; %lane is not used by the visible instructions.
152 define dso_local <4 x half> @t_vfms_lane_f16(<4 x half> %a, <4 x half> %b, <4 x half> %c, i32 %lane) {
153 ; CHECK-LABEL: t_vfms_lane_f16:
154 ; CHECK: // %bb.0: // %entry
155 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
156 ; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
159 %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
160 %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <4 x i32> zeroinitializer
161 %fmla3 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %lane1, <4 x half> %a)
162 ret <4 x half> %fmla3
; <8 x half> fma(-%b, splat(%c[0]), %a) where the splat widens a <4 x half>
; source to eight lanes. 0xH8000 is -0.0 in half precision, so %sub is the
; negation of %b. The assertions expect one by-element fmls on lane 0.
; %lane is not used by the visible instructions.
165 define dso_local <8 x half> @t_vfmsq_lane_f16(<8 x half> %a, <8 x half> %b, <4 x half> %c, i32 %lane) {
166 ; CHECK-LABEL: t_vfmsq_lane_f16:
167 ; CHECK: // %bb.0: // %entry
168 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
169 ; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
172 %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
173 %lane1 = shufflevector <4 x half> %c, <4 x half> undef, <8 x i32> zeroinitializer
174 %fmla3 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %lane1, <8 x half> %a)
175 ret <8 x half> %fmla3
; <4 x half> fma(splat(%c[0]), -%b, %a) where the splat narrows an <8 x half>
; source and is the first multiplicand. 0xH8000 is -0.0 in half precision, so
; %sub is the negation of %b. The assertions expect one by-element fmls on
; lane 0. %lane is not used by the visible instructions.
178 define dso_local <4 x half> @t_vfms_laneq_f16(<4 x half> %a, <4 x half> %b, <8 x half> %c, i32 %lane) {
179 ; CHECK-LABEL: t_vfms_laneq_f16:
180 ; CHECK: // %bb.0: // %entry
181 ; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
184 %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
185 %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <4 x i32> zeroinitializer
186 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %lane1, <4 x half> %sub, <4 x half> %a)
; <8 x half> fma(splat(%c[0]), -%b, %a) with an <8 x half> lane source as the
; first multiplicand. 0xH8000 is -0.0 in half precision, so %sub is the negation
; of %b. The assertions expect one by-element fmls on lane 0.
; %lane is not used by the visible instructions.
190 define dso_local <8 x half> @t_vfmsq_laneq_f16(<8 x half> %a, <8 x half> %b, <8 x half> %c, i32 %lane) {
191 ; CHECK-LABEL: t_vfmsq_laneq_f16:
192 ; CHECK: // %bb.0: // %entry
193 ; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
196 %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
197 %lane1 = shufflevector <8 x half> %c, <8 x half> undef, <8 x i32> zeroinitializer
198 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %lane1, <8 x half> %sub, <8 x half> %a)
; <4 x half> fma(-%b, splat(scalar %c), %a). 0xH8000 is -0.0 in half precision,
; so %sub is the negation of %b; the scalar is inserted at element 0 and
; broadcast with a zero shuffle mask. The assertions expect one by-element fmls
; reading lane 0 of the register holding %c.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
202 define dso_local <4 x half> @t_vfms_n_f16(<4 x half> %a, <4 x half> %b, half %c) {
203 ; CHECK-LABEL: t_vfms_n_f16:
204 ; CHECK: // %bb.0: // %entry
205 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
206 ; CHECK-NEXT: fmls v0.4h, v1.4h, v2.h[0]
209 %sub = fsub <4 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
210 %vecinit = insertelement <4 x half> undef, half %c, i32 0
211 %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
212 %0 = tail call <4 x half> @llvm.fma.v4f16(<4 x half> %sub, <4 x half> %vecinit3, <4 x half> %a) #4
; <8 x half> fma(-%b, splat(scalar %c), %a). 0xH8000 is -0.0 in half precision,
; so %sub is the negation of %b; the scalar is inserted at element 0 and
; broadcast with a zero shuffle mask. The assertions expect one by-element fmls
; reading lane 0 of the register holding %c.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
216 define dso_local <8 x half> @t_vfmsq_n_f16(<8 x half> %a, <8 x half> %b, half %c) {
217 ; CHECK-LABEL: t_vfmsq_n_f16:
218 ; CHECK: // %bb.0: // %entry
219 ; CHECK-NEXT: // kill: def $h2 killed $h2 def $q2
220 ; CHECK-NEXT: fmls v0.8h, v1.8h, v2.h[0]
223 %sub = fsub <8 x half> <half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000, half 0xH8000>, %b
224 %vecinit = insertelement <8 x half> undef, half %c, i32 0
225 %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
226 %0 = tail call <8 x half> @llvm.fma.v8f16(<8 x half> %sub, <8 x half> %vecinit7, <8 x half> %a) #4
; Scalar half fma(-%b, %c[0], %a) with %c a <4 x half> vector. 0xH8000 is -0.0
; in half precision, so %0 is the negation of %b. The assertions expect the
; negation to fold into a scalar fmsub, with lane 0 read as h2.
; %lane is not used by the visible instructions.
230 define dso_local half @t_vfmsh_lane_f16_0(half %a, half %b, <4 x half> %c, i32 %lane) {
231 ; CHECK-LABEL: t_vfmsh_lane_f16_0:
232 ; CHECK: // %bb.0: // %entry
233 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
234 ; CHECK-NEXT: fmsub h0, h2, h1, h0
237 %0 = fsub half 0xH8000, %b
238 %extract = extractelement <4 x half> %c, i32 0
239 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; Same as t_vfmsh_lane_f16_0 but with the extracted lane as the first fma
; multiplicand and the negated %b as the second. The assertions expect the same
; scalar fmsub operand order as the non-swapped variant.
; %lane is not used by the visible instructions.
243 define dso_local half @t_vfmsh_lane_f16_0_swap(half %a, half %b, <4 x half> %c, i32 %lane) {
244 ; CHECK-LABEL: t_vfmsh_lane_f16_0_swap:
245 ; CHECK: // %bb.0: // %entry
246 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
247 ; CHECK-NEXT: fmsub h0, h2, h1, h0
250 %0 = fsub half 0xH8000, %b
251 %extract = extractelement <4 x half> %c, i32 0
252 %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
; Scalar half fma(-%b, %c[3], %a) with %c a <4 x half> vector. 0xH8000 is -0.0
; in half precision, so %0 is the negation of %b. The assertions expect the
; scalar by-element fmls reading lane 3 directly.
; %lane is not used by the visible instructions.
256 define dso_local half @t_vfmsh_lane_f16_3(half %a, half %b, <4 x half> %c, i32 %lane) {
257 ; CHECK-LABEL: t_vfmsh_lane_f16_3:
258 ; CHECK: // %bb.0: // %entry
259 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
260 ; CHECK-NEXT: fmls h0, h1, v2.h[3]
263 %0 = fsub half 0xH8000, %b
264 %extract = extractelement <4 x half> %c, i32 3
265 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; Scalar half fma(-%b, %c[0], %a) with %c an <8 x half> vector. 0xH8000 is -0.0
; in half precision, so %0 is the negation of %b. The assertions expect a scalar
; fmsub with lane 0 read as h2.
; %lane is not used by the visible instructions.
269 define dso_local half @t_vfmsh_laneq_f16_0(half %a, half %b, <8 x half> %c, i32 %lane) {
270 ; CHECK-LABEL: t_vfmsh_laneq_f16_0:
271 ; CHECK: // %bb.0: // %entry
272 ; CHECK-NEXT: fmsub h0, h2, h1, h0
275 %0 = fsub half 0xH8000, %b
276 %extract = extractelement <8 x half> %c, i32 0
277 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; Same as t_vfmsh_laneq_f16_0 but with the extracted lane as the first fma
; multiplicand and the negated %b as the second. The assertions expect the same
; scalar fmsub operand order as the non-swapped variant.
; %lane is not used by the visible instructions.
281 define dso_local half @t_vfmsh_laneq_f16_0_swap(half %a, half %b, <8 x half> %c, i32 %lane) {
282 ; CHECK-LABEL: t_vfmsh_laneq_f16_0_swap:
283 ; CHECK: // %bb.0: // %entry
284 ; CHECK-NEXT: fmsub h0, h2, h1, h0
287 %0 = fsub half 0xH8000, %b
288 %extract = extractelement <8 x half> %c, i32 0
289 %1 = tail call half @llvm.fma.f16(half %extract, half %0, half %a)
; Scalar half fma(-%b, %c[7], %a) with %c an <8 x half> vector. 0xH8000 is -0.0
; in half precision, so %0 is the negation of %b. The assertions expect the
; scalar by-element fmls reading lane 7 directly.
; %lane is not used by the visible instructions.
293 define dso_local half @t_vfmsh_laneq_f16_7(half %a, half %b, <8 x half> %c, i32 %lane) {
294 ; CHECK-LABEL: t_vfmsh_laneq_f16_7:
295 ; CHECK: // %bb.0: // %entry
296 ; CHECK-NEXT: fmls h0, h1, v2.h[7]
299 %0 = fsub half 0xH8000, %b
300 %extract = extractelement <8 x half> %c, i32 7
301 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; <4 x half> fmul of splat(%b[0]) (narrowed from an <8 x half> source) by %a:
; the assertions expect one by-element fmul on lane 0.
; %lane is not used by the visible instructions.
305 define dso_local <4 x half> @t_vmul_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
306 ; CHECK-LABEL: t_vmul_laneq_f16:
307 ; CHECK: // %bb.0: // %entry
308 ; CHECK-NEXT: fmul v0.4h, v0.4h, v1.h[0]
311 %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
312 %mul = fmul <4 x half> %shuffle, %a
; <8 x half> fmul of splat(%b[0]) by %a with an <8 x half> lane source: the
; assertions expect one by-element fmul on lane 0.
; %lane is not used by the visible instructions.
316 define dso_local <8 x half> @t_vmulq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
317 ; CHECK-LABEL: t_vmulq_laneq_f16:
318 ; CHECK: // %bb.0: // %entry
319 ; CHECK-NEXT: fmul v0.8h, v0.8h, v1.h[0]
322 %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
323 %mul = fmul <8 x half> %shuffle, %a
; Scalar half fmul of %c[0] by %a with %c a <4 x half> vector: the assertions
; expect a plain scalar fmul using h1 for lane 0.
; %lane is not used by the visible instructions.
327 define dso_local half @t_vmulh_lane0_f16(half %a, <4 x half> %c, i32 %lane) {
328 ; CHECK-LABEL: t_vmulh_lane0_f16:
329 ; CHECK: // %bb.0: // %entry
330 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
331 ; CHECK-NEXT: fmul h0, h0, h1
334 %0 = extractelement <4 x half> %c, i32 0
335 %1 = fmul half %0, %a
; Scalar half fmul of %c[3] by %a with %c a <4 x half> vector: the assertions
; expect the scalar by-element fmul reading lane 3 directly.
; %lane is not used by the visible instructions.
339 define dso_local half @t_vmulh_lane3_f16(half %a, <4 x half> %c, i32 %lane) {
340 ; CHECK-LABEL: t_vmulh_lane3_f16:
341 ; CHECK: // %bb.0: // %entry
342 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
343 ; CHECK-NEXT: fmul h0, h0, v1.h[3]
346 %0 = extractelement <4 x half> %c, i32 3
347 %1 = fmul half %0, %a
; Scalar half fmul of %c[0] by %a with %c an <8 x half> vector: the assertions
; expect a plain scalar fmul using h1 for lane 0.
; %lane is not used by the visible instructions.
351 define dso_local half @t_vmulh_laneq0_f16(half %a, <8 x half> %c, i32 %lane) {
352 ; CHECK-LABEL: t_vmulh_laneq0_f16:
353 ; CHECK: // %bb.0: // %entry
354 ; CHECK-NEXT: fmul h0, h0, h1
357 %0 = extractelement <8 x half> %c, i32 0
358 %1 = fmul half %0, %a
; Scalar half fmul of %c[7] by %a with %c an <8 x half> vector: the assertions
; expect the scalar by-element fmul reading lane 7 directly.
; %lane is not used by the visible instructions.
362 define dso_local half @t_vmulh_laneq7_f16(half %a, <8 x half> %c, i32 %lane) {
363 ; CHECK-LABEL: t_vmulh_laneq7_f16:
364 ; CHECK: // %bb.0: // %entry
365 ; CHECK-NEXT: fmul h0, h0, v1.h[7]
368 %0 = extractelement <8 x half> %c, i32 7
369 %1 = fmul half %0, %a
; Baseline scalar case: the fmulx intrinsic on two half arguments. The
; assertions expect a single scalar fmulx on h0 and h1.
373 define dso_local half @t_vmulx_f16(half %a, half %b) {
374 ; CHECK-LABEL: t_vmulx_f16:
375 ; CHECK: // %bb.0: // %entry
376 ; CHECK-NEXT: fmulx h0, h0, h1
379 %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %b)
; Scalar fmulx of %a by %b[0] with %b a <4 x half> vector: the assertions expect
; a plain scalar fmulx using h1 for lane 0.
383 define dso_local half @t_vmulxh_lane0_f16(half %a, <4 x half> %b) {
384 ; CHECK-LABEL: t_vmulxh_lane0_f16:
385 ; CHECK: // %bb.0: // %entry
386 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
387 ; CHECK-NEXT: fmulx h0, h0, h1
390 %extract = extractelement <4 x half> %b, i32 0
391 %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
; Scalar fmulx of %a by %b[3] with %b a <4 x half> vector: the assertions expect
; the scalar by-element fmulx reading lane 3 directly.
; %lane is not used by the visible instructions.
395 define dso_local half @t_vmulxh_lane3_f16(half %a, <4 x half> %b, i32 %lane) {
396 ; CHECK-LABEL: t_vmulxh_lane3_f16:
397 ; CHECK: // %bb.0: // %entry
398 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
399 ; CHECK-NEXT: fmulx h0, h0, v1.h[3]
402 %extract = extractelement <4 x half> %b, i32 3
403 %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
; <4 x half> fmulx of %a by splat(%b[0]) with a <4 x half> lane source: the
; assertions expect one by-element fmulx on lane 0.
; %lane is not used by the visible instructions.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
407 define dso_local <4 x half> @t_vmulx_lane_f16(<4 x half> %a, <4 x half> %b, i32 %lane) {
408 ; CHECK-LABEL: t_vmulx_lane_f16:
409 ; CHECK: // %bb.0: // %entry
410 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
411 ; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
414 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <4 x i32> zeroinitializer
415 %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
416 ret <4 x half> %vmulx2.i
; <8 x half> fmulx of %a by splat(%b[0]) where the splat widens a <4 x half>
; source to eight lanes: the assertions expect one by-element fmulx on lane 0.
; %lane is not used by the visible instructions.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
419 define dso_local <8 x half> @t_vmulxq_lane_f16(<8 x half> %a, <4 x half> %b, i32 %lane) {
420 ; CHECK-LABEL: t_vmulxq_lane_f16:
421 ; CHECK: // %bb.0: // %entry
422 ; CHECK-NEXT: // kill: def $d1 killed $d1 def $q1
423 ; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
426 %shuffle = shufflevector <4 x half> %b, <4 x half> undef, <8 x i32> zeroinitializer
427 %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
428 ret <8 x half> %vmulx2.i
; <4 x half> fmulx of %a by splat(%b[0]) where the splat narrows an <8 x half>
; source to four lanes: the assertions expect one by-element fmulx on lane 0.
; %lane is not used by the visible instructions.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
431 define dso_local <4 x half> @t_vmulx_laneq_f16(<4 x half> %a, <8 x half> %b, i32 %lane) {
432 ; CHECK-LABEL: t_vmulx_laneq_f16:
433 ; CHECK: // %bb.0: // %entry
434 ; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.h[0]
437 %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <4 x i32> zeroinitializer
438 %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %shuffle) #4
439 ret <4 x half> %vmulx2.i
; <8 x half> fmulx of %a by splat(%b[0]) with an <8 x half> lane source: the
; assertions expect one by-element fmulx on lane 0.
; %lane is not used by the visible instructions.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
442 define dso_local <8 x half> @t_vmulxq_laneq_f16(<8 x half> %a, <8 x half> %b, i32 %lane) {
443 ; CHECK-LABEL: t_vmulxq_laneq_f16:
444 ; CHECK: // %bb.0: // %entry
445 ; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.h[0]
448 %shuffle = shufflevector <8 x half> %b, <8 x half> undef, <8 x i32> zeroinitializer
449 %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %shuffle) #4
450 ret <8 x half> %vmulx2.i
; Scalar fmulx of %a by %b[0] with %b an <8 x half> vector: the assertions
; expect a plain scalar fmulx using h1 for lane 0.
453 define dso_local half @t_vmulxh_laneq0_f16(half %a, <8 x half> %b) {
454 ; CHECK-LABEL: t_vmulxh_laneq0_f16:
455 ; CHECK: // %bb.0: // %entry
456 ; CHECK-NEXT: fmulx h0, h0, h1
459 %extract = extractelement <8 x half> %b, i32 0
460 %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
; Scalar fmulx of %a by %b[7] with %b an <8 x half> vector: the assertions
; expect the scalar by-element fmulx reading lane 7 directly.
; %lane is not used by the visible instructions.
464 define dso_local half @t_vmulxh_laneq7_f16(half %a, <8 x half> %b, i32 %lane) {
465 ; CHECK-LABEL: t_vmulxh_laneq7_f16:
466 ; CHECK: // %bb.0: // %entry
467 ; CHECK-NEXT: fmulx h0, h0, v1.h[7]
470 %extract = extractelement <8 x half> %b, i32 7
471 %fmulx.i = tail call half @llvm.aarch64.neon.fmulx.f16(half %a, half %extract)
; <4 x half> fmulx of %a by splat(scalar %c): the scalar is inserted at element
; 0 and broadcast with a zero shuffle mask. Unlike the fma "_n" cases in this
; file, the recorded assertions expect an explicit dup followed by a full-vector
; fmulx, not the by-element form.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
475 define dso_local <4 x half> @t_vmulx_n_f16(<4 x half> %a, half %c) {
476 ; CHECK-LABEL: t_vmulx_n_f16:
477 ; CHECK: // %bb.0: // %entry
478 ; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
479 ; CHECK-NEXT: dup v1.4h, v1.h[0]
480 ; CHECK-NEXT: fmulx v0.4h, v0.4h, v1.4h
483 %vecinit = insertelement <4 x half> undef, half %c, i32 0
484 %vecinit3 = shufflevector <4 x half> %vecinit, <4 x half> undef, <4 x i32> zeroinitializer
485 %vmulx2.i = tail call <4 x half> @llvm.aarch64.neon.fmulx.v4f16(<4 x half> %a, <4 x half> %vecinit3) #4
486 ret <4 x half> %vmulx2.i
; <8 x half> fmulx of %a by splat(scalar %c): the scalar is inserted at element
; 0 and broadcast with a zero shuffle mask. Unlike the fma "_n" cases in this
; file, the recorded assertions expect an explicit dup followed by a full-vector
; fmulx, not the by-element form.
; NOTE(review): the call references attribute group #4, which is not defined in
; the visible part of the file.
489 define dso_local <8 x half> @t_vmulxq_n_f16(<8 x half> %a, half %c) {
490 ; CHECK-LABEL: t_vmulxq_n_f16:
491 ; CHECK: // %bb.0: // %entry
492 ; CHECK-NEXT: // kill: def $h1 killed $h1 def $q1
493 ; CHECK-NEXT: dup v1.8h, v1.h[0]
494 ; CHECK-NEXT: fmulx v0.8h, v0.8h, v1.8h
497 %vecinit = insertelement <8 x half> undef, half %c, i32 0
498 %vecinit7 = shufflevector <8 x half> %vecinit, <8 x half> undef, <8 x i32> zeroinitializer
499 %vmulx2.i = tail call <8 x half> @llvm.aarch64.neon.fmulx.v8f16(<8 x half> %a, <8 x half> %vecinit7) #4
500 ret <8 x half> %vmulx2.i
; Scalar half fma(%b, %c[3], %a) with %c a <4 x half> vector and no %lane
; parameter in the signature: the assertions expect the scalar by-element fmla
; reading lane 3 directly.
503 define dso_local half @t_vfmah_lane3_f16(half %a, half %b, <4 x half> %c) {
504 ; CHECK-LABEL: t_vfmah_lane3_f16:
505 ; CHECK: // %bb.0: // %entry
506 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
507 ; CHECK-NEXT: fmla h0, h1, v2.h[3]
510 %extract = extractelement <4 x half> %c, i32 3
511 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Scalar half fma(%b, %c[7], %a) with %c an <8 x half> vector and no %lane
; parameter in the signature: the assertions expect the scalar by-element fmla
; reading lane 7 directly.
515 define dso_local half @t_vfmah_laneq7_f16(half %a, half %b, <8 x half> %c) {
516 ; CHECK-LABEL: t_vfmah_laneq7_f16:
517 ; CHECK: // %bb.0: // %entry
518 ; CHECK-NEXT: fmla h0, h1, v2.h[7]
521 %extract = extractelement <8 x half> %c, i32 7
522 %0 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Scalar half fma(-%b, %c[3], %a) with %c a <4 x half> vector and no %lane
; parameter in the signature. 0xH8000 is -0.0 in half precision, so %0 is the
; negation of %b. The assertions expect the scalar by-element fmls reading
; lane 3 directly.
526 define dso_local half @t_vfmsh_lane3_f16(half %a, half %b, <4 x half> %c) {
527 ; CHECK-LABEL: t_vfmsh_lane3_f16:
528 ; CHECK: // %bb.0: // %entry
529 ; CHECK-NEXT: // kill: def $d2 killed $d2 def $q2
530 ; CHECK-NEXT: fmls h0, h1, v2.h[3]
533 %0 = fsub half 0xH8000, %b
534 %extract = extractelement <4 x half> %c, i32 3
535 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; Scalar half fma(-%b, %c[7], %a) with %c an <8 x half> vector and no %lane
; parameter in the signature. 0xH8000 is -0.0 in half precision, so %0 is the
; negation of %b. The assertions expect the scalar by-element fmls reading
; lane 7 directly.
539 define dso_local half @t_vfmsh_laneq7_f16(half %a, half %b, <8 x half> %c) {
540 ; CHECK-LABEL: t_vfmsh_laneq7_f16:
541 ; CHECK: // %bb.0: // %entry
542 ; CHECK-NEXT: fmls h0, h1, v2.h[7]
545 %0 = fsub half 0xH8000, %b
546 %extract = extractelement <8 x half> %c, i32 7
547 %1 = tail call half @llvm.fma.f16(half %0, half %extract, half %a)
; The lane operand here is computed, not a function argument: lane 3 of the
; vector sum %c + %d feeds a scalar fma(%b, lane, %a). The assertions expect a
; vector fadd followed by a scalar by-element fmla that reads lane 3 of the sum
; in place.
551 define dso_local half @t_fadd_vfmah_f16(half %a, half %b, <4 x half> %c, <4 x half> %d) {
552 ; CHECK-LABEL: t_fadd_vfmah_f16:
553 ; CHECK: // %bb.0: // %entry
554 ; CHECK-NEXT: fadd v2.4h, v2.4h, v3.4h
555 ; CHECK-NEXT: fmla h0, h1, v2.h[3]
558 %0 = fadd <4 x half> %c, %d
559 %extract = extractelement <4 x half> %0, i32 3
560 %1 = tail call half @llvm.fma.f16(half %b, half %extract, half %a)
; Scalar fmulx of the two elements of one <2 x half> argument (%v[0] by %v[1]):
; the assertions expect a single scalar by-element fmulx that reads lane 0 as h0
; and lane 1 as v0.h[1] from the same register.
564 define half @test_fmulx_horizontal_f16(<2 x half> %v) {
565 ; CHECK-LABEL: test_fmulx_horizontal_f16:
566 ; CHECK: // %bb.0: // %entry
567 ; CHECK-NEXT: // kill: def $d0 killed $d0 def $q0
568 ; CHECK-NEXT: fmulx h0, h0, v0.h[1]
571 %0 = extractelement <2 x half> %v, i32 0
572 %1 = extractelement <2 x half> %v, i32 1
573 %2 = call half @llvm.aarch64.neon.fmulx.f16(half %0, half %1)