1 ; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16 -fp-contract=fast | FileCheck %s
; Scalar half multiply-add with the product as the FIRST fadd operand:
; the IR computes (%c * %b) + %a, both operations carrying the `fast` flag.
; The expectation below looks for an fmadd instruction using h registers.
3 define half @test_FMULADDH_OP1(half %a, half %b, half %c) {
4 ; CHECK-LABEL: test_FMULADDH_OP1:
5 ; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
7 %mul = fmul fast half %c, %b
8 %add = fadd fast half %mul, %a
; Scalar half multiply-add with the product as the SECOND fadd operand:
; the IR computes %a + (%c * %b), both operations carrying the `fast` flag.
; The expectation below looks for an fmadd instruction using h registers.
12 define half @test_FMULADDH_OP2(half %a, half %b, half %c) {
13 ; CHECK-LABEL: test_FMULADDH_OP2:
14 ; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
16 %mul = fmul fast half %c, %b
17 %add = fadd fast half %a, %mul
; Scalar half multiply-subtract with the product as the minuend:
; the IR computes (%c * %b) - %a, both operations carrying the `fast` flag.
; The expectation below looks for an fnmsub instruction using h registers.
21 define half @test_FMULSUBH_OP1(half %a, half %b, half %c) {
22 ; CHECK-LABEL: test_FMULSUBH_OP1:
23 ; CHECK: fnmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
25 %mul = fmul fast half %c, %b
26 %sub = fsub fast half %mul, %a
; Scalar half multiply-subtract with the product as the subtrahend:
; the IR computes %a - (%c * %b), both operations carrying the `fast` flag.
; The expectation below looks for an fmsub instruction using h registers.
; Note: the result value is named %add even though it is produced by fsub.
30 define half @test_FMULSUBH_OP2(half %a, half %b, half %c) {
31 ; CHECK-LABEL: test_FMULSUBH_OP2:
32 ; CHECK: fmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
34 %mul = fmul fast half %c, %b
35 %add = fsub fast half %a, %mul
; Scalar half negated multiply-add:
; the product %c * %b is negated by subtracting it from -0.0, and %a is then
; subtracted from that, i.e. the IR computes -(%c * %b) - %a with `fast` flags.
; The expectation below looks for an fnmadd instruction using h registers.
; Note: the final value is named %add even though it is produced by fsub.
39 define half @test_FNMULSUBH_OP1(half %a, half %b, half %c) {
40 ; CHECK-LABEL: test_FNMULSUBH_OP1:
41 ; CHECK: fnmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
43 %mul = fmul fast half %c, %b
44 %neg = fsub fast half -0.0, %mul
45 %add = fsub fast half %neg, %a
; 4-lane half vector multiply-accumulate with the product as the FIRST fadd
; operand: the IR computes (%c * %b) + %a with `fast` flags.
; The expectation below looks for an fmla on the .4h vector arrangement.
49 define <4 x half> @test_FMLAv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
50 ; CHECK-LABEL: test_FMLAv4f16_OP1:
51 ; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
53 %mul = fmul fast <4 x half> %c, %b
54 %add = fadd fast <4 x half> %mul, %a
; 4-lane half vector multiply-accumulate with the product as the SECOND fadd
; operand: the IR computes %a + (%c * %b) with `fast` flags.
; The expectation below looks for an fmla on the .4h vector arrangement.
58 define <4 x half> @test_FMLAv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
59 ; CHECK-LABEL: test_FMLAv4f16_OP2:
60 ; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
62 %mul = fmul fast <4 x half> %c, %b
63 %add = fadd fast <4 x half> %a, %mul
; 8-lane half vector multiply-accumulate with the product as the FIRST fadd
; operand: the IR computes (%c * %b) + %a with `fast` flags.
; The expectation below looks for an fmla on the .8h vector arrangement.
67 define <8 x half> @test_FMLAv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
68 ; CHECK-LABEL: test_FMLAv8f16_OP1:
69 ; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
71 %mul = fmul fast <8 x half> %c, %b
72 %add = fadd fast <8 x half> %mul, %a
; 8-lane half vector multiply-accumulate with the product as the SECOND fadd
; operand: the IR computes %a + (%c * %b) with `fast` flags.
; The expectation below looks for an fmla on the .8h vector arrangement.
76 define <8 x half> @test_FMLAv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
77 ; CHECK-LABEL: test_FMLAv8f16_OP2:
78 ; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
80 %mul = fmul fast <8 x half> %c, %b
81 %add = fadd fast <8 x half> %a, %mul
; 4-lane case where the multiply is an INTEGER mul on <4 x i16>; the product
; is bitcast to <4 x half> and used as the FIRST operand of a fast fadd with %a.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmla would not compute the same value -- confirm the FIXME expectation.
85 define <4 x half> @test_FMLAv4i16_indexed_OP1(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
86 ; CHECK-LABEL: test_FMLAv4i16_indexed_OP1:
87 ; CHECK-FIXME: Currently LLVM produces inefficient code:
90 ; CHECK-FIXME: It should instead produce the following instruction:
91 ; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
93 %mul = mul <4 x i16> %c, %b
94 %m = bitcast <4 x i16> %mul to <4 x half>
95 %add = fadd fast <4 x half> %m, %a
; 4-lane case where the multiply is an INTEGER mul on <4 x i16>; the product
; is bitcast to <4 x half> and used as the SECOND operand of a fast fadd with %a.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmla would not compute the same value -- confirm the FIXME expectation.
99 define <4 x half> @test_FMLAv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
100 ; CHECK-LABEL: test_FMLAv4i16_indexed_OP2:
101 ; CHECK-FIXME: Currently LLVM produces inefficient code:
104 ; CHECK-FIXME: It should instead produce the following instruction:
105 ; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
107 %mul = mul <4 x i16> %c, %b
108 %m = bitcast <4 x i16> %mul to <4 x half>
109 %add = fadd fast <4 x half> %a, %m
; 8-lane case where the multiply is an INTEGER mul on <8 x i16>; the product
; is bitcast to <8 x half> and used as the FIRST operand of a fast fadd with %a.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmla would not compute the same value -- confirm the FIXME expectation.
113 define <8 x half> @test_FMLAv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
114 ; CHECK-LABEL: test_FMLAv8i16_indexed_OP1:
115 ; CHECK-FIXME: Currently LLVM produces inefficient code:
118 ; CHECK-FIXME: It should instead produce the following instruction:
119 ; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
121 %mul = mul <8 x i16> %c, %b
122 %m = bitcast <8 x i16> %mul to <8 x half>
123 %add = fadd fast <8 x half> %m, %a
; 8-lane case where the multiply is an INTEGER mul on <8 x i16>; the product
; is bitcast to <8 x half> and used as the SECOND operand of a fast fadd with %a.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmla would not compute the same value -- confirm the FIXME expectation.
127 define <8 x half> @test_FMLAv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
128 ; CHECK-LABEL: test_FMLAv8i16_indexed_OP2:
129 ; CHECK-FIXME: Currently LLVM produces inefficient code:
132 ; CHECK-FIXME: It should instead produce the following instruction:
133 ; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
135 %mul = mul <8 x i16> %c, %b
136 %m = bitcast <8 x i16> %mul to <8 x half>
137 %add = fadd fast <8 x half> %a, %m
; 4-lane half vector multiply-subtract with the product as the subtrahend:
; the IR computes %a - (%c * %b) with `fast` flags.
; The expectation below looks for an fmls on the .4h vector arrangement.
141 define <4 x half> @test_FMLSv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
142 ; CHECK-LABEL: test_FMLSv4f16_OP2:
143 ; CHECK: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
145 %mul = fmul fast <4 x half> %c, %b
146 %sub = fsub fast <4 x half> %a, %mul
; 8-lane half vector multiply-subtract with the product as the minuend:
; the IR computes (%c * %b) - %a with `fast` flags.
; The expectation below looks for an fmls on the .8h vector arrangement.
; NOTE(review): the operand order here is product minus accumulator, the
; opposite of the OP2 variant -- confirm that a lone fmls is still the
; intended output for this form.
150 define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
151 ; CHECK-LABEL: test_FMLSv8f16_OP1:
152 ; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
154 %mul = fmul fast <8 x half> %c, %b
155 %sub = fsub fast <8 x half> %mul, %a
; 8-lane half vector multiply-subtract with the product as the subtrahend:
; the IR computes %a - (%c * %b) with `fast` flags.
; The expectation below looks for an fmls on the .8h vector arrangement.
159 define <8 x half> @test_FMLSv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
160 ; CHECK-LABEL: test_FMLSv8f16_OP2:
161 ; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
163 %mul = fmul fast <8 x half> %c, %b
164 %sub = fsub fast <8 x half> %a, %mul
; 4-lane case where the multiply is an INTEGER mul on <4 x i16>; the product
; is bitcast to <4 x half> and subtracted from %a with a fast fsub.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmls would not compute the same value -- confirm the FIXME expectation.
168 define <4 x half> @test_FMLSv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
169 ; CHECK-LABEL: test_FMLSv4i16_indexed_OP2:
170 ; CHECK-FIXME: Currently LLVM produces inefficient code:
173 ; CHECK-FIXME: It should instead produce the following instruction:
174 ; CHECK-FIXME: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
176 %mul = mul <4 x i16> %c, %b
177 %m = bitcast <4 x i16> %mul to <4 x half>
178 %sub = fsub fast <4 x half> %a, %m
; 8-lane case where the multiply is an INTEGER mul on <8 x i16>; the product
; is bitcast to <8 x half> and %a is subtracted from it with a fast fsub.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmls would not compute the same value -- confirm the FIXME expectation.
182 define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
183 ; CHECK-LABEL: test_FMLSv8i16_indexed_OP1:
184 ; CHECK-FIXME: Currently LLVM produces inefficient code:
187 ; CHECK-FIXME: It should instead produce the following instruction:
188 ; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
190 %mul = mul <8 x i16> %c, %b
191 %m = bitcast <8 x i16> %mul to <8 x half>
192 %sub = fsub fast <8 x half> %m, %a
; 8-lane case where the multiply is an INTEGER mul on <8 x i16>; the product
; is bitcast to <8 x half> and subtracted from %a with a fast fsub.
; Only the label is verified here; the FIXME notes record the instruction the
; test author would like to see instead of the current output.
; NOTE(review): because the multiply is an integer mul rather than an fmul,
; an fmls would not compute the same value -- confirm the FIXME expectation.
196 define <8 x half> @test_FMLSv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
197 ; CHECK-LABEL: test_FMLSv8i16_indexed_OP2:
198 ; CHECK-FIXME: Currently LLVM produces inefficient code:
201 ; CHECK-FIXME: It should instead produce the following instruction:
202 ; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
204 %mul = mul <8 x i16> %c, %b
205 %m = bitcast <8 x i16> %mul to <8 x half>
206 %sub = fsub fast <8 x half> %a, %m