; RUN: llc < %s -mtriple=aarch64-none-linux-gnu -mattr=+v8.2a,+fullfp16 -fp-contract=fast | FileCheck %s
define half @test_FMULADDH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP1:
; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
; Fast-math fmul feeding fadd (mul on LHS) must fuse into a single fmadd.
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %mul, %a
  ret half %add
}
define half @test_FMULADDH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULADDH_OP2:
; CHECK: fmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
; Same fusion with the mul on the RHS of the fadd.
entry:
  %mul = fmul fast half %c, %b
  %add = fadd fast half %a, %mul
  ret half %add
}
define half @test_FMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP1:
; CHECK: fnmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
; (c*b) - a fuses into fnmsub (Rn*Rm - Ra).
entry:
  %mul = fmul fast half %c, %b
  %sub = fsub fast half %mul, %a
  ret half %sub
}
define half @test_FMULSUBH_OP2(half %a, half %b, half %c) {
; CHECK-LABEL: test_FMULSUBH_OP2:
; CHECK: fmsub {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
; a - (c*b) fuses into fmsub (Ra - Rn*Rm).
entry:
  %mul = fmul fast half %c, %b
  %add = fsub fast half %a, %mul
  ret half %add
}
define half @test_FNMULSUBH_OP1(half %a, half %b, half %c) {
; CHECK-LABEL: test_FNMULSUBH_OP1:
; CHECK: fnmadd {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}, {{h[0-9]+}}
; -(c*b) - a fuses into fnmadd (-(Rn*Rm) - Ra).
entry:
  %mul = fmul fast half %c, %b
  %neg = fsub fast half -0.0, %mul
  %add = fsub fast half %neg, %a
  ret half %add
}
define <4 x half> @test_FMLAv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP1:
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; Vector fmul+fadd (mul on LHS) fuses into fmla.
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %mul, %a
  ret <4 x half> %add
}
define <4 x half> @test_FMLAv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLAv4f16_OP2:
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; Vector fmul+fadd (mul on RHS) fuses into fmla.
entry:
  %mul = fmul fast <4 x half> %c, %b
  %add = fadd fast <4 x half> %a, %mul
  ret <4 x half> %add
}
define <8 x half> @test_FMLAv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP1:
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; 128-bit variant of the fmla fusion (mul on LHS).
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %mul, %a
  ret <8 x half> %add
}
define <8 x half> @test_FMLAv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLAv8f16_OP2:
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; 128-bit variant of the fmla fusion (mul on RHS).
entry:
  %mul = fmul fast <8 x half> %c, %b
  %add = fadd fast <8 x half> %a, %mul
  ret <8 x half> %add
}
define <4 x half> @test_FMLAv4i16_indexed_OP1(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP1:
; The integer mul cannot be fused with the fadd, so today we get a
; separate mul + fadd pair.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %m, %a
  ret <4 x half> %add
}
define <4 x half> @test_FMLAv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLAv4i16_indexed_OP2:
; Same as OP1 with the bitcast mul on the RHS of the fadd.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %add = fadd fast <4 x half> %a, %m
  ret <4 x half> %add
}
define <8 x half> @test_FMLAv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP1:
; 128-bit variant: integer mul + fadd are not fused today.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %m, %a
  ret <8 x half> %add
}
define <8 x half> @test_FMLAv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLAv8i16_indexed_OP2:
; 128-bit variant with the bitcast mul on the RHS of the fadd.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fadd
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %add = fadd fast <8 x half> %a, %m
  ret <8 x half> %add
}
define <4 x half> @test_FMLSv4f16_OP1(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP1:
; CHECK: fneg {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; CHECK: fmla {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; (c*b) - a lowers to fneg of the accumulator followed by fmla.
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %mul, %a
  ret <4 x half> %sub
}
define <4 x half> @test_FMLSv4f16_OP2(<4 x half> %a, <4 x half> %b, <4 x half> %c) {
; CHECK-LABEL: test_FMLSv4f16_OP2:
; CHECK: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
; a - (c*b) fuses directly into fmls.
entry:
  %mul = fmul fast <4 x half> %c, %b
  %sub = fsub fast <4 x half> %a, %mul
  ret <4 x half> %sub
}
define <8 x half> @test_FMLSv8f16_OP1(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP1:
; CHECK: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; 128-bit variant: (c*b) - a lowers to fneg + fmla.
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %mul, %a
  ret <8 x half> %sub
}
define <8 x half> @test_FMLSv8f16_OP2(<8 x half> %a, <8 x half> %b, <8 x half> %c) {
; CHECK-LABEL: test_FMLSv8f16_OP2:
; CHECK: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; 128-bit variant: a - (c*b) fuses directly into fmls.
entry:
  %mul = fmul fast <8 x half> %c, %b
  %sub = fsub fast <8 x half> %a, %mul
  ret <8 x half> %sub
}
define <4 x half> @test_FMLSv4i16_indexed_OP2(<4 x half> %a, <4 x i16> %b, <4 x i16> %c) {
; CHECK-LABEL: test_FMLSv4i16_indexed_OP2:
; The integer mul cannot be fused with the fsub, so today we get a
; separate mul + fsub pair.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instruction:
; CHECK-FIXME: fmls {{v[0-9]+}}.4h, {{v[0-9]+}}.4h, {{v[0-9]+}}.4h
entry:
  %mul = mul <4 x i16> %c, %b
  %m = bitcast <4 x i16> %mul to <4 x half>
  %sub = fsub fast <4 x half> %a, %m
  ret <4 x half> %sub
}
define <8 x half> @test_FMLSv8i16_indexed_OP1(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
; CHECK-LABEL: test_FMLSv8i16_indexed_OP1:
; Integer mul + fsub are not fused today; the desired lowering would be
; fneg of the accumulator followed by fmla.
; CHECK-FIXME: Currently LLVM produces inefficient code:
; CHECK: mul
; CHECK: fsub
; CHECK-FIXME: It should instead produce the following instructions:
; CHECK-FIXME: fneg {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
; CHECK-FIXME: fmla {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
entry:
  %mul = mul <8 x i16> %c, %b
  %m = bitcast <8 x i16> %mul to <8 x half>
  %sub = fsub fast <8 x half> %m, %a
  ret <8 x half> %sub
}
208 define <8 x half> @test_FMLSv8i16_indexed_OP2(<8 x half> %a, <8 x i16> %b, <8 x i16> %c) {
209 ; CHECK-LABEL: test_FMLSv8i16_indexed_OP2:
210 ; CHECK-FIXME: Currently LLVM produces inefficient code:
213 ; CHECK-FIXME: It should instead produce the following instruction:
214 ; CHECK-FIXME: fmls {{v[0-9]+}}.8h, {{v[0-9]+}}.8h, {{v[0-9]+}}.8h
216 %mul = mul <8 x i16> %c, %b
217 %m = bitcast <8 x i16> %mul to <8 x half>
218 %sub = fsub fast <8 x half> %a, %m