llvm/test/CodeGen/AArch64/neon-fma.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
   2
   3 define <2 x float> @fmla2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; Separate fmul + fadd computing %C + %A * %B on <2 x float>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .2s fmla. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmla2xfloat:
   4 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
   5         %tmp1 = fmul <2 x float> %A, %B
   6         %tmp2 = fadd <2 x float> %C, %tmp1
   7         ret <2 x float> %tmp2
   8 }
   9
  10 define <4 x float> @fmla4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     ; Separate fmul + fadd computing %C + %A * %B on <4 x float>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .4s fmla. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmla4xfloat:
  11 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  12         %tmp1 = fmul <4 x float> %A, %B
  13         %tmp2 = fadd <4 x float> %C, %tmp1
  14         ret <4 x float> %tmp2
  15 }
  16
  17 define <2 x double> @fmla2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; Separate fmul + fadd computing %C + %A * %B on <2 x double>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .2d fmla. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmla2xdouble:
  18 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  19         %tmp1 = fmul <2 x double> %A, %B
  20         %tmp2 = fadd <2 x double> %C, %tmp1
  21         ret <2 x double> %tmp2
  22 }
  23
  24
  25 define <2 x float> @fmls2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; Separate fmul + fsub computing %C - %A * %B on <2 x float>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .2s fmls. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmls2xfloat:
  26 ;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  27         %tmp1 = fmul <2 x float> %A, %B
  28         %tmp2 = fsub <2 x float> %C, %tmp1
  29         ret <2 x float> %tmp2
  30 }
  31
  32 define <4 x float> @fmls4xfloat(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     ; Separate fmul + fsub computing %C - %A * %B on <4 x float>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .4s fmls. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmls4xfloat:
  33 ;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  34         %tmp1 = fmul <4 x float> %A, %B
  35         %tmp2 = fsub <4 x float> %C, %tmp1
  36         ret <4 x float> %tmp2
  37 }
  38
  39 define <2 x double> @fmls2xdouble(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; Separate fmul + fsub computing %C - %A * %B on <2 x double>. llc is invoked
     ; with -fp-contract=fast and the test expects a single .2d fmls. The label
     ; directive below ties that expectation to this function's own output.
     ;CHECK-LABEL: fmls2xdouble:
  40 ;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  41         %tmp1 = fmul <2 x double> %A, %B
  42         %tmp2 = fsub <2 x double> %C, %tmp1
  43         ret <2 x double> %tmp2
  44 }
  45
  46
  47 ; Another set of tests: the llvm.fma intrinsic is called directly instead of
     ; writing the multiply and the add/subtract as separate instructions.
  48
     ; Overloads of llvm.fma (<2 x float>, <4 x float>, <2 x double>) called by
     ; the *_fused functions that follow.
  49 declare <2 x float> @llvm.fma.v2f32(<2 x float>, <2 x float>, <2 x float>)
  50 declare <4 x float> @llvm.fma.v4f32(<4 x float>, <4 x float>, <4 x float>)
  51 declare <2 x double> @llvm.fma.v2f64(<2 x double>, <2 x double>, <2 x double>)
  52
  53 define <2 x float> @fmla2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; Direct call to llvm.fma.v2f32(%A, %B, %C); the test expects it to be
     ; emitted as a .2s fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmla2xfloat_fused:
  54 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  55         %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  56         ret <2 x float> %val
  57 }
  58
  59 define <4 x float> @fmla4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     ; Direct call to llvm.fma.v4f32(%A, %B, %C); the test expects it to be
     ; emitted as a .4s fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmla4xfloat_fused:
  60 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  61         %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
  62         ret <4 x float> %val
  63 }
  64
  65 define <2 x double> @fmla2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; Direct call to llvm.fma.v2f64(%A, %B, %C); the test expects it to be
     ; emitted as a .2d fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmla2xdouble_fused:
  66 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  67         %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
  68         ret <2 x double> %val
  69 }
  70
  71 define <2 x float> @fmls2xfloat_fused(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; %negA is -0.0 - %A, i.e. the negation of %A; llvm.fma.v2f32(%negA, %B, %C)
     ; is expected to be emitted as a .2s fmls. The label directive below ties
     ; that expectation to this function's own output.
     ;CHECK-LABEL: fmls2xfloat_fused:
  72 ;CHECK: fmls {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  73         %negA = fsub <2 x float> <float -0.0, float -0.0>, %A
  74         %val = call <2 x float> @llvm.fma.v2f32(<2 x float> %negA, <2 x float> %B, <2 x float> %C)
  75         ret <2 x float> %val
  76 }
  77
  78 define <4 x float> @fmls4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     ; %negA is -0.0 - %A, i.e. the negation of %A; llvm.fma.v4f32(%negA, %B, %C)
     ; is expected to be emitted as a .4s fmls. The label directive below ties
     ; that expectation to this function's own output.
     ;CHECK-LABEL: fmls4xfloat_fused:
  79 ;CHECK: fmls {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
  80         %negA = fsub <4 x float> <float -0.0, float -0.0, float -0.0, float -0.0>, %A
  81         %val = call <4 x float> @llvm.fma.v4f32(<4 x float> %negA, <4 x float> %B, <4 x float> %C)
  82         ret <4 x float> %val
  83 }
  84
  85 define <2 x double> @fmls2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; %negA is -0.0 - %A, i.e. the negation of %A; llvm.fma.v2f64(%negA, %B, %C)
     ; is expected to be emitted as a .2d fmls. The label directive below ties
     ; that expectation to this function's own output.
     ;CHECK-LABEL: fmls2xdouble_fused:
  86 ;CHECK: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
  87         %negA = fsub <2 x double> <double -0.0, double -0.0>, %A
  88         %val = call <2 x double> @llvm.fma.v2f64(<2 x double> %negA, <2 x double> %B, <2 x double> %C)
  89         ret <2 x double> %val
  90 }
  91
     ; Overloads of llvm.fmuladd (<2 x float>, <4 x float>, <2 x double>) called
     ; by the fmuladd* functions that follow.
  92 declare <2 x float> @llvm.fmuladd.v2f32(<2 x float>, <2 x float>, <2 x float>)
  93 declare <4 x float> @llvm.fmuladd.v4f32(<4 x float>, <4 x float>, <4 x float>)
  94 declare <2 x double> @llvm.fmuladd.v2f64(<2 x double>, <2 x double>, <2 x double>)
  95
  96 define <2 x float> @fmuladd2xfloat(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; Direct call to llvm.fmuladd.v2f32(%A, %B, %C); the test expects it to be
     ; emitted as a .2s fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmuladd2xfloat:
  97 ;CHECK: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
  98         %val = call <2 x float> @llvm.fmuladd.v2f32(<2 x float> %A, <2 x float> %B, <2 x float> %C)
  99         ret <2 x float> %val
 100 }
 101
 102 define <4 x float> @fmuladd4xfloat_fused(<4 x float> %A, <4 x float> %B, <4 x float> %C) {
     ; Direct call to llvm.fmuladd.v4f32(%A, %B, %C); the test expects it to be
     ; emitted as a .4s fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmuladd4xfloat_fused:
 103 ;CHECK: fmla {{v[0-9]+}}.4s, {{v[0-9]+}}.4s, {{v[0-9]+}}.4s
 104         %val = call <4 x float> @llvm.fmuladd.v4f32(<4 x float> %A, <4 x float> %B, <4 x float> %C)
 105         ret <4 x float> %val
 106 }
 107
 108 define <2 x double> @fmuladd2xdouble_fused(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; Direct call to llvm.fmuladd.v2f64(%A, %B, %C); the test expects it to be
     ; emitted as a .2d fmla. The label directive below ties that expectation
     ; to this function's own output.
     ;CHECK-LABEL: fmuladd2xdouble_fused:
 109 ;CHECK: fmla {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 110         %val = call <2 x double> @llvm.fmuladd.v2f64(<2 x double> %A, <2 x double> %B, <2 x double> %C)
 111         ret <2 x double> %val
 112 }
 113
 114
 115 ; Another set of tests: here the result of the multiply has more than one use, so no fused multiply-add/subtract is expected.
 116
 117 define <2 x float> @fmla2xfloati_su(<2 x float> %A, <2 x float> %B, <2 x float> %C) {
     ; The product %tmp1 feeds both fadd instructions, so the multiply has two
     ; uses; the test expects that no .2s fmla is emitted. The label directive
     ; below starts the negative match at this function's own output.
     ;CHECK-LABEL: fmla2xfloati_su:
 118 ;CHECK-NOT: fmla {{v[0-9]+}}.2s, {{v[0-9]+}}.2s, {{v[0-9]+}}.2s
 119         %tmp1 = fmul <2 x float> %A, %B
 120         %tmp2 = fadd <2 x float> %C, %tmp1
 121         %tmp3 = fadd <2 x float> %tmp2, %tmp1
 122         ret <2 x float> %tmp3
 123 }
 124
 125 define <2 x double> @fmls2xdouble_su(<2 x double> %A, <2 x double> %B, <2 x double> %C) {
     ; The product %tmp1 feeds both fsub instructions, so the multiply has two
     ; uses; the test expects that no .2d fmls is emitted. The label directive
     ; below starts the negative match at this function's own output.
     ;CHECK-LABEL: fmls2xdouble_su:
 126 ;CHECK-NOT: fmls {{v[0-9]+}}.2d, {{v[0-9]+}}.2d, {{v[0-9]+}}.2d
 127         %tmp1 = fmul <2 x double> %A, %B
 128         %tmp2 = fsub <2 x double> %C, %tmp1
 129         %tmp3 = fsub <2 x double> %tmp2, %tmp1
 130         ret <2 x double> %tmp3
 131 }
 132