test/CodeGen/AArch64/neon-scalar-by-elem-fma.ll

   1 ; RUN: llc < %s -verify-machineinstrs -mtriple=aarch64-none-linux-gnu -mattr=+neon -fp-contract=fast | FileCheck %s
   2
   3 declare float @llvm.fma.f32(float, float, float) ; fused multiply-add intrinsic, f32 overload (called by the float tests)
   4 declare double @llvm.fma.f64(double, double, double) ; fused multiply-add intrinsic, f64 overload (called by the double tests)
   5
   6 define float @test_fmla_ss4S(float %a, float %b, <4 x float> %v) { ; f32 fma whose multiplicand is lane 3 of a 4-lane vector
   7   ; CHECK-LABEL: test_fmla_ss4S
   8   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
   9   %lane = extractelement <4 x float> %v, i32 3 ; %v[3]
  10   %res = call float @llvm.fma.f32(float %b, float %lane, float %a) ; %b * %v[3] + %a
  11   ret float %res
  12 }
  13
  14 define float @test_fmla_ss4S_swap(float %a, float %b, <4 x float> %v) { ; same as the ss4S case but with the lane passed as the first fma operand
  15   ; CHECK-LABEL: test_fmla_ss4S_swap
  16   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  17   %tmp1 = extractelement <4 x float> %v, i32 3 ; %v[3]
  18   %tmp2 = call float @llvm.fma.f32(float %tmp1, float %b, float %a) ; %v[3] * %b + %a; multiplicand is %b so both scalar arguments are live
  19   ret float %tmp2
  20 }
  21
  22 define float @test_fmla_ss2S(float %a, float %b, <2 x float> %v) { ; f32 fma whose multiplicand is lane 1 of a 2-lane vector
  23   ; CHECK-LABEL: test_fmla_ss2S
  24   ; CHECK: fmla {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  25   %lane = extractelement <2 x float> %v, i32 1 ; %v[1]
  26   %res = call float @llvm.fma.f32(float %b, float %lane, float %a) ; %b * %v[1] + %a
  27   ret float %res
  28 }
  29
  30 define double @test_fmla_ddD(double %a, double %b, <1 x double> %v) { ; f64 fma with a single-lane vector; expectation accepts by-element fmla or plain fmadd
  31   ; CHECK-LABEL: test_fmla_ddD
  32   ; CHECK: {{fmla d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmadd d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  33   %lane = extractelement <1 x double> %v, i32 0 ; %v[0], the only lane
  34   %res = call double @llvm.fma.f64(double %b, double %lane, double %a) ; %b * %v[0] + %a
  35   ret double %res
  36 }
  37
  38 define double @test_fmla_dd2D(double %a, double %b, <2 x double> %v) { ; f64 fma whose multiplicand is lane 1 of a 2-lane vector
  39   ; CHECK-LABEL: test_fmla_dd2D
  40   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  41   %lane = extractelement <2 x double> %v, i32 1 ; %v[1]
  42   %res = call double @llvm.fma.f64(double %b, double %lane, double %a) ; %b * %v[1] + %a
  43   ret double %res
  44 }
  45
  46 define double @test_fmla_dd2D_swap(double %a, double %b, <2 x double> %v) { ; f64 fma with the lane passed as the first multiply operand
  47   ; CHECK-LABEL: test_fmla_dd2D_swap
  48   ; CHECK: fmla {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  49   %lane = extractelement <2 x double> %v, i32 1 ; %v[1]
  50   %res = call double @llvm.fma.f64(double %lane, double %b, double %a) ; %v[1] * %b + %a
  51   ret double %res
  52 }
  53
  54 define float @test_fmls_ss4S(float %a, float %b, <4 x float> %v) { ; f32 multiply-subtract on lane 3; %b is not referenced by the body
  55   ; CHECK-LABEL: test_fmls_ss4S
  56   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  57   %lane = extractelement <4 x float> %v, i32 3 ; %v[3]
  58   %neg = fsub float -0.0, %lane ; -0.0 - %v[3], i.e. the negated lane
  59   %res = call float @llvm.fma.f32(float %neg, float %lane, float %a) ; (-%v[3]) * %v[3] + %a
  60   ret float %res
  61 }
  62
  63 define float @test_fmls_ss4S_swap(float %a, float %b, <4 x float> %v) { ; f32 multiply-subtract on lane 3, negated value as second operand; %b is not referenced
  64   ; CHECK-LABEL: test_fmls_ss4S_swap
  65   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[3]
  66   %lane = extractelement <4 x float> %v, i32 3 ; %v[3]
  67   %neg = fsub float -0.0, %lane ; -0.0 - %v[3], i.e. the negated lane
  68   %res = call float @llvm.fma.f32(float %lane, float %neg, float %a) ; %v[3] * (-%v[3]) + %a
  69   ret float %res
  70 }
  71
  72
  73 define float @test_fmls_ss2S(float %a, float %b, <2 x float> %v) { ; f32 multiply-subtract on lane 1 of a 2-lane vector; %b is not referenced
  74   ; CHECK-LABEL: test_fmls_ss2S
  75   ; CHECK: fmls {{s[0-9]+}}, {{s[0-9]+}}, {{v[0-9]+}}.s[1]
  76   %lane = extractelement <2 x float> %v, i32 1 ; %v[1]
  77   %neg = fsub float -0.0, %lane ; -0.0 - %v[1], i.e. the negated lane
  78   %res = call float @llvm.fma.f32(float %neg, float %lane, float %a) ; (-%v[1]) * %v[1] + %a
  79   ret float %res
  80 }
  81
  82 define double @test_fmls_ddD(double %a, double %b, <1 x double> %v) { ; f64 multiply-subtract with a single-lane vector; expectation accepts by-element fmls or plain fmsub
  83   ; CHECK-LABEL: test_fmls_ddD
  84   ; CHECK: {{fmls d[0-9]+, d[0-9]+, v[0-9]+.d\[0]|fmsub d[0-9]+, d[0-9]+, d[0-9]+, d[0-9]+}}
  85   %lane = extractelement <1 x double> %v, i32 0 ; %v[0], the only lane
  86   %neg = fsub double -0.0, %lane ; -0.0 - %v[0], i.e. the negated lane
  87   %res = call double @llvm.fma.f64(double %neg, double %lane, double %a) ; (-%v[0]) * %v[0] + %a
  88   ret double %res
  89 }
  90
  91 define double @test_fmls_dd2D(double %a, double %b, <2 x double> %v) { ; f64 multiply-subtract on lane 1 of a 2-lane vector; %b is not referenced
  92   ; CHECK-LABEL: test_fmls_dd2D
  93   ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
  94   %lane = extractelement <2 x double> %v, i32 1 ; %v[1]
  95   %neg = fsub double -0.0, %lane ; -0.0 - %v[1], i.e. the negated lane
  96   %res = call double @llvm.fma.f64(double %neg, double %lane, double %a) ; (-%v[1]) * %v[1] + %a
  97   ret double %res
  98 }
  99
 100 define double @test_fmls_dd2D_swap(double %a, double %b, <2 x double> %v) { ; f64 multiply-subtract on lane 1, negated value as second operand; %b is not referenced
 101   ; CHECK-LABEL: test_fmls_dd2D_swap
 102   ; CHECK: fmls {{d[0-9]+}}, {{d[0-9]+}}, {{v[0-9]+}}.d[1]
 103   %lane = extractelement <2 x double> %v, i32 1 ; %v[1]
 104   %neg = fsub double -0.0, %lane ; -0.0 - %v[1], i.e. the negated lane
 105   %res = call double @llvm.fma.f64(double %lane, double %neg, double %a) ; %v[1] * (-%v[1]) + %a
 106   ret double %res
 107 }
 108