test/CodeGen/X86/avx2-fma-fneg-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=i686-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X32
   3 ; RUN: llc < %s -mtriple=x86_64-unknown-unknown -mattr=+avx2,+fma | FileCheck %s --check-prefix=X64
   4
   5 ; This test checks how FNEG (written as an fsub from -0.0) combines with FMA intrinsics.
   6
   7 define <8 x float> @test1(<8 x float> %a, <8 x float> %b, <8 x float> %c)  {
     ; test1: the addend %c is negated (fsub from -0.0) before it is passed as
     ; the third operand of the <8 x float> vfmadd intrinsic. Both RUN lines
     ; expect a single vfmsub213ps, (ymm1 * ymm0) - ymm2, with no separate
     ; negate instruction.
     ; NOTE(review): the call carries attribute group #2, which has no
     ; definition in the visible part of this file - confirm this is intended.
   8 ; X32-LABEL: test1:
   9 ; X32:       # %bb.0: # %entry
  10 ; X32-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
  11 ; X32-NEXT:    retl
  12 ;
  13 ; X64-LABEL: test1:
  14 ; X64:       # %bb.0: # %entry
  15 ; X64-NEXT:    vfmsub213ps {{.*#+}} ymm0 = (ymm1 * ymm0) - ymm2
  16 ; X64-NEXT:    retq
  17 entry:
  18   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  19   %0 = tail call <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.i) #2
  20   ret <8 x float> %0
  21 }
  22
     ; <8 x float> vfmadd intrinsic called by test1.
  23 declare <8 x float> @llvm.x86.fma.vfmadd.ps.256(<8 x float>, <8 x float>, <8 x float>)
  24
  25 define <4 x float> @test2(<4 x float> %a, <4 x float> %b, <4 x float> %c) {
     ; test2: the result of the <4 x float> vfmadd intrinsic is negated
     ; (fsub from -0.0). Both RUN lines expect the negate to be absorbed into
     ; a single vfnmsub213ps, -(xmm1 * xmm0) - xmm2.
  26 ; X32-LABEL: test2:
  27 ; X32:       # %bb.0: # %entry
  28 ; X32-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
  29 ; X32-NEXT:    retl
  30 ;
  31 ; X64-LABEL: test2:
  32 ; X64:       # %bb.0: # %entry
  33 ; X64-NEXT:    vfnmsub213ps {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
  34 ; X64-NEXT:    retq
  35 entry:
  36   %0 = tail call <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  37   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  38   ret <4 x float> %sub.i
  39 }
  40
     ; <4 x float> vfmadd intrinsic called by test2.
  41 declare <4 x float> @llvm.x86.fma.vfmadd.ps(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  42
  43 define <4 x float> @test3(<4 x float> %a, <4 x float> %b, <4 x float> %c)  {
     ; test3: the result of the vfnmadd.ss intrinsic is negated across all
     ; four lanes (fsub from a -0.0 splat). Unlike the packed tests in this
     ; file, the expected code keeps the vfnmadd213ss and performs the negate
     ; separately: a vbroadcastss of -0.0 followed by vxorps.
     ; NOTE(review): presumably the negate cannot be folded because the .ss
     ; intrinsic only computes the low lane while the fsub covers every lane -
     ; confirm against the X86 combine code.
  44 ; X32-LABEL: test3:
  45 ; X32:       # %bb.0: # %entry
  46 ; X32-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
  47 ; X32-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
  48 ; X32-NEXT:    vxorps %xmm1, %xmm0, %xmm0
  49 ; X32-NEXT:    retl
  50 ;
  51 ; X64-LABEL: test3:
  52 ; X64:       # %bb.0: # %entry
  53 ; X64-NEXT:    vfnmadd213ss {{.*#+}} xmm0 = -(xmm1 * xmm0) + xmm2
  54 ; X64-NEXT:    vbroadcastss {{.*#+}} xmm1 = [-0.0E+0,-0.0E+0,-0.0E+0,-0.0E+0]
  55 ; X64-NEXT:    vxorps %xmm1, %xmm0, %xmm0
  56 ; X64-NEXT:    retq
  57 entry:
  58   %0 = tail call <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c) #2
  59   %sub.i = fsub <4 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  60   ret <4 x float> %sub.i
  61 }
  62
     ; vfnmadd.ss intrinsic (takes and returns <4 x float>) called by test3.
  63 declare <4 x float> @llvm.x86.fma.vfnmadd.ss(<4 x float> %a, <4 x float> %b, <4 x float> %c)
  64
  65 define <8 x float> @test4(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     ; test4: the result of the <8 x float> vfmsub intrinsic is negated
     ; (fsub from -0.0). Both RUN lines expect a single vfnmadd213ps,
     ; -(ymm1 * ymm0) + ymm2, with no separate negate instruction.
     ; The intrinsic's declaration appears later in this file, after test5.
  66 ; X32-LABEL: test4:
  67 ; X32:       # %bb.0: # %entry
  68 ; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
  69 ; X32-NEXT:    retl
  70 ;
  71 ; X64-LABEL: test4:
  72 ; X64:       # %bb.0: # %entry
  73 ; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
  74 ; X64-NEXT:    retq
  75 entry:
  76   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %c) #2
  77   %sub.i = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %0
  78   ret <8 x float> %sub.i
  79 }
  80
  81 define <8 x float> @test5(<8 x float> %a, <8 x float> %b, <8 x float> %c) {
     ; test5: %c is negated (fsub from -0.0) before it is passed as the third
     ; operand of the <8 x float> vfmsub intrinsic. Both RUN lines expect a
     ; single vfmadd213ps, (ymm1 * ymm0) + ymm2, with no separate negate
     ; instruction.
  82 ; X32-LABEL: test5:
  83 ; X32:       # %bb.0: # %entry
  84 ; X32-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
  85 ; X32-NEXT:    retl
  86 ;
  87 ; X64-LABEL: test5:
  88 ; X64:       # %bb.0: # %entry
  89 ; X64-NEXT:    vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
  90 ; X64-NEXT:    retq
  91 entry:
  92   %sub.c = fsub <8 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00>, %c
  93   %0 = tail call <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float> %a, <8 x float> %b, <8 x float> %sub.c) #2
  94   ret <8 x float> %0
  95 }
  96
     ; <8 x float> vfmsub intrinsic called by test4 and test5.
  97 declare <8 x float> @llvm.x86.fma.vfmsub.ps.256(<8 x float>, <8 x float>, <8 x float>)
  98
  99
 100 define <2 x double> @test6(<2 x double> %a, <2 x double> %b, <2 x double> %c) {
     ; test6: same shape as test2 but on <2 x double>. The result of the
     ; vfmadd.pd intrinsic is negated (fsub from -0.0), and both RUN lines
     ; expect a single vfnmsub213pd, -(xmm1 * xmm0) - xmm2.
 101 ; X32-LABEL: test6:
 102 ; X32:       # %bb.0: # %entry
 103 ; X32-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 104 ; X32-NEXT:    retl
 105 ;
 106 ; X64-LABEL: test6:
 107 ; X64:       # %bb.0: # %entry
 108 ; X64-NEXT:    vfnmsub213pd {{.*#+}} xmm0 = -(xmm1 * xmm0) - xmm2
 109 ; X64-NEXT:    retq
 110 entry:
 111   %0 = tail call <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c) #2
 112   %sub.i = fsub <2 x double> <double -0.000000e+00, double -0.000000e+00>, %0
 113   ret <2 x double> %sub.i
 114 }
 115
     ; <2 x double> vfmadd intrinsic called by test6.
 116 declare <2 x double> @llvm.x86.fma.vfmadd.pd(<2 x double> %a, <2 x double> %b, <2 x double> %c)
 117
 118 define <8 x float> @test7(float %a, <8 x float> %b, <8 x float> %c)  {
     ; test7: the scalar %a is inserted into lane 0, negated as a vector
     ; (only lane 0 of the fsub constant is -0.0, the rest are undef), then
     ; splatted and used as the first operand of llvm.fma.v8f32. The expected
     ; code broadcasts the scalar and issues one vfnmadd213ps, with no
     ; separate negate instruction. The i686 run broadcasts from a stack
     ; slot; the x86_64 run broadcasts from xmm0.
 119 ; X32-LABEL: test7:
 120 ; X32:       # %bb.0: # %entry
 121 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
 122 ; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
 123 ; X32-NEXT:    retl
 124 ;
 125 ; X64-LABEL: test7:
 126 ; X64:       # %bb.0: # %entry
 127 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
 128 ; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 129 ; X64-NEXT:    retq
 130 entry:
 131   %0 = insertelement <8 x float> undef, float %a, i32 0
 132   %1 = fsub <8 x float> <float -0.000000e+00, float undef, float undef, float undef, float undef, float undef, float undef, float undef>, %0
 133   %2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> zeroinitializer
 134   %3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %b, <8 x float> %c)
 135   ret <8 x float> %3
 136
 137 }
 138
 139 define <8 x float> @test8(float %a, <8 x float> %b, <8 x float> %c)  {
     ; test8: variant of test7 where the negate is applied to the scalar %a
     ; first (scalar fsub from -0.0), and the negated value is then inserted
     ; into lane 0 and splatted before the llvm.fma.v8f32 call. The expected
     ; code is the same as for test7: a vbroadcastss followed by one
     ; vfnmadd213ps.
 140 ; X32-LABEL: test8:
 141 ; X32:       # %bb.0: # %entry
 142 ; X32-NEXT:    vbroadcastss {{[0-9]+}}(%esp), %ymm2
 143 ; X32-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm2 * ymm0) + ymm1
 144 ; X32-NEXT:    retl
 145 ;
 146 ; X64-LABEL: test8:
 147 ; X64:       # %bb.0: # %entry
 148 ; X64-NEXT:    vbroadcastss %xmm0, %ymm0
 149 ; X64-NEXT:    vfnmadd213ps {{.*#+}} ymm0 = -(ymm1 * ymm0) + ymm2
 150 ; X64-NEXT:    retq
 151 entry:
 152   %0 = fsub float -0.0, %a
 153   %1 = insertelement <8 x float> undef, float %0, i32 0
 154   %2 = shufflevector <8 x float> %1, <8 x float> undef, <8 x i32> zeroinitializer
 155   %3 = tail call <8 x float> @llvm.fma.v8f32(<8 x float> %2, <8 x float> %b, <8 x float> %c)
 156   ret <8 x float> %3
 157 }
 158
     ; Generic <8 x float> llvm.fma intrinsic called by test7 and test8.
 159 declare <8 x float> @llvm.fma.v8f32(<8 x float> %a, <8 x float> %b, <8 x float> %c)