test/CodeGen/X86/avx512-fma.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mattr=+avx512f -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=KNL
   3 ; RUN: llc < %s -mtriple=x86_64-apple-darwin -mcpu=skx -fp-contract=fast | FileCheck %s --check-prefix=ALL --check-prefix=SKX
   4
   ; Unmasked 512-bit single-precision multiply-add: %a0 * %a1 + %a2, written
   ; as a separate fmul and fadd. The assertions expect the pair to come out as
   ; one vfmadd213ps followed directly by retq (the RUN lines at the top of the
   ; file pass -fp-contract=fast).
   5 define <16 x float> @test_x86_fmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
   6 ; ALL-LABEL: test_x86_fmadd_ps_z:
   7 ; ALL:       ## %bb.0:
   8 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
   9 ; ALL-NEXT:    retq
       ; Product of the first two arguments; its only use is the fadd below.
  10   %x = fmul <16 x float> %a0, %a1
  11   %res = fadd <16 x float> %x, %a2
  12   ret <16 x float> %res
  13 }
  14
  ; Unmasked 512-bit single-precision multiply-subtract: %a0 * %a1 - %a2,
  ; written as a separate fmul and fsub. The assertions expect a single
  ; vfmsub213ps followed directly by retq.
  15 define <16 x float> @test_x86_fmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  16 ; ALL-LABEL: test_x86_fmsub_ps_z:
  17 ; ALL:       ## %bb.0:
  18 ; ALL-NEXT:    vfmsub213ps {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
  19 ; ALL-NEXT:    retq
  20   %x = fmul <16 x float> %a0, %a1
       ; The product is the minuend, so the fused form is (a0 * a1) - a2.
  21   %res = fsub <16 x float> %x, %a2
  22   ret <16 x float> %res
  23 }
  24
  ; Unmasked 512-bit single-precision negated multiply-add: %a2 - %a0 * %a1,
  ; i.e. -(a0 * a1) + a2. The assertions expect a single vfnmadd213ps followed
  ; directly by retq.
  25 define <16 x float> @test_x86_fnmadd_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  26 ; ALL-LABEL: test_x86_fnmadd_ps_z:
  27 ; ALL:       ## %bb.0:
  28 ; ALL-NEXT:    vfnmadd213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) + zmm2
  29 ; ALL-NEXT:    retq
  30   %x = fmul <16 x float> %a0, %a1
       ; Here the product is the subtrahend (operands swapped relative to the
       ; fmsub test), which is what makes this the negated-multiply form.
  31   %res = fsub <16 x float> %a2, %x
  32   ret <16 x float> %res
  33 }
  34
  ; Unmasked 512-bit single-precision negated multiply-subtract:
  ; -(a0 * a1) - a2. The assertions expect a single vfnmsub213ps followed
  ; directly by retq, so neither the negation nor the subtract may survive as
  ; its own instruction.
  35 define <16 x float> @test_x86_fnmsub_ps_z(<16 x float> %a0, <16 x float> %a1, <16 x float> %a2) {
  36 ; ALL-LABEL: test_x86_fnmsub_ps_z:
  37 ; ALL:       ## %bb.0:
  38 ; ALL-NEXT:    vfnmsub213ps {{.*#+}} zmm0 = -(zmm1 * zmm0) - zmm2
  39 ; ALL-NEXT:    retq
  40   %x = fmul <16 x float> %a0, %a1
       ; Negate the product by subtracting it from a splat of -0.0 in all 16
       ; lanes (-0.0 - x == -x).
  41   %y = fsub <16 x float> <float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
  42                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
  43                           float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00, float -0.000000e+00,
  44                           float -0.000000e+00>, %x
       ; Subtract the addend from the negated product.
  45   %res = fsub <16 x float> %y, %a2
  46   ret <16 x float> %res
  47 }
  48
  ; Double-precision (<8 x double>) counterpart of the packed-single
  ; multiply-add test: %a0 * %a1 + %a2. The assertions expect a single
  ; vfmadd213pd followed directly by retq.
  49 define <8 x double> @test_x86_fmadd_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  50 ; ALL-LABEL: test_x86_fmadd_pd_z:
  51 ; ALL:       ## %bb.0:
  52 ; ALL-NEXT:    vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
  53 ; ALL-NEXT:    retq
  54   %x = fmul <8 x double> %a0, %a1
  55   %res = fadd <8 x double> %x, %a2
  56   ret <8 x double> %res
  57 }
  58
  ; Double-precision (<8 x double>) multiply-subtract: %a0 * %a1 - %a2. The
  ; assertions expect a single vfmsub213pd followed directly by retq.
  59 define <8 x double> @test_x86_fmsub_pd_z(<8 x double> %a0, <8 x double> %a1, <8 x double> %a2) {
  60 ; ALL-LABEL: test_x86_fmsub_pd_z:
  61 ; ALL:       ## %bb.0:
  62 ; ALL-NEXT:    vfmsub213pd {{.*#+}} zmm0 = (zmm1 * zmm0) - zmm2
  63 ; ALL-NEXT:    retq
  64   %x = fmul <8 x double> %a0, %a1
  65   %res = fsub <8 x double> %x, %a2
  66   ret <8 x double> %res
  67 }
  68
  ; Scalar double multiply-subtract with all three operands passed by value:
  ; %a0 * %a1 - %a2. The assertions expect a single vfmsub213sd on xmm
  ; registers followed directly by retq.
  69 define double @test_x86_fmsub_213(double %a0, double %a1, double %a2) {
  70 ; ALL-LABEL: test_x86_fmsub_213:
  71 ; ALL:       ## %bb.0:
  72 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - xmm2
  73 ; ALL-NEXT:    retq
  74   %x = fmul double %a0, %a1
  75   %res = fsub double %x, %a2
  76   ret double %res
  77 }
  78
  ; Scalar double multiply-subtract where the subtrahend comes from memory:
  ; %a0 * %a1 - load(%a2_ptr). The assertions expect the load to be folded
  ; into the FMA as its memory operand (vfmsub213sd ... - mem), with no
  ; separate load instruction before retq.
  79 define double @test_x86_fmsub_213_m(double %a0, double %a1, double * %a2_ptr) {
  80 ; ALL-LABEL: test_x86_fmsub_213_m:
  81 ; ALL:       ## %bb.0:
  82 ; ALL-NEXT:    vfmsub213sd {{.*#+}} xmm0 = (xmm1 * xmm0) - mem
  83 ; ALL-NEXT:    retq
       ; The loaded value is used only as the value being subtracted.
  84   %a2 = load double , double *%a2_ptr
  85   %x = fmul double %a0, %a1
  86   %res = fsub double %x, %a2
  87   ret double %res
  88 }
  89
  ; Scalar double multiply-subtract where one multiplicand comes from memory:
  ; %a0 * load(%a2_ptr) - %a1. The assertions expect the load to be folded as
  ; the multiplicand memory operand of a single FMA.
  ; NOTE(review): the function name says "231" but the asserted instruction is
  ; the 132 form (vfmsub132sd); the name appears to be historical - confirm
  ; before relying on it.
  90 define double @test_x86_fmsub_231_m(double %a0, double %a1, double * %a2_ptr) {
  91 ; ALL-LABEL: test_x86_fmsub_231_m:
  92 ; ALL:       ## %bb.0:
  93 ; ALL-NEXT:    vfmsub132sd {{.*#+}} xmm0 = (xmm0 * mem) - xmm1
  94 ; ALL-NEXT:    retq
  95   %a2 = load double , double *%a2_ptr
       ; Unlike test_x86_fmsub_213_m, the loaded value feeds the multiply and the
       ; by-value %a1 is the subtrahend.
  96   %x = fmul double %a0, %a2
  97   %res = fsub double %x, %a1
  98   ret double %res
  99 }
 100
 ; Packed-single multiply-add where the multiplier is a constant splat (the
 ; same value in all 16 lanes): %a1 * splat + %a2. The assertions expect the
 ; constant to be consumed as the FMA's multiplicand memory operand.
 ; NOTE(review): the "_br" suffix presumably stands for a broadcast memory
 ; operand, and the name says "231" while the asserted instruction is
 ; vfmadd132ps - confirm both against the test's history.
 101 define <16 x float> @test231_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 102 ; ALL-LABEL: test231_br:
 103 ; ALL:       ## %bb.0:
 104 ; ALL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 105 ; ALL-NEXT:    retq
 106   %b1 = fmul <16 x float> %a1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
 107   %b2 = fadd <16 x float> %b1, %a2
 108   ret <16 x float> %b2
 109 }
 110
 ; Packed-single multiply-add where the addend is a constant splat (the same
 ; value in all 16 lanes): %a1 * %a2 + splat. The assertions expect a single
 ; vfmadd213ps with the constant as the addend memory operand.
 ; NOTE(review): the "_br" suffix presumably stands for a broadcast memory
 ; operand - the assertion only pins that the operand is printed as "mem".
 111 define <16 x float> @test213_br(<16 x float> %a1, <16 x float> %a2) nounwind {
 112 ; ALL-LABEL: test213_br:
 113 ; ALL:       ## %bb.0:
 114 ; ALL-NEXT:    vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + mem
 115 ; ALL-NEXT:    retq
 116   %b1 = fmul <16 x float> %a1, %a2
 117   %b2 = fadd <16 x float> %b1, <float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000, float 0x3FB99999A0000000>
 118   ret <16 x float> %b2
 119 }
 120
 121 ; mask(a*c + b, a), with a = %a0, b = %a1 and c = the vector loaded from %a2_ptrt.
 ; Per-lane select between the multiply-add result and the first multiplicand
 ; (%a0). The assertions expect code that writes %k1 (presumably derived from
 ; the <16 x i1> mask argument arriving in %xmm2 - the instruction sequence
 ; differs between the KNL and SKX prefixes), then a single vfmadd132ps whose
 ; destination is zmm0 and whose multiplicand is the folded load.
 122 define <16 x float> @test_x86_fmadd132_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
 123 ; KNL-LABEL: test_x86_fmadd132_ps:
 124 ; KNL:       ## %bb.0:
 125 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
 126 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
 127 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
 128 ; KNL-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 129 ; KNL-NEXT:    retq
 130 ;
 131 ; SKX-LABEL: test_x86_fmadd132_ps:
 132 ; SKX:       ## %bb.0:
 133 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
 134 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
 135 ; SKX-NEXT:    vfmadd132ps {{.*#+}} zmm0 = (zmm0 * mem) + zmm1
 136 ; SKX-NEXT:    retq
       ; The load is declared with align 1; the assertions still expect it to be
       ; folded into the FMA rather than emitted as a separate load.
 137   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
 138   %x = fmul <16 x float> %a0, %a2
 139   %y = fadd <16 x float> %x, %a1
       ; Lanes with a false mask bit keep %a0, so no extra move is expected
       ; after the FMA.
 140   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a0
 141   ret <16 x float> %res
 142 }
 143
 144 ; mask(a*c + b, b), with a = %a0, b = %a1 and c = the vector loaded from %a2_ptrt.
 ; Same arithmetic as test_x86_fmadd132_ps, but lanes with a false mask bit
 ; keep the addend (%a1) instead of the multiplicand. The assertions expect
 ; code that writes %k1 (presumably from the <16 x i1> mask argument in
 ; %xmm2), a single vfmadd231ps accumulating into zmm1, and a vmovaps copying
 ; zmm1 to zmm0 before retq.
 145 define <16 x float> @test_x86_fmadd231_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
 146 ; KNL-LABEL: test_x86_fmadd231_ps:
 147 ; KNL:       ## %bb.0:
 148 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
 149 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
 150 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
 151 ; KNL-NEXT:    vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
 152 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
 153 ; KNL-NEXT:    retq
 154 ;
 155 ; SKX-LABEL: test_x86_fmadd231_ps:
 156 ; SKX:       ## %bb.0:
 157 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
 158 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
 159 ; SKX-NEXT:    vfmadd231ps {{.*#+}} zmm1 = (zmm0 * mem) + zmm1
 160 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 161 ; SKX-NEXT:    retq
       ; The load is declared with align 1 and is expected to fold into the FMA.
 162   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
 163   %x = fmul <16 x float> %a0, %a2
 164   %y = fadd <16 x float> %x, %a1
       ; The passthru operand is the addend %a1.
 165   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
 166   ret <16 x float> %res
 167 }
 168
 169 ; mask(b*a + c, b), with a = %a0, b = %a1 and c = the vector loaded from %a2_ptrt.
 ; Here the loaded vector is the addend and lanes with a false mask bit keep
 ; the multiplicand %a1. The assertions expect code that writes %k1
 ; (presumably from the <16 x i1> mask argument in %xmm2), a single
 ; vfmadd213ps with destination zmm1 and the load as the addend memory
 ; operand, and a vmovaps copying zmm1 to zmm0 before retq.
 170 define <16 x float> @test_x86_fmadd213_ps(<16 x float> %a0, <16 x float> %a1, <16 x float> *%a2_ptrt, <16 x i1> %mask) {
 171 ; KNL-LABEL: test_x86_fmadd213_ps:
 172 ; KNL:       ## %bb.0:
 173 ; KNL-NEXT:    vpmovsxbd %xmm2, %zmm2
 174 ; KNL-NEXT:    vpslld $31, %zmm2, %zmm2
 175 ; KNL-NEXT:    vptestmd %zmm2, %zmm2, %k1
 176 ; KNL-NEXT:    vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
 177 ; KNL-NEXT:    vmovaps %zmm1, %zmm0
 178 ; KNL-NEXT:    retq
 179 ;
 180 ; SKX-LABEL: test_x86_fmadd213_ps:
 181 ; SKX:       ## %bb.0:
 182 ; SKX-NEXT:    vpsllw $7, %xmm2, %xmm2
 183 ; SKX-NEXT:    vpmovb2m %xmm2, %k1
 184 ; SKX-NEXT:    vfmadd213ps {{.*#+}} zmm1 = (zmm0 * zmm1) + mem
 185 ; SKX-NEXT:    vmovaps %zmm1, %zmm0
 186 ; SKX-NEXT:    retq
       ; The load is declared with align 1 and is expected to fold into the FMA.
 187   %a2   = load <16 x float>,<16 x float> *%a2_ptrt,align 1
       ; Note the operand order: %a1 is written first in the multiply, and it is
       ; also the select's passthru below.
 188   %x = fmul <16 x float> %a1, %a0
 189   %y = fadd <16 x float> %x, %a2
 190   %res = select <16 x i1> %mask, <16 x float> %y, <16 x float> %a1
 191   ret <16 x float> %res
 192 }
 193