llvm/test/CodeGen/NVPTX/fma.ll

   1 ; RUN: llc < %s -march=nvptx -mcpu=sm_20 -fp-contract=fast -verify-machineinstrs | FileCheck %s\r
   2 \r
   3 declare float @dummy_f32(float, float) #0\r
   4 declare double @dummy_f64(double, double) #0\r
   5 \r
   6 define ptx_device float @t1_f32(float %x, float %y, float %z) { ; f32 case: x*y + z, product has a single user\r
   7 ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};\r
   8 ; CHECK: ret;\r
   9   %prod = fmul float %x, %y ; expected to fuse with the fadd below (RUN line passes -fp-contract=fast)\r
  10   %sum = fadd float %prod, %z ; only consumer of %prod\r
  11   ret float %sum\r
  12 }\r
  13 \r
  14 define ptx_device float @t2_f32(float %x, float %y, float %z, float %w) { ; f32 case: one product feeding two adds\r
  15 ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};\r
  16 ; CHECK: fma.rn.f32 %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}}, %f{{[0-9]+}};\r
  17 ; CHECK: ret;\r
  18   %prod = fmul float %x, %y ; shared by both adds; the directives above expect one fma.rn.f32 per add\r
  19   %sum_z = fadd float %prod, %z ; first user of %prod\r
  20   %sum_w = fadd float %prod, %w ; second user of %prod\r
  21   %res = call float @dummy_f32(float %sum_z, float %sum_w) ; both sums are passed to the external helper\r
  22   ret float %res\r
  23 }\r
  24 \r
  25 define ptx_device double @t1_f64(double %x, double %y, double %z) { ; f64 case: x*y + z, product has a single user\r
  26 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};\r
  27 ; CHECK: ret;\r
  28   %prod = fmul double %x, %y ; expected to fuse with the fadd below (RUN line passes -fp-contract=fast)\r
  29   %sum = fadd double %prod, %z ; only consumer of %prod\r
  30   ret double %sum\r
  31 }\r
  32 \r
  33 define ptx_device double @t2_f64(double %x, double %y, double %z, double %w) { ; f64 case: one product feeding two adds\r
  34 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};\r
  35 ; CHECK: fma.rn.f64 %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}}, %fd{{[0-9]+}};\r
  36 ; CHECK: ret;\r
  37   %prod = fmul double %x, %y ; shared by both adds; the directives above expect one fma.rn.f64 per add\r
  38   %sum_z = fadd double %prod, %z ; first user of %prod\r
  39   %sum_w = fadd double %prod, %w ; second user of %prod\r
  40   %res = call double @dummy_f64(double %sum_z, double %sum_w) ; both sums are passed to the external helper\r
  41   ret double %res\r
  42 }\r