; RUN: opt -S -instcombine < %s | FileCheck %s

target triple = "aarch64-unknown-linux-gnu"
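
; ptrue takes an immediate predicate pattern; pattern 31 (SV_ALL) produces an all-active predicate.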
declare <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32)
declare <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32)
declare <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32)

; SVE intrinsics fmul, fadd and fsub should be replaced with regular fmul, fadd and fsub instructions.
declare <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fmul_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_half
; CHECK-NEXT: %1 = fmul fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fmul.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fmul_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_float
; CHECK-NEXT: %1 = fmul fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fmul.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fmul_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fmul_intrinsic_double
; CHECK-NEXT: %1 = fmul fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fmul.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fadd_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_half
; CHECK-NEXT: %1 = fadd fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fadd.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fadd_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_float
; CHECK-NEXT: %1 = fadd fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fadd.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fadd_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fadd_intrinsic_double
; CHECK-NEXT: %1 = fadd fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fadd.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

declare <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1>, <vscale x 8 x half>, <vscale x 8 x half>)
define <vscale x 8 x half> @replace_fsub_intrinsic_half(<vscale x 8 x half> %a, <vscale x 8 x half> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_half
; CHECK-NEXT: %1 = fsub fast <vscale x 8 x half> %a, %b
; CHECK-NEXT: ret <vscale x 8 x half> %1
  %1 = tail call <vscale x 8 x i1> @llvm.aarch64.sve.ptrue.nxv8i1(i32 31)
  %2 = tail call fast <vscale x 8 x half> @llvm.aarch64.sve.fsub.nxv8f16(<vscale x 8 x i1> %1, <vscale x 8 x half> %a, <vscale x 8 x half> %b)
  ret <vscale x 8 x half> %2
}

declare <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1>, <vscale x 4 x float>, <vscale x 4 x float>)
define <vscale x 4 x float> @replace_fsub_intrinsic_float(<vscale x 4 x float> %a, <vscale x 4 x float> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_float
; CHECK-NEXT: %1 = fsub fast <vscale x 4 x float> %a, %b
; CHECK-NEXT: ret <vscale x 4 x float> %1
  %1 = tail call <vscale x 4 x i1> @llvm.aarch64.sve.ptrue.nxv4i1(i32 31)
  %2 = tail call fast <vscale x 4 x float> @llvm.aarch64.sve.fsub.nxv4f32(<vscale x 4 x i1> %1, <vscale x 4 x float> %a, <vscale x 4 x float> %b)
  ret <vscale x 4 x float> %2
}

declare <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1>, <vscale x 2 x double>, <vscale x 2 x double>)
define <vscale x 2 x double> @replace_fsub_intrinsic_double(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_double
; CHECK-NEXT: %1 = fsub fast <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}
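
; No replacement when the governing predicate is not an all-active ptrue: pattern 5 is not
; SV_ALL (31), so the intrinsic call must be preserved as-is.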
define <vscale x 2 x double> @no_replace_on_non_ptrue_all(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @no_replace_on_non_ptrue_all
; CHECK-NEXT: %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
; CHECK-NEXT: %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
; CHECK-NEXT: ret <vscale x 2 x double> %2
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 5)
  %2 = tail call fast <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}
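
; The fast flag on the intrinsic call is not required for the replacement; without it the
; resulting fsub carries no fast-math flags.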
define <vscale x 2 x double> @replace_fsub_intrinsic_no_fast_flag(<vscale x 2 x double> %a, <vscale x 2 x double> %b) #0 {
; CHECK-LABEL: @replace_fsub_intrinsic_no_fast_flag
; CHECK-NEXT: %1 = fsub <vscale x 2 x double> %a, %b
; CHECK-NEXT: ret <vscale x 2 x double> %1
  %1 = tail call <vscale x 2 x i1> @llvm.aarch64.sve.ptrue.nxv2i1(i32 31)
  %2 = tail call <vscale x 2 x double> @llvm.aarch64.sve.fsub.nxv2f64(<vscale x 2 x i1> %1, <vscale x 2 x double> %a, <vscale x 2 x double> %b)
  ret <vscale x 2 x double> %2
}

attributes #0 = { "target-features"="+sve" }