llvm/test/Analysis/CostModel/AMDGPU/fsub.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16,GFX90A-FASTF64 %s
   3 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,FASTF16,FASTF64 %s
   4 ; RUN: opt -cost-model -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32,SLOWF64 %s
   5 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=FASTF16-SIZE,GFX90A-FASTF64-SIZE %s
   6 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900  -mattr=+half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,FASTF16-SIZE %s
   7 ; RUN: opt -cost-model -cost-kind=code-size -analyze -mtriple=amdgcn-unknown-amdhsa -mattr=-half-rate-64-ops < %s | FileCheck -check-prefixes=NOPACKEDF32-SIZE,SLOWF64-SIZE %s
   8 ; END.
   9
  10 define amdgpu_kernel void @fsub_f32() #0 {
  11 ; GFX90A-FASTF64-LABEL: 'fsub_f32'
  12 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
  13 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
  14 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
  15 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
  16 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
  17 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
  18 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
  19 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
  20 ;
  21 ; NOPACKEDF32-LABEL: 'fsub_f32'
  22 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
  23 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
  24 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
  25 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
  26 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
  27 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
  28 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
  29 ; NOPACKEDF32-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
  30 ;
  31 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f32'
  32 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
  33 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f32 = fsub <2 x float> undef, undef
  34 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f32 = fsub <3 x float> undef, undef
  35 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f32 = fsub <4 x float> undef, undef
  36 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v5f32 = fsub <5 x float> undef, undef
  37 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v8f32 = fsub <8 x float> undef, undef
  38 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v9f32 = fsub <9 x float> undef, undef
  39 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  40 ;
  41 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f32'
  42 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f32 = fsub float undef, undef
  43 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f32 = fsub <2 x float> undef, undef
  44 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f32 = fsub <3 x float> undef, undef
  45 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f32 = fsub <4 x float> undef, undef
  46 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 5 for instruction: %v5f32 = fsub <5 x float> undef, undef
  47 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v8f32 = fsub <8 x float> undef, undef
  48 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v9f32 = fsub <9 x float> undef, undef
  49 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  50 ;
  51   %f32 = fsub float undef, undef
  52   %v2f32 = fsub <2 x float> undef, undef
  53   %v3f32 = fsub <3 x float> undef, undef
  54   %v4f32 = fsub <4 x float> undef, undef
  55   %v5f32 = fsub <5 x float> undef, undef
  56   %v8f32 = fsub <8 x float> undef, undef
  57   %v9f32 = fsub <9 x float> undef, undef
  58   ret void
  59 }
  60
  61 define amdgpu_kernel void @fsub_f64() #0 {
  62 ; GFX90A-FASTF64-LABEL: 'fsub_f64'
  63 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
  64 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
  65 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
  66 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
  67 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
  68 ; GFX90A-FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
  69 ;
  70 ; FASTF64-LABEL: 'fsub_f64'
  71 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
  72 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
  73 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
  74 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
  75 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
  76 ; FASTF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
  77 ;
  78 ; SLOWF64-LABEL: 'fsub_f64'
  79 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %f64 = fsub double undef, undef
  80 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v2f64 = fsub <2 x double> undef, undef
  81 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 12 for instruction: %v3f64 = fsub <3 x double> undef, undef
  82 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v4f64 = fsub <4 x double> undef, undef
  83 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 96 for instruction: %v5f64 = fsub <5 x double> undef, undef
  84 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
  85 ;
  86 ; GFX90A-FASTF64-SIZE-LABEL: 'fsub_f64'
  87 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f64 = fsub double undef, undef
  88 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f64 = fsub <2 x double> undef, undef
  89 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 3 for instruction: %v3f64 = fsub <3 x double> undef, undef
  90 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f64 = fsub <4 x double> undef, undef
  91 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 24 for instruction: %v5f64 = fsub <5 x double> undef, undef
  92 ; GFX90A-FASTF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
  93 ;
  94 ; NOPACKEDF32-SIZE-LABEL: 'fsub_f64'
  95 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %f64 = fsub double undef, undef
  96 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v2f64 = fsub <2 x double> undef, undef
  97 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 6 for instruction: %v3f64 = fsub <3 x double> undef, undef
  98 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v4f64 = fsub <4 x double> undef, undef
  99 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 48 for instruction: %v5f64 = fsub <5 x double> undef, undef
 100 ; NOPACKEDF32-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 101 ;
 102   %f64 = fsub double undef, undef
 103   %v2f64 = fsub <2 x double> undef, undef
 104   %v3f64 = fsub <3 x double> undef, undef
 105   %v4f64 = fsub <4 x double> undef, undef
 106   %v5f64 = fsub <5 x double> undef, undef
 107   ret void
 108 }
 109
 110 define amdgpu_kernel void @fsub_f16() #0 {
 111 ; FASTF16-LABEL: 'fsub_f16'
 112 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
 113 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 114 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 115 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 116 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 117 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
 118 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
 119 ; FASTF16-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 120 ;
 121 ; SLOWF64-LABEL: 'fsub_f16'
 122 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
 123 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 124 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 125 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
 126 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
 127 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
 128 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
 129 ; SLOWF64-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret void
 130 ;
 131 ; FASTF16-SIZE-LABEL: 'fsub_f16'
 132 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
 133 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %v2f16 = fsub <2 x half> undef, undef
 134 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v3f16 = fsub <3 x half> undef, undef
 135 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v4f16 = fsub <4 x half> undef, undef
 136 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v5f16 = fsub <5 x half> undef, undef
 137 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v16f16 = fsub <16 x half> undef, undef
 138 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 20 for instruction: %v17f16 = fsub <17 x half> undef, undef
 139 ; FASTF16-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 140 ;
 141 ; SLOWF64-SIZE-LABEL: 'fsub_f16'
 142 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %f16 = fsub half undef, undef
 143 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %v2f16 = fsub <2 x half> undef, undef
 144 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v3f16 = fsub <3 x half> undef, undef
 145 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %v4f16 = fsub <4 x half> undef, undef
 146 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %v5f16 = fsub <5 x half> undef, undef
 147 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %v16f16 = fsub <16 x half> undef, undef
 148 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 34 for instruction: %v17f16 = fsub <17 x half> undef, undef
 149 ; SLOWF64-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret void
 150 ;
 151   %f16 = fsub half undef, undef
 152   %v2f16 = fsub <2 x half> undef, undef
 153   %v3f16 = fsub <3 x half> undef, undef
 154   %v4f16 = fsub <4 x half> undef, undef
 155   %v5f16 = fsub <5 x half> undef, undef
 156   %v16f16 = fsub <16 x half> undef, undef
 157   %v17f16 = fsub <17 x half> undef, undef
 158   ret void
 159 }