llvm/test/Analysis/CostModel/AMDGPU/fround.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s
   3 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s
   4 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s
   5 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s
   6
   7 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
   8 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
   9 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
  10 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s
  11 ; END.
   12 ; ceil: cost-model expectations for llvm.ceil on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); every operand is undef and no result is used.
   13 define i32 @ceil(i32 %arg) {
   14 ; FAST-LABEL: 'ceil'
   15 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
   16 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   17 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   18 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
   19 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
   20 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   21 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   22 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   23 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
   24 ;
   25 ; SLOW-LABEL: 'ceil'
   26 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
   27 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   28 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   29 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
   30 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
   31 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   32 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   33 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   34 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
   35 ;
   36 ; FAST-SIZE-LABEL: 'ceil'
   37 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
   38 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   39 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   40 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
   41 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
   42 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   43 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   44 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   45 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
   46 ;
   47 ; SLOW-SIZE-LABEL: 'ceil'
   48 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
   49 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   50 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   51 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
   52 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
   53 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   54 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   55 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   56 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
   57 ;
   58   %F32 = call float @llvm.ceil.f32(float undef)
   59   %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
   60   %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
   61   %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
   62 ; Double-precision forms: the only entries whose expected cost differs between the -mcpu runs and the run without -mcpu (1/2/4/8 versus 2/4/8/16 in the checks above).
   63   %F64 = call double @llvm.ceil.f64(double undef)
   64   %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
   65   %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
   66   %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
   67 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
   68   ret i32 undef
   69 }
   70 ; floor: cost-model expectations for llvm.floor on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); all tested configurations share one check group per cost kind.
   71 define i32 @floor(i32 %arg) {
   72 ; ALL-LABEL: 'floor'
   73 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
   74 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
   75 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
   76 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
   77 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
   78 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
   79 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
   80 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
   81 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
   82 ;
   83 ; ALL-SIZE-LABEL: 'floor'
   84 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
   85 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
   86 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
   87 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
   88 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
   89 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
   90 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
   91 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
   92 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
   93 ;
   94   %F32 = call float @llvm.floor.f32(float undef)
   95   %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
   96   %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
   97   %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
   98 ; Double-precision forms: expected at 1/2/4/8 in every tested configuration, so the checks above need no per-subtarget split.
   99   %F64 = call double @llvm.floor.f64(double undef)
  100   %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
  101   %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
  102   %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
  103 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
  104   ret i32 undef
  105 }
  106 ; nearbyint: cost-model expectations for llvm.nearbyint on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); every call is expected at 2 per element in all tested configurations.
  107 define i32 @nearbyint(i32 %arg) {
  108 ; ALL-LABEL: 'nearbyint'
  109 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
  110 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
  111 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
  112 ; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
  113 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
  114 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
  115 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
  116 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
  117 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  118 ;
  119 ; ALL-SIZE-LABEL: 'nearbyint'
  120 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
  121 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
  122 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
  123 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
  124 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
  125 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
  126 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
  127 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
  128 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  129 ;
  130   %F32 = call float @llvm.nearbyint.f32(float undef)
  131   %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
  132   %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
  133   %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
  134 ; Double-precision forms: expected at 2/4/8/16 in every tested configuration, so the checks above need no per-subtarget split.
  135   %F64 = call double @llvm.nearbyint.f64(double undef)
  136   %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
  137   %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
  138   %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
  139 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
  140   ret i32 undef
  141 }
  142 ; rint: cost-model expectations for llvm.rint on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); every call is expected at 2 per element.
  143 define i32 @rint(i32 %arg) {
  144 ; FAST-LABEL: 'rint'
  145 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
  146 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  147 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  148 ; FAST-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
  149 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
  150 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  151 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  152 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
  153 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  154 ;
  155 ; SLOW-LABEL: 'rint'
  156 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
  157 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  158 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  159 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
  160 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
  161 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  162 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  163 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
  164 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  165 ;
  166 ; FAST-SIZE-LABEL: 'rint'
  167 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
  168 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  169 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  170 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
  171 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
  172 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  173 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  174 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
  175 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  176 ;
  177 ; SLOW-SIZE-LABEL: 'rint'
  178 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.rint.f32(float undef)
  179 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  180 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  181 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
  182 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
  183 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  184 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  185 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
  186 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  187 ;
  188   %F32 = call float @llvm.rint.f32(float undef)
  189   %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
  190   %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
  191   %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
  192 ; Double-precision forms: 2/4/8/16 in both per-subtarget check groups. NOTE(review): those two groups are line-for-line identical for each cost kind, so they could presumably be folded into the shared prefix - confirm by regenerating the checks.
  193   %F64 = call double @llvm.rint.f64(double undef)
  194   %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
  195   %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
  196   %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
  197 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
  198   ret i32 undef
  199 }
  200 ; roundeven: cost-model expectations for llvm.roundeven on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); every operand is undef and no result is used.
  201 define i32 @roundeven(i32 %arg) {
  202 ; FAST-LABEL: 'roundeven'
  203 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
  204 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
  205 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
  206 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
  207 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
  208 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
  209 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
  210 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
  211 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  212 ;
  213 ; SLOW-LABEL: 'roundeven'
  214 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
  215 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
  216 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
  217 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
  218 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
  219 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
  220 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
  221 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
  222 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  223 ;
  224 ; FAST-SIZE-LABEL: 'roundeven'
  225 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
  226 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
  227 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
  228 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
  229 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
  230 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
  231 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
  232 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
  233 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  234 ;
  235 ; SLOW-SIZE-LABEL: 'roundeven'
  236 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.roundeven.f32(float undef)
  237 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
  238 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
  239 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
  240 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.roundeven.f64(double undef)
  241 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
  242 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
  243 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
  244 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  245 ;
  246   %F32 = call float @llvm.roundeven.f32(float undef)
  247   %V4F32 = call <4 x float> @llvm.roundeven.v4f32(<4 x float> undef)
  248   %V8F32 = call <8 x float> @llvm.roundeven.v8f32(<8 x float> undef)
  249   %V16F32 = call <16 x float> @llvm.roundeven.v16f32(<16 x float> undef)
  250 ; Double-precision forms: the only entries whose expected cost differs between the -mcpu runs and the run without -mcpu (1/2/4/8 versus 2/4/8/16 in the checks above).
  251   %F64 = call double @llvm.roundeven.f64(double undef)
  252   %V2F64 = call <2 x double> @llvm.roundeven.v2f64(<2 x double> undef)
  253   %V4F64 = call <4 x double> @llvm.roundeven.v4f64(<4 x double> undef)
  254   %V8F64 = call <8 x double> @llvm.roundeven.v8f64(<8 x double> undef)
  255 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
  256   ret i32 undef
  257 }
  258 ; trunc: cost-model expectations for llvm.trunc on float (scalar and 4/8/16-element vectors) and double (scalar and 2/4/8-element vectors); every operand is undef and no result is used.
  259 define i32 @trunc(i32 %arg) {
  260 ; FAST-LABEL: 'trunc'
  261 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
  262 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  263 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  264 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
  265 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
  266 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  267 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  268 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
  269 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  270 ;
  271 ; SLOW-LABEL: 'trunc'
  272 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
  273 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  274 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  275 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
  276 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
  277 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  278 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  279 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
  280 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  281 ;
  282 ; FAST-SIZE-LABEL: 'trunc'
  283 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
  284 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  285 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  286 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
  287 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
  288 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  289 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  290 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
  291 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  292 ;
  293 ; SLOW-SIZE-LABEL: 'trunc'
  294 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
  295 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  296 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  297 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
  298 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
  299 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  300 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  301 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
  302 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  303 ;
  304   %F32 = call float @llvm.trunc.f32(float undef)
  305   %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
  306   %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
  307   %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
  308 ; Double-precision forms: the only entries whose expected cost differs between the -mcpu runs and the run without -mcpu (1/2/4/8 versus 2/4/8/16 in the checks above).
  309   %F64 = call double @llvm.trunc.f64(double undef)
  310   %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
  311   %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
  312   %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
  313 ; The returned value is undef and %arg is never read; the ret is still costed (10 in the default runs, 1 with -cost-kind=code-size).
  314   ret i32 undef
  315 }
  316 ; Declarations for every intrinsic overload called by the test functions: ceil, floor, nearbyint, rint, roundeven and trunc, each on float, <4/8/16 x float>, double and <2/4/8 x double>.
  317 declare float @llvm.ceil.f32(float)
  318 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
  319 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
  320 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
  321
  322 declare double @llvm.ceil.f64(double)
  323 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
  324 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
  325 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
  326
  327 declare float @llvm.floor.f32(float)
  328 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
  329 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
  330 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
  331
  332 declare double @llvm.floor.f64(double)
  333 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
  334 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
  335 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
  336
  337 declare float @llvm.nearbyint.f32(float)
  338 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
  339 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
  340 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
  341
  342 declare double @llvm.nearbyint.f64(double)
  343 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
  344 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
  345 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
  346
  347 declare float @llvm.rint.f32(float)
  348 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
  349 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
  350 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
  351
  352 declare double @llvm.rint.f64(double)
  353 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
  354 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
  355 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
  356
  357 declare float @llvm.roundeven.f32(float)
  358 declare <4 x float> @llvm.roundeven.v4f32(<4 x float>)
  359 declare <8 x float> @llvm.roundeven.v8f32(<8 x float>)
  360 declare <16 x float> @llvm.roundeven.v16f32(<16 x float>)
  361
  362 declare double @llvm.roundeven.f64(double)
  363 declare <2 x double> @llvm.roundeven.v2f64(<2 x double>)
  364 declare <4 x double> @llvm.roundeven.v4f64(<4 x double>)
  365 declare <8 x double> @llvm.roundeven.v8f64(<8 x double>)
  366
  367 declare float @llvm.trunc.f32(float)
  368 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
  369 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
  370 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
  371
  372 declare double @llvm.trunc.f64(double)
  373 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
  374 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
  375 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)