llvm/test/Analysis/CostModel/AMDGPU/fround.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_analyze_test_checks.py
   2 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL,FAST %s
   3 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL,FAST %s
   4 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL,FAST %s
   5 ; RUN: opt -passes="print<cost-model>" 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL,SLOW %s
   6
   7 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx1010 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
   8 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx90a < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
   9 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=ALL-SIZE,FAST-SIZE %s
  10 ; RUN: opt -passes="print<cost-model>" -cost-kind=code-size 2>&1 -disable-output -mtriple=amdgcn-unknown-amdhsa < %s | FileCheck -check-prefixes=ALL-SIZE,SLOW-SIZE %s
  11 ; END.
  12
  13 define i32 @ceil(i32 %arg) {
     ; Cost-model coverage for llvm.ceil on scalar float/double and the vector
     ; widths called in the body. %arg is never used and the function returns undef.
     ; The FAST prefixes belong to the gfx1010/gfx90a/gfx900 RUN lines and the SLOW
     ; prefixes to the RUN line without -mcpu; in the checks below they differ only
     ; in the f64 rows (1/2/4/8 vs. 2/4/8/16). The -SIZE variants are the
     ; -cost-kind=code-size runs, where the ret is costed 1 instead of 10.
  14 ; FAST-LABEL: 'ceil'
  15 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
  16 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  17 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  18 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
  19 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
  20 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  21 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  22 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
  23 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  24 ;
  25 ; SLOW-LABEL: 'ceil'
  26 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
  27 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  28 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  29 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
  30 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
  31 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  32 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  33 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
  34 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  35 ;
  36 ; FAST-SIZE-LABEL: 'ceil'
  37 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
  38 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  39 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  40 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
  41 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
  42 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  43 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  44 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
  45 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  46 ;
  47 ; SLOW-SIZE-LABEL: 'ceil'
  48 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.ceil.f32(float undef)
  49 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  50 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  51 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
  52 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.ceil.f64(double undef)
  53 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  54 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  55 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
  56 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  57 ;
  58   %F32 = call float @llvm.ceil.f32(float undef)
  59   %V4F32 = call <4 x float> @llvm.ceil.v4f32(<4 x float> undef)
  60   %V8F32 = call <8 x float> @llvm.ceil.v8f32(<8 x float> undef)
  61   %V16F32 = call <16 x float> @llvm.ceil.v16f32(<16 x float> undef)
  62
  63   %F64 = call double @llvm.ceil.f64(double undef)
  64   %V2F64 = call <2 x double> @llvm.ceil.v2f64(<2 x double> undef)
  65   %V4F64 = call <4 x double> @llvm.ceil.v4f64(<4 x double> undef)
  66   %V8F64 = call <8 x double> @llvm.ceil.v8f64(<8 x double> undef)
  67
  68   ret i32 undef
  69 }
  70
  71 define i32 @floor(i32 %arg) {
     ; Cost-model coverage for llvm.floor on scalar float/double and the vector
     ; widths called in the body. %arg is never used and the function returns undef.
     ; Only the shared ALL / ALL-SIZE prefixes appear here, so every RUN line
     ; (with or without -mcpu) is checked against the same numbers, including
     ; cost 1 for the scalar f64 call. The ret is costed 10 in the runs without
     ; -cost-kind and 1 in the -cost-kind=code-size runs.
  72 ; ALL-LABEL: 'floor'
  73 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
  74 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
  75 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
  76 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
  77 ; ALL-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
  78 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
  79 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
  80 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
  81 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
  82 ;
  83 ; ALL-SIZE-LABEL: 'floor'
  84 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.floor.f32(float undef)
  85 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
  86 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
  87 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
  88 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.floor.f64(double undef)
  89 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
  90 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
  91 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
  92 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
  93 ;
  94   %F32 = call float @llvm.floor.f32(float undef)
  95   %V4F32 = call <4 x float> @llvm.floor.v4f32(<4 x float> undef)
  96   %V8F32 = call <8 x float> @llvm.floor.v8f32(<8 x float> undef)
  97   %V16F32 = call <16 x float> @llvm.floor.v16f32(<16 x float> undef)
  98
  99   %F64 = call double @llvm.floor.f64(double undef)
 100   %V2F64 = call <2 x double> @llvm.floor.v2f64(<2 x double> undef)
 101   %V4F64 = call <4 x double> @llvm.floor.v4f64(<4 x double> undef)
 102   %V8F64 = call <8 x double> @llvm.floor.v8f64(<8 x double> undef)
 103
 104   ret i32 undef
 105 }
 106
 107 define i32 @nearbyint(i32 %arg) {
     ; Cost-model coverage for llvm.nearbyint on scalar float/double and the vector
     ; widths called in the body. %arg is never used and the function returns undef.
     ; Only the shared ALL / ALL-SIZE prefixes appear here, so every RUN line is
     ; checked against the same numbers. Each expected cost below equals 2 per
     ; element (2 for a scalar, 8 for <4 x float>, 16 for <8 x double>, ...).
     ; The ret is costed 10 in the runs without -cost-kind and 1 in the
     ; -cost-kind=code-size runs.
 108 ; ALL-LABEL: 'nearbyint'
 109 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
 110 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
 111 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
 112 ; ALL-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
 113 ; ALL-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
 114 ; ALL-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
 115 ; ALL-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
 116 ; ALL-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
 117 ; ALL-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 118 ;
 119 ; ALL-SIZE-LABEL: 'nearbyint'
 120 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F32 = call float @llvm.nearbyint.f32(float undef)
 121 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
 122 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
 123 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 32 for instruction: %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
 124 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.nearbyint.f64(double undef)
 125 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
 126 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
 127 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
 128 ; ALL-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 129 ;
 130   %F32 = call float @llvm.nearbyint.f32(float undef)
 131   %V4F32 = call <4 x float> @llvm.nearbyint.v4f32(<4 x float> undef)
 132   %V8F32 = call <8 x float> @llvm.nearbyint.v8f32(<8 x float> undef)
 133   %V16F32 = call <16 x float> @llvm.nearbyint.v16f32(<16 x float> undef)
 134
 135   %F64 = call double @llvm.nearbyint.f64(double undef)
 136   %V2F64 = call <2 x double> @llvm.nearbyint.v2f64(<2 x double> undef)
 137   %V4F64 = call <4 x double> @llvm.nearbyint.v4f64(<4 x double> undef)
 138   %V8F64 = call <8 x double> @llvm.nearbyint.v8f64(<8 x double> undef)
 139
 140   ret i32 undef
 141 }
 142
 143 define i32 @rint(i32 %arg) {
     ; Cost-model coverage for llvm.rint on scalar float/double and the vector
     ; widths called in the body. %arg is never used and the function returns undef.
     ; The FAST prefixes belong to the gfx1010/gfx90a/gfx900 RUN lines and the SLOW
     ; prefixes to the RUN line without -mcpu; in the checks below they differ only
     ; in the f64 rows (1/2/4/8 vs. 2/4/8/16). The -SIZE variants are the
     ; -cost-kind=code-size runs, where the ret is costed 1 instead of 10.
 144 ; FAST-LABEL: 'rint'
 145 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 146 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 147 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 148 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 149 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 150 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 151 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 152 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 153 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 154 ;
 155 ; SLOW-LABEL: 'rint'
 156 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 157 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 158 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 159 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 160 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 161 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 162 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 163 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 164 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 165 ;
 166 ; FAST-SIZE-LABEL: 'rint'
 167 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 168 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 169 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 170 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 171 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 172 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 173 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 174 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 175 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 176 ;
 177 ; SLOW-SIZE-LABEL: 'rint'
 178 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.rint.f32(float undef)
 179 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 180 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 181 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 182 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.rint.f64(double undef)
 183 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 184 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 185 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 186 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 187 ;
 188   %F32 = call float @llvm.rint.f32(float undef)
 189   %V4F32 = call <4 x float> @llvm.rint.v4f32(<4 x float> undef)
 190   %V8F32 = call <8 x float> @llvm.rint.v8f32(<8 x float> undef)
 191   %V16F32 = call <16 x float> @llvm.rint.v16f32(<16 x float> undef)
 192
 193   %F64 = call double @llvm.rint.f64(double undef)
 194   %V2F64 = call <2 x double> @llvm.rint.v2f64(<2 x double> undef)
 195   %V4F64 = call <4 x double> @llvm.rint.v4f64(<4 x double> undef)
 196   %V8F64 = call <8 x double> @llvm.rint.v8f64(<8 x double> undef)
 197
 198   ret i32 undef
 199 }
 200
 201 define i32 @trunc(i32 %arg) {
     ; Cost-model coverage for llvm.trunc on scalar float/double and the vector
     ; widths called in the body. %arg is never used and the function returns undef.
     ; The FAST prefixes belong to the gfx1010/gfx90a/gfx900 RUN lines and the SLOW
     ; prefixes to the RUN line without -mcpu; in the checks below they differ only
     ; in the f64 rows (1/2/4/8 vs. 2/4/8/16). The -SIZE variants are the
     ; -cost-kind=code-size runs, where the ret is costed 1 instead of 10.
 202 ; FAST-LABEL: 'trunc'
 203 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
 204 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
 205 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
 206 ; FAST-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 207 ; FAST-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
 208 ; FAST-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 209 ; FAST-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
 210 ; FAST-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
 211 ; FAST-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 212 ;
 213 ; SLOW-LABEL: 'trunc'
 214 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
 215 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
 216 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
 217 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 218 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
 219 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 220 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
 221 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
 222 ; SLOW-NEXT:  Cost Model: Found an estimated cost of 10 for instruction: ret i32 undef
 223 ;
 224 ; FAST-SIZE-LABEL: 'trunc'
 225 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
 226 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
 227 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
 228 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 229 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
 230 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 231 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
 232 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
 233 ; FAST-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 234 ;
 235 ; SLOW-SIZE-LABEL: 'trunc'
 236 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: %F32 = call float @llvm.trunc.f32(float undef)
 237 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
 238 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
 239 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 240 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 2 for instruction: %F64 = call double @llvm.trunc.f64(double undef)
 241 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 4 for instruction: %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 242 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 8 for instruction: %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
 243 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 16 for instruction: %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
 244 ; SLOW-SIZE-NEXT:  Cost Model: Found an estimated cost of 1 for instruction: ret i32 undef
 245 ;
 246   %F32 = call float @llvm.trunc.f32(float undef)
 247   %V4F32 = call <4 x float> @llvm.trunc.v4f32(<4 x float> undef)
 248   %V8F32 = call <8 x float> @llvm.trunc.v8f32(<8 x float> undef)
 249   %V16F32 = call <16 x float> @llvm.trunc.v16f32(<16 x float> undef)
 250
 251   %F64 = call double @llvm.trunc.f64(double undef)
 252   %V2F64 = call <2 x double> @llvm.trunc.v2f64(<2 x double> undef)
 253   %V4F64 = call <4 x double> @llvm.trunc.v4f64(<4 x double> undef)
 254   %V8F64 = call <8 x double> @llvm.trunc.v8f64(<8 x double> undef)
 255
 256   ret i32 undef
 257 }
 258
 259 declare float @llvm.ceil.f32(float)
 260 declare <4 x float> @llvm.ceil.v4f32(<4 x float>)
 261 declare <8 x float> @llvm.ceil.v8f32(<8 x float>)
 262 declare <16 x float> @llvm.ceil.v16f32(<16 x float>)
 263
 264 declare double @llvm.ceil.f64(double)
 265 declare <2 x double> @llvm.ceil.v2f64(<2 x double>)
 266 declare <4 x double> @llvm.ceil.v4f64(<4 x double>)
 267 declare <8 x double> @llvm.ceil.v8f64(<8 x double>)
 268
 269 declare float @llvm.floor.f32(float)
 270 declare <4 x float> @llvm.floor.v4f32(<4 x float>)
 271 declare <8 x float> @llvm.floor.v8f32(<8 x float>)
 272 declare <16 x float> @llvm.floor.v16f32(<16 x float>)
 273
 274 declare double @llvm.floor.f64(double)
 275 declare <2 x double> @llvm.floor.v2f64(<2 x double>)
 276 declare <4 x double> @llvm.floor.v4f64(<4 x double>)
 277 declare <8 x double> @llvm.floor.v8f64(<8 x double>)
 278
 279 declare float @llvm.nearbyint.f32(float)
 280 declare <4 x float> @llvm.nearbyint.v4f32(<4 x float>)
 281 declare <8 x float> @llvm.nearbyint.v8f32(<8 x float>)
 282 declare <16 x float> @llvm.nearbyint.v16f32(<16 x float>)
 283
 284 declare double @llvm.nearbyint.f64(double)
 285 declare <2 x double> @llvm.nearbyint.v2f64(<2 x double>)
 286 declare <4 x double> @llvm.nearbyint.v4f64(<4 x double>)
 287 declare <8 x double> @llvm.nearbyint.v8f64(<8 x double>)
 288
 289 declare float @llvm.rint.f32(float)
 290 declare <4 x float> @llvm.rint.v4f32(<4 x float>)
 291 declare <8 x float> @llvm.rint.v8f32(<8 x float>)
 292 declare <16 x float> @llvm.rint.v16f32(<16 x float>)
 293
 294 declare double @llvm.rint.f64(double)
 295 declare <2 x double> @llvm.rint.v2f64(<2 x double>)
 296 declare <4 x double> @llvm.rint.v4f64(<4 x double>)
 297 declare <8 x double> @llvm.rint.v8f64(<8 x double>)
 298
 299 declare float @llvm.trunc.f32(float)
 300 declare <4 x float> @llvm.trunc.v4f32(<4 x float>)
 301 declare <8 x float> @llvm.trunc.v8f32(<8 x float>)
 302 declare <16 x float> @llvm.trunc.v16f32(<16 x float>)
 303
 304 declare double @llvm.trunc.f64(double)
 305 declare <2 x double> @llvm.trunc.v2f64(<2 x double>)
 306 declare <4 x double> @llvm.trunc.v4f64(<4 x double>)
 307 declare <8 x double> @llvm.trunc.v8f64(<8 x double>)