test/CodeGen/AMDGPU/clamp.ll

   1 ; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
   2 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,VI %s
   3 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefixes=GCN,GFX89,GFX9 %s
   4
   5 ; GCN-LABEL: {{^}}v_clamp_f32:
   6 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
   7 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; Baseline f32 case: minnum(maxnum(%a, 0.0), 1.0) on a float loaded at the
     ; workitem index. The checks above expect one v_max_f32_e64 that uses the
     ; loaded register for both sources and carries the clamp modifier.
   8 define amdgpu_kernel void @v_clamp_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
   9   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  10   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  11   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  12   %a = load float, float addrspace(1)* %gep0
  13   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  14   %med = call float @llvm.minnum.f32(float %max, float 1.0)
  15
  16   store float %med, float addrspace(1)* %out.gep
  17   ret void
  18 }
  19
  20 ; GCN-LABEL: {{^}}v_clamp_neg_f32:
  21 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  22 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
     ; Clamp pattern applied to the negated load (fsub -0.0, %a). The checks
     ; expect the negation to show up as a source modifier on both operands of
     ; the clamped v_max_f32_e64.
  23 define amdgpu_kernel void @v_clamp_neg_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  24   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  25   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  26   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  27   %a = load float, float addrspace(1)* %gep0
  28   %fneg.a = fsub float -0.0, %a
  29   %max = call float @llvm.maxnum.f32(float %fneg.a, float 0.0)
  30   %med = call float @llvm.minnum.f32(float %max, float 1.0)
  31
  32   store float %med, float addrspace(1)* %out.gep
  33   ret void
  34 }
  35
  36 ; GCN-LABEL: {{^}}v_clamp_negabs_f32:
  37 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  38 ; GCN: v_max_f32_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
     ; Clamp pattern applied to -fabs(%a). The checks expect both the abs and
     ; the neg to appear as source modifiers on the clamped v_max_f32_e64.
  39 define amdgpu_kernel void @v_clamp_negabs_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  40   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  41   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  42   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  43   %a = load float, float addrspace(1)* %gep0
  44   %fabs.a = call float @llvm.fabs.f32(float %a)
  45   %fneg.fabs.a = fsub float -0.0, %fabs.a
  46
  47   %max = call float @llvm.maxnum.f32(float %fneg.fabs.a, float 0.0)
  48   %med = call float @llvm.minnum.f32(float %max, float 1.0)
  49
  50   store float %med, float addrspace(1)* %out.gep
  51   ret void
  52 }
  53
  54 ; GCN-LABEL: {{^}}v_clamp_negzero_f32:
  55 ; GCN-DAG: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  56 ; GCN-DAG: v_bfrev_b32_e32 [[SIGNBIT:v[0-9]+]], 1
  57 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], [[SIGNBIT]], 1.0
     ; The lower bound here is -0.0 rather than 0.0. The checks expect no clamp
     ; modifier: a v_med3_f32 is emitted whose middle operand is the register
     ; written by v_bfrev_b32 of 1 (captured as SIGNBIT).
  58 define amdgpu_kernel void @v_clamp_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  59   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  60   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  61   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  62   %a = load float, float addrspace(1)* %gep0
  63   %max = call float @llvm.maxnum.f32(float %a, float -0.0)
  64   %med = call float @llvm.minnum.f32(float %max, float 1.0)
  65
  66   store float %med, float addrspace(1)* %out.gep
  67   ret void
  68 }
  69
  70 ; GCN-LABEL: {{^}}v_clamp_multi_use_max_f32:
  71 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
  72 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
  73 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
     ; The maxnum result has a second use (the volatile store to undef below),
     ; and the checks expect separate v_max_f32 and v_min_f32 instructions
     ; instead of a single clamped instruction.
  74 define amdgpu_kernel void @v_clamp_multi_use_max_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
  75   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  76   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
  77   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
  78   %a = load float, float addrspace(1)* %gep0
  79   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
  80   %med = call float @llvm.minnum.f32(float %max, float 1.0)
  81
  82   store float %med, float addrspace(1)* %out.gep
  83   store volatile float %max, float addrspace(1)* undef
  84   ret void
  85 }
  86
  87 ; GCN-LABEL: {{^}}v_clamp_f16:
  88 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
  89 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
  90
  91 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], [[A]] clamp{{$}}
  92 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
     ; Half-precision version of the min/max clamp pattern. The GFX89 checks
     ; expect a clamped v_max_f16_e64; the SI checks expect the clamp modifier
     ; on the f16 to f32 conversion, followed by a conversion back to f16.
  93 define amdgpu_kernel void @v_clamp_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
  94   %tid = call i32 @llvm.amdgcn.workitem.id.x()
  95   %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
  96   %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
  97   %a = load half, half addrspace(1)* %gep0
  98   %max = call half @llvm.maxnum.f16(half %a, half 0.0)
  99   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 100
 101   store half %med, half addrspace(1)* %out.gep
 102   ret void
 103 }
 104
 105 ; GCN-LABEL: {{^}}v_clamp_neg_f16:
 106 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
 107 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -[[A]], -[[A]] clamp{{$}}
 108
 109 ; FIXME: Better to fold neg into max
 110 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -[[A]] clamp{{$}}
 111 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
     ; Half-precision clamp of the negated load. The GFX89 checks expect neg
     ; source modifiers on a clamped v_max_f16_e64; the SI checks expect the
     ; neg modifier and clamp on the f16 to f32 conversion.
 112 define amdgpu_kernel void @v_clamp_neg_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
 113   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 114   %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
 115   %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
 116   %a = load half, half addrspace(1)* %gep0
 117   %fneg.a = fsub half -0.0, %a
 118   %max = call half @llvm.maxnum.f16(half %fneg.a, half 0.0)
 119   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 120
 121   store half %med, half addrspace(1)* %out.gep
 122   ret void
 123 }
 124
 125 ; GCN-LABEL: {{^}}v_clamp_negabs_f16:
 126 ; GCN: {{buffer|flat|global}}_load_ushort [[A:v[0-9]+]]
 127 ; GFX89: v_max_f16_e64 v{{[0-9]+}}, -|[[A]]|, -|[[A]]| clamp{{$}}
 128
 129 ; FIXME: Better to fold neg/abs into max
 130
 131 ; SI: v_cvt_f32_f16_e64 [[CVT:v[0-9]+]], -|[[A]]| clamp{{$}}
 132 ; SI: v_cvt_f16_f32_e32 v{{[0-9]+}}, [[CVT]]
     ; Half-precision clamp of -fabs(%a). The GFX89 checks expect neg and abs
     ; source modifiers on a clamped v_max_f16_e64; the SI checks expect the
     ; same modifiers plus clamp on the f16 to f32 conversion.
 133 define amdgpu_kernel void @v_clamp_negabs_f16(half addrspace(1)* %out, half addrspace(1)* %aptr) #0 {
 134   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 135   %gep0 = getelementptr half, half addrspace(1)* %aptr, i32 %tid
 136   %out.gep = getelementptr half, half addrspace(1)* %out, i32 %tid
 137   %a = load half, half addrspace(1)* %gep0
 138   %fabs.a = call half @llvm.fabs.f16(half %a)
 139   %fneg.fabs.a = fsub half -0.0, %fabs.a
 140
 141   %max = call half @llvm.maxnum.f16(half %fneg.fabs.a, half 0.0)
 142   %med = call half @llvm.minnum.f16(half %max, half 1.0)
 143
 144   store half %med, half addrspace(1)* %out.gep
 145   ret void
 146 }
 147
 148 ; FIXME: Do f64 instructions support clamp?
 149 ; GCN-LABEL: {{^}}v_clamp_f64:
 150 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 151 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, [[A]], [[A]] clamp{{$}}
     ; Double-precision version of the min/max clamp pattern. The checks expect
     ; a clamped v_max_f64 whose two sources are the loaded register pair.
 152 define amdgpu_kernel void @v_clamp_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
 153   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 154   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
 155   %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
 156   %a = load double, double addrspace(1)* %gep0
 157   %max = call double @llvm.maxnum.f64(double %a, double 0.0)
 158   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 159
 160   store double %med, double addrspace(1)* %out.gep
 161   ret void
 162 }
 163
 164 ; GCN-LABEL: {{^}}v_clamp_neg_f64:
 165 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 166 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -[[A]], -[[A]] clamp{{$}}
     ; Double-precision clamp of the negated load. The checks expect neg source
     ; modifiers on both operands of the clamped v_max_f64.
 167 define amdgpu_kernel void @v_clamp_neg_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
 168   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 169   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
 170   %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
 171   %a = load double, double addrspace(1)* %gep0
 172   %fneg.a = fsub double -0.0, %a
 173   %max = call double @llvm.maxnum.f64(double %fneg.a, double 0.0)
 174   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 175
 176   store double %med, double addrspace(1)* %out.gep
 177   ret void
 178 }
 179
 180 ; GCN-LABEL: {{^}}v_clamp_negabs_f64:
 181 ; GCN: {{buffer|flat|global}}_load_dwordx2 [[A:v\[[0-9]+:[0-9]+\]]]
 182 ; GCN: v_max_f64 v{{\[[0-9]+:[0-9]+\]}}, -|[[A]]|, -|[[A]]| clamp{{$}}
     ; Double-precision clamp of -fabs(%a). The checks expect neg and abs source
     ; modifiers on both operands of the clamped v_max_f64.
 183 define amdgpu_kernel void @v_clamp_negabs_f64(double addrspace(1)* %out, double addrspace(1)* %aptr) #0 {
 184   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 185   %gep0 = getelementptr double, double addrspace(1)* %aptr, i32 %tid
 186   %out.gep = getelementptr double, double addrspace(1)* %out, i32 %tid
 187   %a = load double, double addrspace(1)* %gep0
 188   %fabs.a = call double @llvm.fabs.f64(double %a)
 189   %fneg.fabs.a = fsub double -0.0, %fabs.a
 190
 191   %max = call double @llvm.maxnum.f64(double %fneg.fabs.a, double 0.0)
 192   %med = call double @llvm.minnum.f64(double %max, double 1.0)
 193
 194   store double %med, double addrspace(1)* %out.gep
 195   ret void
 196 }
 197
 198 ; GCN-LABEL: {{^}}v_clamp_med3_aby_negzero_f32:
 199 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 200 ; GCN: v_med3_f32
     ; Calls llvm.amdgcn.fmed3 directly with the constants -0.0 and 1.0 and the
     ; loaded value. The checks only require that a v_med3_f32 is emitted; its
     ; operands are not checked.
 201 define amdgpu_kernel void @v_clamp_med3_aby_negzero_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 202   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 203   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 204   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 205   %a = load float, float addrspace(1)* %gep0
 206   %med = call float @llvm.amdgcn.fmed3.f32(float -0.0, float 1.0, float %a)
 207   store float %med, float addrspace(1)* %out.gep
 208   ret void
 209 }
 210
 211 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32:
 212 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 213 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (0.0, 1.0, %a). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 214 define amdgpu_kernel void @v_clamp_med3_aby_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 215   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 216   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 217   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 218   %a = load float, float addrspace(1)* %gep0
 219   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
 220   store float %med, float addrspace(1)* %out.gep
 221   ret void
 222 }
 223
 224 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32:
 225 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 226 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (1.0, 0.0, %a). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 227 define amdgpu_kernel void @v_clamp_med3_bay_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 228   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 229   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 230   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 231   %a = load float, float addrspace(1)* %gep0
 232   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
 233   store float %med, float addrspace(1)* %out.gep
 234   ret void
 235 }
 236
 237 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32:
 238 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 239 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (%a, 0.0, 1.0). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 240 define amdgpu_kernel void @v_clamp_med3_yab_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 241   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 242   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 243   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 244   %a = load float, float addrspace(1)* %gep0
 245   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
 246   store float %med, float addrspace(1)* %out.gep
 247   ret void
 248 }
 249
 250 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32:
 251 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 252 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (%a, 1.0, 0.0). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 253 define amdgpu_kernel void @v_clamp_med3_yba_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 254   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 255   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 256   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 257   %a = load float, float addrspace(1)* %gep0
 258   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
 259   store float %med, float addrspace(1)* %out.gep
 260   ret void
 261 }
 262
 263 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32:
 264 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 265 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (0.0, %a, 1.0). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 266 define amdgpu_kernel void @v_clamp_med3_ayb_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 267   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 268   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 269   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 270   %a = load float, float addrspace(1)* %gep0
 271   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
 272   store float %med, float addrspace(1)* %out.gep
 273   ret void
 274 }
 275
 276 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32:
 277 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 278 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (1.0, %a, 0.0). The checks expect it to be
     ; selected as a clamped v_max_f32_e64 of the loaded register.
 279 define amdgpu_kernel void @v_clamp_med3_bya_f32(float addrspace(1)* %out, float addrspace(1)* %aptr) #0 {
 280   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 281   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 282   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 283   %a = load float, float addrspace(1)* %gep0
 284   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
 285   store float %med, float addrspace(1)* %out.gep
 286   ret void
 287 }
 288
 289 ; GCN-LABEL: {{^}}v_clamp_constants_to_one_f32:
 290 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 1.0
     ; All three fmed3 operands are constants (0.0, 1.0, 4.0). The check expects
     ; the result to be materialised directly as a v_mov_b32 of 1.0.
 291 define amdgpu_kernel void @v_clamp_constants_to_one_f32(float addrspace(1)* %out) #0 {
 292   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 293   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 294   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 4.0)
 295   store float %med, float addrspace(1)* %out.gep
 296   ret void
 297 }
 298
 299 ; GCN-LABEL: {{^}}v_clamp_constants_to_zero_f32:
 300 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
     ; All three fmed3 operands are constants (0.0, 1.0, -4.0). The check
     ; expects the result to be materialised directly as a v_mov_b32 of 0.
 301 define amdgpu_kernel void @v_clamp_constants_to_zero_f32(float addrspace(1)* %out) #0 {
 302   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 303   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 304   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float -4.0)
 305   store float %med, float addrspace(1)* %out.gep
 306   ret void
 307 }
 308
 309 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_f32:
 310 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0.5
     ; Constant operands (0.0, 1.0, 0.5): the third value already lies between
     ; the bounds, and the check expects it to come through unchanged as a
     ; v_mov_b32 of 0.5.
 311 define amdgpu_kernel void @v_clamp_constant_preserve_f32(float addrspace(1)* %out) #0 {
 312   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 313   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 314   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0.5)
 315   store float %med, float addrspace(1)* %out.gep
 316   ret void
 317 }
 318
 319 ; GCN-LABEL: {{^}}v_clamp_constant_preserve_denorm_f32:
 320 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fffff{{$}}
     ; The third fmed3 operand is the float with bit pattern 8388607 (0x7fffff).
     ; The check expects that exact bit pattern to be moved into the result,
     ; i.e. the value is neither clamped nor flushed.
 321 define amdgpu_kernel void @v_clamp_constant_preserve_denorm_f32(float addrspace(1)* %out) #0 {
 322   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 323   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 324   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 8388607 to float))
 325   store float %med, float addrspace(1)* %out.gep
 326   ret void
 327 }
 328
 329 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32:
 330 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
     ; The third fmed3 operand is the NaN constant 0x7FF8000000000000 (a quiet
     ; NaN, per the test name). With attribute group #0 the check expects the
     ; result to be a v_mov_b32 of 0.
 331 define amdgpu_kernel void @v_clamp_constant_qnan_f32(float addrspace(1)* %out) #0 {
 332   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 333   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 334   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
 335   store float %med, float addrspace(1)* %out.gep
 336   ret void
 337 }
 338
 339 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32:
 340 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0{{$}}
     ; The third fmed3 operand is the float with bit pattern 2139095041
     ; (0x7f800001; a signaling NaN, per the test name). With attribute group #0
     ; the check expects the result to be a v_mov_b32 of 0.
 341 define amdgpu_kernel void @v_clamp_constant_snan_f32(float addrspace(1)* %out) #0 {
 342   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 343   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 344   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
 345   store float %med, float addrspace(1)* %out.gep
 346   ret void
 347 }
 348
 349 ; ---------------------------------------------------------------------
 350 ; Test non-default behaviors enabling snans and disabling dx10_clamp
 351 ; ---------------------------------------------------------------------
 352
 353 ; GCN-LABEL: {{^}}v_clamp_f32_no_dx10_clamp:
 354 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 355 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
     ; Same min/max IR as v_clamp_f32, but compiled with attribute group #2
     ; (-dx10-clamp, -fp-exceptions). The checks expect a v_med3_f32 of the
     ; loaded value with 0 and 1.0 instead of a clamp modifier.
 356 define amdgpu_kernel void @v_clamp_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 357   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 358   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 359   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 360   %a = load float, float addrspace(1)* %gep0
 361   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
 362   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 363
 364   store float %med, float addrspace(1)* %out.gep
 365   ret void
 366 }
 367
 368 ; GCN-LABEL: {{^}}v_clamp_f32_snan_dx10clamp:
 369 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 370 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; Same min/max IR as v_clamp_f32, but compiled with attribute group #3
     ; (+dx10-clamp, +fp-exceptions). The checks still expect the clamped
     ; v_max_f32_e64 form.
 371 define amdgpu_kernel void @v_clamp_f32_snan_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #3 {
 372   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 373   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 374   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 375   %a = load float, float addrspace(1)* %gep0
 376   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
 377   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 378
 379   store float %med, float addrspace(1)* %out.gep
 380   ret void
 381 }
 382
 383 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp:
 384 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 385 ; GCN: v_max_f32_e32 [[MAX:v[0-9]+]], 0, [[A]]
 386 ; GCN: v_min_f32_e32 [[MIN:v[0-9]+]], 1.0, [[MAX]]
     ; Same min/max IR as v_clamp_f32, but compiled with attribute group #4
     ; (-dx10-clamp, +fp-exceptions). The checks expect the max and min to stay
     ; as two separate instructions.
 387 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
 388   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 389   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 390   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 391   %a = load float, float addrspace(1)* %gep0
 392   %max = call float @llvm.maxnum.f32(float %a, float 0.0)
 393   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 394
 395   store float %med, float addrspace(1)* %out.gep
 396   ret void
 397 }
 398
 399 ; GCN-LABEL: {{^}}v_clamp_f32_snan_no_dx10clamp_nnan_src:
 400 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
     ; GCN: v_add_f32_e32 [[ADD:v[0-9]+]], 1.0, [[A]]
 401 ; GCN: v_med3_f32 v{{[0-9]+}}, [[ADD]], 0, 1.0
     ; Compiled with attribute group #4 (-dx10-clamp, +fp-exceptions). The value
     ; being clamped is the result of an nnan fadd rather than the raw load, and
     ; the checks expect a v_med3_f32 with 0 and 1.0. The v_med3_f32 source is
     ; tied to the captured add result so the test does not depend on the
     ; register allocator reusing the load's register for the add.
 402 define amdgpu_kernel void @v_clamp_f32_snan_no_dx10clamp_nnan_src(float addrspace(1)* %out, float addrspace(1)* %aptr) #4 {
 403   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 404   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 405   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 406   %a = load float, float addrspace(1)* %gep0
 407   %add  = fadd nnan float %a, 1.0
 408   %max = call float @llvm.maxnum.f32(float %add, float 0.0)
 409   %med = call float @llvm.minnum.f32(float %max, float 1.0)
 410
 411   store float %med, float addrspace(1)* %out.gep
 412   ret void
 413 }
 414
 415 ; GCN-LABEL: {{^}}v_clamp_med3_aby_f32_no_dx10_clamp:
 416 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 417 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (0.0, 1.0, %a) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). For this operand order the checks still
     ; expect a clamped v_max_f32_e64.
 418 define amdgpu_kernel void @v_clamp_med3_aby_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 419   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 420   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 421   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 422   %a = load float, float addrspace(1)* %gep0
 423   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float %a)
 424   store float %med, float addrspace(1)* %out.gep
 425   ret void
 426 }
 427
 428 ; GCN-LABEL: {{^}}v_clamp_med3_bay_f32_no_dx10_clamp:
 429 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 430 ; GCN: v_max_f32_e64 v{{[0-9]+}}, [[A]], [[A]] clamp{{$}}
     ; fmed3 with operand order (1.0, 0.0, %a) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). For this operand order the checks still
     ; expect a clamped v_max_f32_e64.
 431 define amdgpu_kernel void @v_clamp_med3_bay_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 432   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 433   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 434   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 435   %a = load float, float addrspace(1)* %gep0
 436   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float 0.0, float %a)
 437   store float %med, float addrspace(1)* %out.gep
 438   ret void
 439 }
 440
 441 ; GCN-LABEL: {{^}}v_clamp_med3_yab_f32_no_dx10_clamp:
 442 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 443 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 0, 1.0
     ; fmed3 with operand order (%a, 0.0, 1.0) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). The checks expect a v_med3_f32 with the
     ; operands in the same order as the IR call.
 444 define amdgpu_kernel void @v_clamp_med3_yab_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 445   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 446   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 447   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 448   %a = load float, float addrspace(1)* %gep0
 449   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 0.0, float 1.0)
 450   store float %med, float addrspace(1)* %out.gep
 451   ret void
 452 }
 453
 454 ; GCN-LABEL: {{^}}v_clamp_med3_yba_f32_no_dx10_clamp:
 455 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 456 ; GCN: v_med3_f32 v{{[0-9]+}}, [[A]], 1.0, 0
     ; fmed3 with operand order (%a, 1.0, 0.0) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). The checks expect a v_med3_f32 with the
     ; operands in the same order as the IR call.
 457 define amdgpu_kernel void @v_clamp_med3_yba_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 458   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 459   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 460   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 461   %a = load float, float addrspace(1)* %gep0
 462   %med = call float @llvm.amdgcn.fmed3.f32(float %a, float 1.0, float 0.0)
 463   store float %med, float addrspace(1)* %out.gep
 464   ret void
 465 }
 466
 467 ; GCN-LABEL: {{^}}v_clamp_med3_ayb_f32_no_dx10_clamp:
 468 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 469 ; GCN: v_med3_f32 v{{[0-9]+}}, 0, [[A]], 1.0
     ; fmed3 with operand order (0.0, %a, 1.0) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). The checks expect a v_med3_f32 with the
     ; operands in the same order as the IR call.
 470 define amdgpu_kernel void @v_clamp_med3_ayb_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 471   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 472   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 473   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 474   %a = load float, float addrspace(1)* %gep0
 475   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float %a, float 1.0)
 476   store float %med, float addrspace(1)* %out.gep
 477   ret void
 478 }
 479
 480 ; GCN-LABEL: {{^}}v_clamp_med3_bya_f32_no_dx10_clamp:
 481 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 482 ; GCN: v_med3_f32 v{{[0-9]+}}, 1.0, [[A]], 0
     ; fmed3 with operand order (1.0, %a, 0.0) under attribute group #2
     ; (-dx10-clamp, -fp-exceptions). The checks expect a v_med3_f32 with the
     ; operands in the same order as the IR call.
 483 define amdgpu_kernel void @v_clamp_med3_bya_f32_no_dx10_clamp(float addrspace(1)* %out, float addrspace(1)* %aptr) #2 {
 484   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 485   %gep0 = getelementptr float, float addrspace(1)* %aptr, i32 %tid
 486   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 487   %a = load float, float addrspace(1)* %gep0
 488   %med = call float @llvm.amdgcn.fmed3.f32(float 1.0, float %a, float 0.0)
 489   store float %med, float addrspace(1)* %out.gep
 490   ret void
 491 }
 492
 493 ; GCN-LABEL: {{^}}v_clamp_constant_qnan_f32_no_dx10_clamp:
 494 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7fc00000
     ; Same constant operands as v_clamp_constant_qnan_f32, but with attribute
     ; group #2 (-dx10-clamp, -fp-exceptions). Here the check expects the NaN
     ; bit pattern 0x7fc00000 to be stored rather than 0.
 495 define amdgpu_kernel void @v_clamp_constant_qnan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
 496   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 497   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 498   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float 0x7FF8000000000000)
 499   store float %med, float addrspace(1)* %out.gep
 500   ret void
 501 }
 502
 503 ; GCN-LABEL: {{^}}v_clamp_constant_snan_f32_no_dx10_clamp:
 504 ; GCN: v_mov_b32_e32 v{{[0-9]+}}, 0x7f800001
     ; Same constant operands as v_clamp_constant_snan_f32, but with attribute
     ; group #2 (-dx10-clamp, -fp-exceptions). Here the check expects the input
     ; bit pattern 0x7f800001 (i32 2139095041) to be stored rather than 0.
 505 define amdgpu_kernel void @v_clamp_constant_snan_f32_no_dx10_clamp(float addrspace(1)* %out) #2 {
 506   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 507   %out.gep = getelementptr float, float addrspace(1)* %out, i32 %tid
 508   %med = call float @llvm.amdgcn.fmed3.f32(float 0.0, float 1.0, float bitcast (i32 2139095041 to float))
 509   store float %med, float addrspace(1)* %out.gep
 510   ret void
 511 }
 512
 513 ; GCN-LABEL: {{^}}v_clamp_v2f16:
 514 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 515 ; GFX9-NOT: [[A]]
 516 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
     ; Packed <2 x half> clamp against zeroinitializer and <1.0, 1.0>. The GFX9
     ; checks expect a single clamped v_pk_max_f16 on the loaded register, with
     ; no other mention of that register between the load and the v_pk_max_f16.
     ; Only the load is checked for the other runs.
 517 define amdgpu_kernel void @v_clamp_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 518   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 519   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 520   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 521   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 522   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> zeroinitializer)
 523   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 524
 525   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 526   ret void
 527 }
 528
 529 ; GCN-LABEL: {{^}}v_clamp_v2f16_undef_elt:
 530 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 531 ; GFX9-NOT: [[A]]
 532 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] clamp{{$}}
     ; Packed clamp where each bound vector has one undef element
     ; (<undef, 0.0> for the max, <1.0, undef> for the min). The GFX9 checks
     ; still expect a single clamped v_pk_max_f16.
 533 define amdgpu_kernel void @v_clamp_v2f16_undef_elt(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 534   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 535   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 536   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 537   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 538   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half undef, half 0.0>)
 539   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half undef>)
 540
 541   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 542   ret void
 543 }
 544
 545 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_zero:
 546 ; GFX9: v_pk_max_f16
 547 ; GFX9: v_pk_min_f16
     ; Negative test: the max bound is <2.0, 0.0>, so not every lane uses 0.0.
     ; The GFX9 checks expect separate v_pk_max_f16 and v_pk_min_f16
     ; instructions.
 548 define amdgpu_kernel void @v_clamp_v2f16_not_zero(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 549   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 550   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 551   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 552   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 553   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 0.0>)
 554   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 555
 556   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 557   ret void
 558 }
 559
 560 ; GCN-LABEL: {{^}}v_clamp_v2f16_not_one:
 561 ; GFX9: v_pk_max_f16
 562 ; GFX9: v_pk_min_f16
     ; Negative test: the min bound is <0.0, 1.0>, so not every lane uses 1.0.
     ; The GFX9 checks expect separate v_pk_max_f16 and v_pk_min_f16
     ; instructions.
 563 define amdgpu_kernel void @v_clamp_v2f16_not_one(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 564   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 565   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 566   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 567   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 568   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 0.0, half 0.0>)
 569   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 0.0, half 1.0>)
 570
 571   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 572   ret void
 573 }
 574
 575 ; GCN-LABEL: {{^}}v_clamp_neg_v2f16:
 576 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 577 ; GFX9-NOT: [[A]]
 578 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
     ; Packed clamp of the fully negated vector (fsub <-0.0, -0.0>, %a). The
     ; GFX9 checks expect the negation to appear as neg_lo and neg_hi modifiers
     ; on both sources of the clamped v_pk_max_f16.
 579 define amdgpu_kernel void @v_clamp_neg_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 580   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 581   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 582   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 583   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 584   %fneg.a = fsub <2 x half> <half -0.0, half -0.0>, %a
 585   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.a, <2 x half> zeroinitializer)
 586   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 587
 588   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 589   ret void
 590 }
 591
 592 ; GCN-LABEL: {{^}}v_clamp_negabs_v2f16:
 593 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 594 ; GFX9: v_and_b32_e32 [[ABS:v[0-9]+]], 0x7fff7fff, [[A]]
 595 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[ABS]], [[ABS]] neg_lo:[1,1] neg_hi:[1,1] clamp{{$}}
     ; Packed clamp of -fabs(%a). The GFX9 checks expect the fabs as a separate
     ; v_and_b32 with mask 0x7fff7fff, and the negation as neg_lo/neg_hi
     ; modifiers on the clamped v_pk_max_f16 that consumes the masked value.
 596 define amdgpu_kernel void @v_clamp_negabs_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 597   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 598   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 599   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 600   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 601   %fabs.a = call <2 x half> @llvm.fabs.v2f16(<2 x half> %a)
 602   %fneg.fabs.a = fsub <2 x half> <half -0.0, half -0.0>, %fabs.a
 603
 604   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %fneg.fabs.a, <2 x half> zeroinitializer)
 605   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 606
 607   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 608   ret void
 609 }
 610
 611 ; GCN-LABEL: {{^}}v_clamp_neglo_v2f16:
 612 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 613 ; GFX9-NOT: [[A]]
 614 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_lo:[1,1] clamp{{$}}
     ; Only element 0 of the vector is negated (extract, fsub, insert) before
     ; the packed clamp. The GFX9 checks expect just a neg_lo modifier on the
     ; clamped v_pk_max_f16.
 615 define amdgpu_kernel void @v_clamp_neglo_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 616   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 617   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 618   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 619   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 620   %lo = extractelement <2 x half> %a, i32 0
 621   %neg.lo = fsub half -0.0, %lo
 622   %neg.lo.vec = insertelement <2 x half> %a, half %neg.lo, i32 0
 623   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.lo.vec, <2 x half> zeroinitializer)
 624   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 625
 626   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 627   ret void
 628 }
 629
 630 ; GCN-LABEL: {{^}}v_clamp_neghi_v2f16:
 631 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 632 ; GFX9-NOT: [[A]]
 633 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] neg_hi:[1,1] clamp{{$}}
     ; Only element 1 of the vector is negated (extract, fsub, insert) before
     ; the packed clamp. The GFX9 checks expect just a neg_hi modifier on the
     ; clamped v_pk_max_f16.
 634 define amdgpu_kernel void @v_clamp_neghi_v2f16(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 635   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 636   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 637   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 638   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 639   %hi = extractelement <2 x half> %a, i32 1
 640   %neg.hi = fsub half -0.0, %hi
 641   %neg.hi.vec = insertelement <2 x half> %a, half %neg.hi, i32 1
 642   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %neg.hi.vec, <2 x half> zeroinitializer)
 643   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 644
 645   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 646   ret void
 647 }
 648
 649 ; GCN-LABEL: {{^}}v_clamp_v2f16_shuffle:
 650 ; GCN: {{buffer|flat|global}}_load_dword [[A:v[0-9]+]]
 651 ; GFX9-NOT: [[A]]
 652 ; GFX9: v_pk_max_f16 [[CLAMP:v[0-9]+]], [[A]], [[A]] op_sel:[1,1] op_sel_hi:[0,0] clamp{{$}}
     ; The two halves are swapped with a shufflevector (<1, 0>) before the
     ; packed clamp. The GFX9 checks expect the swap to be expressed through
     ; op_sel/op_sel_hi on the clamped v_pk_max_f16, with no other use of the
     ; loaded register in between.
 653 define amdgpu_kernel void @v_clamp_v2f16_shuffle(<2 x half> addrspace(1)* %out, <2 x half> addrspace(1)* %aptr) #0 {
 654   %tid = call i32 @llvm.amdgcn.workitem.id.x()
 655   %gep0 = getelementptr <2 x half>, <2 x half> addrspace(1)* %aptr, i32 %tid
 656   %out.gep = getelementptr <2 x half>, <2 x half> addrspace(1)* %out, i32 %tid
 657   %a = load <2 x half>, <2 x half> addrspace(1)* %gep0
 658   %shuf = shufflevector <2 x half> %a, <2 x half> undef, <2 x i32> <i32 1, i32 0>
 659   %max = call <2 x half> @llvm.maxnum.v2f16(<2 x half> %shuf, <2 x half> zeroinitializer)
 660   %med = call <2 x half> @llvm.minnum.v2f16(<2 x half> %max, <2 x half> <half 1.0, half 1.0>)
 661
 662   store <2 x half> %med, <2 x half> addrspace(1)* %out.gep
 663   ret void
 664 }
 665
 666 declare i32 @llvm.amdgcn.workitem.id.x() #1
 667 declare float @llvm.fabs.f32(float) #1
 668 declare float @llvm.minnum.f32(float, float) #1
 669 declare float @llvm.maxnum.f32(float, float) #1
 670 declare float @llvm.amdgcn.fmed3.f32(float, float, float) #1
 671 declare double @llvm.fabs.f64(double) #1
 672 declare double @llvm.minnum.f64(double, double) #1
 673 declare double @llvm.maxnum.f64(double, double) #1
 674 declare half @llvm.fabs.f16(half) #1
 675 declare half @llvm.minnum.f16(half, half) #1
 676 declare half @llvm.maxnum.f16(half, half) #1
 677 declare <2 x half> @llvm.fabs.v2f16(<2 x half>) #1
 678 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>) #1
 679 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>) #1
 680
 681 attributes #0 = { nounwind }
     ; Attribute groups used in this file:
     ;   #0 (above) - kernels that keep the default target features.
     ;   #1 - the intrinsic declarations.
     ;   #2 - dx10-clamp off, fp-exceptions off.
     ;   #3 - dx10-clamp on,  fp-exceptions on.
     ;   #4 - dx10-clamp off, fp-exceptions on.
     ; Groups #2 through #4 also set "no-nans-fp-math" to "false".
 682 attributes #1 = { nounwind readnone }
 683 attributes #2 = { nounwind "target-features"="-dx10-clamp,-fp-exceptions" "no-nans-fp-math"="false" }
 684 attributes #3 = { nounwind "target-features"="+dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }
 685 attributes #4 = { nounwind "target-features"="-dx10-clamp,+fp-exceptions" "no-nans-fp-math"="false" }