llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-minmax-const-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
   3
   4 define float @test_min_max_ValK0_K1_f32(float %a) #0 {
   5 ; GFX10-LABEL: test_min_max_ValK0_K1_f32:
   6 ; GFX10:       ; %bb.0:
   7 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   8 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
   9 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  10 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: minnum(maxnum(Val, 0.0), 1.0) on f32, where
       ; Val = %a * 2.0 and both intrinsic calls carry nnan. The function uses
       ; attribute group #0 (amdgpu-ieee=true). The expected output has no
       ; separate min/max instructions: only the multiply with the clamp modifier.
  11   %fmul = fmul float %a, 2.0
  12   %maxnum = call nnan float @llvm.maxnum.f32(float %fmul, float 0.0)
  13   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 1.0)
  14   ret float %fmed
  15 }
  16
  17 define double @test_min_max_K0Val_K1_f64(double %a) #1 {
  18 ; GFX10-LABEL: test_min_max_K0Val_K1_f64:
  19 ; GFX10:       ; %bb.0:
  20 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  21 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  22 ; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
  23 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: minnum(maxnum(0.0, Val), 1.0) on f64, i.e. the
       ; constant 0.0 is the first operand of maxnum. Both calls carry nnan and
       ; the function uses attribute group #1 (amdgpu-ieee=false). The expected
       ; output is a single v_mul_f64 with the clamp modifier.
  24   %fmul = fmul double %a, 2.0
  25   %maxnum = call nnan double @llvm.maxnum.f64(double 0.0, double %fmul)
  26   %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 1.0)
  27   ret double %fmed
  28 }
  29
  30 ; min-max patterns for ieee=true, dx10_clamp=true don't have to check for NaNs
  31 define half @test_min_K1max_ValK0_f16(half %a) #2 {
  32 ; GFX10-LABEL: test_min_K1max_ValK0_f16:
  33 ; GFX10:       ; %bb.0:
  34 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  35 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  36 ; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
  37 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: minnum(1.0, maxnum(Val, 0.0)) on f16, i.e. the
       ; constant 1.0 is the first operand of minnum. Neither call carries nnan;
       ; the function uses attribute group #2 (amdgpu-ieee=true,
       ; amdgpu-dx10-clamp=true). The expected output is a single v_mul_f16
       ; with the clamp modifier.
  38   %fmul = fmul half %a, 2.0
  39   %maxnum = call half @llvm.maxnum.f16(half %fmul, half 0.0)
  40   %fmed = call half @llvm.minnum.f16(half 1.0, half %maxnum)
  41   ret half %fmed
  42 }
  43
  44 define <2 x half> @test_min_K1max_K0Val_f16(<2 x half> %a) #1 {
  45 ; GFX10-LABEL: test_min_K1max_K0Val_f16:
  46 ; GFX10:       ; %bb.0:
  47 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  48 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  49 ; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
  50 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: minnum(splat 1.0, maxnum(splat 0.0, Val)) on
       ; <2 x half>, with both constants as the first operand of their call.
       ; Both calls carry nnan and the function uses attribute group #1
       ; (amdgpu-ieee=false). The expected output is a single packed multiply
       ; with the clamp modifier.
  51   %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
  52   %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half 0.0>, <2 x half> %fmul)
  53   %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half 1.0>, <2 x half> %maxnum)
  54   ret <2 x half> %fmed
  55 }
  56
  57 define <2 x half> @test_min_max_splat_padded_with_undef(<2 x half> %a) #2 {
  58 ; GFX10-LABEL: test_min_max_splat_padded_with_undef:
  59 ; GFX10:       ; %bb.0:
  60 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  61 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  62 ; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
  63 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: min(max(...)) on <2 x half> where the constant
       ; vectors are <0.0, undef> and <1.0, undef>, i.e. the second lane of each
       ; constant is undef. Neither call carries nnan; the function uses
       ; attribute group #2 (amdgpu-ieee=true, amdgpu-dx10-clamp=true). The
       ; expected output is still a single packed multiply with the clamp
       ; modifier.
  64   %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
  65   %maxnum = call <2 x half> @llvm.maxnum.v2f16(<2 x half> <half 0.0, half undef>, <2 x half> %fmul)
  66   %fmed = call <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %maxnum)
  67   ret <2 x half> %fmed
  68 }
  69
  70 ; max-min patterns work only for known non-NaN inputs
  71
  72 define float @test_max_min_ValK1_K0_f32(float %a) #0 {
  73 ; GFX10-LABEL: test_max_min_ValK1_K0_f32:
  74 ; GFX10:       ; %bb.0:
  75 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  76 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  77 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  78 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: maxnum(minnum(Val, 1.0), 0.0) on f32, where
       ; Val = %a * 2.0 and both calls carry nnan. The function uses attribute
       ; group #0 (amdgpu-ieee=true). The expected output is a single multiply
       ; with the clamp modifier.
  79   %fmul = fmul float %a, 2.0
  80   %minnum = call nnan float @llvm.minnum.f32(float %fmul, float 1.0)
  81   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 0.0)
  82   ret float %fmed
  83 }
  84
  85 define double @test_max_min_K1Val_K0_f64(double %a) #1 {
  86 ; GFX10-LABEL: test_max_min_K1Val_K0_f64:
  87 ; GFX10:       ; %bb.0:
  88 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  89 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  90 ; GFX10-NEXT:    v_mul_f64 v[0:1], v[0:1], 2.0 clamp
  91 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: maxnum(minnum(1.0, Val), 0.0) on f64, i.e. the
       ; constant 1.0 is the first operand of minnum. Both calls carry nnan and
       ; the function uses attribute group #1 (amdgpu-ieee=false). The expected
       ; output is a single v_mul_f64 with the clamp modifier.
  92   %fmul = fmul double %a, 2.0
  93   %minnum = call nnan double @llvm.minnum.f64(double 1.0, double %fmul)
  94   %fmed = call nnan double @llvm.maxnum.f64(double %minnum, double 0.0)
  95   ret double %fmed
  96 }
  97
  98 define half @test_max_K0min_ValK1_f16(half %a) #0 {
  99 ; GFX10-LABEL: test_max_K0min_ValK1_f16:
 100 ; GFX10:       ; %bb.0:
 101 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 102 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 103 ; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
 104 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: maxnum(0.0, minnum(Val, 1.0)) on f16, i.e. the
       ; constant 0.0 is the first operand of maxnum. Both calls carry nnan and
       ; the function uses attribute group #0 (amdgpu-ieee=true). The expected
       ; output is a single v_mul_f16 with the clamp modifier.
 105   %fmul = fmul half %a, 2.0
 106   %minnum = call nnan half @llvm.minnum.f16(half %fmul, half 1.0)
 107   %fmed = call nnan half @llvm.maxnum.f16(half 0.0, half %minnum)
 108   ret half %fmed
 109 }
 110
 111 ; treat undef as value that will result in a constant splat
 112 define <2 x half> @test_max_K0min_K1Val_v2f16(<2 x half> %a) #1 {
 113 ; GFX10-LABEL: test_max_K0min_K1Val_v2f16:
 114 ; GFX10:       ; %bb.0:
 115 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 116 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 117 ; GFX10-NEXT:    v_pk_mul_f16 v0, v0, 2.0 op_sel_hi:[1,0] clamp
 118 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: maxnum(K0, minnum(K1, Val)) on <2 x half> with
       ; K1 = <1.0, undef> and K0 = <undef, 0.0>; note the undef lane differs
       ; between the two constants. Both calls carry nnan and the function uses
       ; attribute group #1 (amdgpu-ieee=false). The expected output is a single
       ; packed multiply with the clamp modifier.
 119   %fmul = fmul <2 x half> %a, <half 2.0, half 2.0>
 120   %minnum = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> <half 1.0, half undef>, <2 x half> %fmul)
 121   %fmed = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> <half undef, half 0.0>, <2 x half> %minnum)
 122   ret <2 x half> %fmed
 123 }
 124
 125 ; global nnan function attribute always forces clamp combine
 126
 127 define float @test_min_max_global_nnan(float %a) #3 {
 128 ; GFX10-LABEL: test_min_max_global_nnan:
 129 ; GFX10:       ; %bb.0:
 130 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 131 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 132 ; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
 133 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: minnum(maxnum(%a, 0.0), 1.0) applied directly to
       ; the argument (no preceding multiply) and without nnan on either call.
       ; The only no-NaN information is attribute group #3
       ; (no-nans-fp-math=true). The expected output is v_max_f32 of v0 with
       ; itself, carrying the clamp modifier.
 134   %maxnum = call float @llvm.maxnum.f32(float %a, float 0.0)
 135   %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
 136   ret float %fmed
 137 }
 138
 139 define float @test_max_min_global_nnan(float %a) #3 {
 140 ; GFX10-LABEL: test_max_min_global_nnan:
 141 ; GFX10:       ; %bb.0:
 142 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 143 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 144 ; GFX10-NEXT:    v_max_f32_e64 v0, v0, v0 clamp
 145 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Pattern under test: maxnum(minnum(%a, 1.0), 0.0) applied directly to
       ; the argument and without nnan on either call. The only no-NaN
       ; information is attribute group #3 (no-nans-fp-math=true). The expected
       ; output is v_max_f32 of v0 with itself, carrying the clamp modifier.
 146   %minnum = call float @llvm.minnum.f32(float %a, float 1.0)
 147   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
 148   ret float %fmed
 149 }
 150
 151 ; ------------------------------------------------------------------------------
 152 ; Negative patterns
 153 ; ------------------------------------------------------------------------------
 154
 155 ; min(max(Val, 1.0), 0.0), should be min(max(Val, 0.0), 1.0)
 156 define float @test_min_max_K0_gt_K1(float %a) #0 {
 157 ; GFX10-LABEL: test_min_max_K0_gt_K1:
 158 ; GFX10:       ; %bb.0:
 159 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 160 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 161 ; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
 162 ; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
 163 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Negative test: the constants are swapped, so the max uses 1.0 and the
       ; min uses 0.0. Both calls carry nnan, yet the expected output keeps the
       ; separate v_max_f32 and v_min_f32 instructions and has no clamp modifier.
 164   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 1.0)
 165   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 0.0)
 166   ret float %fmed
 167 }
 168
 169 ; max(min(Val, 0.0), 1.0), should be max(min(Val, 1.0), 0.0)
 170 define float @test_max_min_K0_gt_K1(float %a) #0 {
 171 ; GFX10-LABEL: test_max_min_K0_gt_K1:
 172 ; GFX10:       ; %bb.0:
 173 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 174 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 175 ; GFX10-NEXT:    v_min_f32_e32 v0, 0, v0
 176 ; GFX10-NEXT:    v_max_f32_e32 v0, 1.0, v0
 177 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Negative test: the constants are swapped, so the min uses 0.0 and the
       ; max uses 1.0. Both calls carry nnan, yet the expected output keeps the
       ; separate v_min_f32 and v_max_f32 instructions and has no clamp modifier.
 178   %minnum = call nnan float @llvm.minnum.f32(float %a, float 0.0)
 179   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 1.0)
 180   ret float %fmed
 181 }
 182
 183 ; Input that can be NaN
 184
 185 ; min-max patterns for ieee=false require known non-NaN input
 186 define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 {
 187 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 188 ; GFX10:       ; %bb.0:
 189 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 190 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 191 ; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
 192 ; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
 193 ; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
 194 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Negative test: minnum(maxnum(Val, 0.0), 1.0) with no nnan on either
       ; call, under attribute group #1 (amdgpu-ieee=false). The expected output
       ; keeps the multiply, max and min as three separate instructions with no
       ; clamp modifier.
 195   %fmul = fmul float %a, 2.0
 196   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
 197   %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
 198   ret float %fmed
 199 }
 200
 201 ; clamp fails here since input can be NaN and dx10_clamp=false; fmed3 succeeds
 202 define float @test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false(float %a) #4 {
 203 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_true_dx10clamp_false:
 204 ; GFX10:       ; %bb.0:
 205 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 206 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 207 ; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
 208 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 209 ; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 210 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; minnum(maxnum(Val, 0.0), 1.0) with no nnan on either call, under
       ; attribute group #4 (amdgpu-ieee=true, amdgpu-dx10-clamp=false). The
       ; expected output has no clamp modifier: the multiply is followed by
       ; v_max_f32 of v0 with itself and then v_med3_f32 with operands 0 and 1.0.
 211   %fmul = fmul float %a, 2.0
 212   %maxnum = call float @llvm.maxnum.f32(float %fmul, float 0.0)
 213   %fmed = call float @llvm.minnum.f32(float %maxnum, float 1.0)
 214   ret float %fmed
 215 }
 216
 217 ; max-min patterns always require known non-NaN input
 218
 219 define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 {
 220 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 221 ; GFX10:       ; %bb.0:
 222 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 223 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 224 ; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
 225 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 226 ; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
 227 ; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
 228 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Negative test: maxnum(minnum(Val, 1.0), 0.0) with no nnan on either
       ; call, under attribute group #0 (amdgpu-ieee=true). The expected output
       ; has no clamp modifier: the multiply, a v_max_f32 of v0 with itself,
       ; then separate min and max instructions.
 229   %fmul = fmul float %a, 2.0
 230   %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
 231   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
 232   ret float %fmed
 233 }
 234
 235 define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 {
 236 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 237 ; GFX10:       ; %bb.0:
 238 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 239 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 240 ; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
 241 ; GFX10-NEXT:    v_min_f32_e32 v0, 1.0, v0
 242 ; GFX10-NEXT:    v_max_f32_e32 v0, 0, v0
 243 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
       ; Negative test: maxnum(minnum(Val, 1.0), 0.0) with no nnan on either
       ; call, under attribute group #1 (amdgpu-ieee=false). The expected output
       ; keeps the multiply, min and max as three separate instructions with no
       ; clamp modifier.
 244   %fmul = fmul float %a, 2.0
 245   %minnum = call float @llvm.minnum.f32(float %fmul, float 1.0)
 246   %fmed = call float @llvm.maxnum.f32(float %minnum, float 0.0)
 247   ret float %fmed
 248 }
 249
 250 declare half @llvm.minnum.f16(half, half)
 251 declare half @llvm.maxnum.f16(half, half)
 252 declare float @llvm.minnum.f32(float, float)
 253 declare float @llvm.maxnum.f32(float, float)
 254 declare double @llvm.minnum.f64(double, double)
 255 declare double @llvm.maxnum.f64(double, double)
 256 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 257 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
     ; Attribute groups referenced by the test functions in this file:
     ;   #0 - amdgpu-ieee enabled
     ;   #1 - amdgpu-ieee disabled
     ;   #2 - amdgpu-ieee enabled, amdgpu-dx10-clamp enabled
     ;   #3 - no-nans-fp-math enabled (function-wide no-NaN assumption)
     ;   #4 - amdgpu-ieee enabled, amdgpu-dx10-clamp disabled
 258 attributes #0 = {"amdgpu-ieee"="true"}
 259 attributes #1 = {"amdgpu-ieee"="false"}
 260 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
 261 attributes #3 = {"no-nans-fp-math"="true"}
 262 attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}