llvm/test/CodeGen/AMDGPU/GlobalISel/clamp-fmed3-const-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
   3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
   4
   ; Positive case: the fmed3 call itself carries the nnan flag, and the function
   ; uses attribute group #0 ("amdgpu-ieee"="true"). The checks expect
   ; fmed3(%a * 2.0, 0.0, 1.0) to be emitted as a single v_mul_f32 with the clamp
   ; modifier on both GFX10 and GFX12 (no separate v_med3 instruction).
   5 define float @test_fmed3_f32_known_nnan_ieee_true(float %a) #0 {
   6 ; GFX10-LABEL: test_fmed3_f32_known_nnan_ieee_true:
   7 ; GFX10:       ; %bb.0:
   8 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   9 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  10 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  11 ;
  12 ; GFX12-LABEL: test_fmed3_f32_known_nnan_ieee_true:
  13 ; GFX12:       ; %bb.0:
  14 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  15 ; GFX12-NEXT:    s_wait_expcnt 0x0
  16 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  17 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  18 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  19 ; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  20 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  21   %fmul = fmul float %a, 2.0
  22   %fmed = call nnan float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
  23   ret float %fmed
  24 }
  25
  ; Positive case, f16 variant: the fmed3 call carries the nnan flag and the
  ; function uses attribute group #1 ("amdgpu-ieee"="false"). The checks expect
  ; fmed3(%a * 2.0, 0.0, 1.0) to be emitted as a single v_mul_f16 with the clamp
  ; modifier on both GFX10 and GFX12.
  26 define half @test_fmed3_f16_known_nnan_ieee_false(half %a) #1 {
  27 ; GFX10-LABEL: test_fmed3_f16_known_nnan_ieee_false:
  28 ; GFX10:       ; %bb.0:
  29 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  30 ; GFX10-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
  31 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  32 ;
  33 ; GFX12-LABEL: test_fmed3_f16_known_nnan_ieee_false:
  34 ; GFX12:       ; %bb.0:
  35 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  36 ; GFX12-NEXT:    s_wait_expcnt 0x0
  37 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  38 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  39 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  40 ; GFX12-NEXT:    v_mul_f16_e64 v0, v0, 2.0 clamp
  41 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  42   %fmul = fmul half %a, 2.0
  43   %fmed = call nnan half @llvm.amdgcn.fmed3.f16(half %fmul, half 0.0, half 1.0)
  44   ret half %fmed
  45 }
  46
  47 ; %fmin is known non-SNaN because fmin inputs are fcanonicalized
  ; The function uses attribute group #2 ("amdgpu-ieee"="true",
  ; "amdgpu-dx10-clamp"="true") and the fmed3 call has no nnan flag. The checks
  ; expect fmed3(minnum(%a, 10.0), 0.0, 1.0) to become a clamp modifier on the
  ; min instruction (v_min_f32 on GFX10, v_min_num_f32 on GFX12). The preceding
  ; v_max v0, v0, v0 is presumably the canonicalize mentioned above.
  48 define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
  49 ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
  50 ; GFX10:       ; %bb.0:
  51 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  52 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
  53 ; GFX10-NEXT:    v_min_f32_e64 v0, 0x41200000, v0 clamp
  54 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  55 ;
  56 ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true:
  57 ; GFX12:       ; %bb.0:
  58 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  59 ; GFX12-NEXT:    s_wait_expcnt 0x0
  60 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  61 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  62 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  63 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
  64 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
  65 ; GFX12-NEXT:    v_min_num_f32_e64 v0, 0x41200000, v0 clamp
  66 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  67   %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
  68   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
  69   ret float %fmed
  70 }
  71
  72 ; input may be SNaN. It's safe to clamp since third operand in fmed3 is 0.0
  ; Note the constant order: the call is fmed3(%fmul, 1.0, 0.0), i.e. 0.0 is the
  ; third operand rather than the second. The function uses attribute group #2
  ; ("amdgpu-ieee"="true", "amdgpu-dx10-clamp"="true") and the call has no nnan
  ; flag. The checks expect a single v_mul_f32 with the clamp modifier on both
  ; GFX10 and GFX12.
  73 define float @test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true(float %a) #2 {
  74 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
  75 ; GFX10:       ; %bb.0:
  76 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  77 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  78 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  79 ;
  80 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_zero_third_operand_ieee_true_dx10clamp_true:
  81 ; GFX12:       ; %bb.0:
  82 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  83 ; GFX12-NEXT:    s_wait_expcnt 0x0
  84 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  85 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  86 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  87 ; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
  88 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  89   %fmul = fmul float %a, 2.0
  90   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
  91   ret float %fmed
  92 }
  93
  94 ; global nnan function attribute always forces clamp combine
  ; Here the fmed3 call has no nnan flag; the no-NaN guarantee comes only from
  ; attribute group #3 ("no-nans-fp-math"="true"). No "amdgpu-ieee" or
  ; "amdgpu-dx10-clamp" attribute is set in that group. The checks expect a
  ; single v_mul_f32 with the clamp modifier on both GFX10 and GFX12.
  95
  96 define float @test_fmed3_global_nnan(float %a) #3 {
  97 ; GFX10-LABEL: test_fmed3_global_nnan:
  98 ; GFX10:       ; %bb.0:
  99 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 100 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 101 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 102 ;
 103 ; GFX12-LABEL: test_fmed3_global_nnan:
 104 ; GFX12:       ; %bb.0:
 105 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 106 ; GFX12-NEXT:    s_wait_expcnt 0x0
 107 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 108 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 109 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 110 ; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 111 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 112   %fmul = fmul float %a, 2.0
 113   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
 114   ret float %fmed
 115 }
 116
 117 ; ------------------------------------------------------------------------------
 118 ; Negative patterns
 119 ; ------------------------------------------------------------------------------
 120
 121 ; ieee=false requires known never NaN input
 ; The fmed3 call has no nnan flag and the function uses attribute group #1
 ; ("amdgpu-ieee"="false"). The call is fmed3(%fmul, 1.0, 0.0).
 ; GFX10: the checks keep a separate v_mul_f32 followed by v_med3_f32, i.e. no
 ; clamp fold.
 ; NOTE(review): the GFX12 checks below still show the fold into
 ; v_mul_f32 ... clamp, so only the GFX10 run is a negative case here.
 ; Presumably "amdgpu-ieee" has no effect on gfx1200 - confirm.
 122 define float @test_fmed3_f32_maybe_NaN_ieee_false(float %a) #1 {
 123 ; GFX10-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
 124 ; GFX10:       ; %bb.0:
 125 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 126 ; GFX10-NEXT:    v_mul_f32_e32 v0, 2.0, v0
 127 ; GFX10-NEXT:    v_med3_f32 v0, v0, 1.0, 0
 128 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 129 ;
 130 ; GFX12-LABEL: test_fmed3_f32_maybe_NaN_ieee_false:
 131 ; GFX12:       ; %bb.0:
 132 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 133 ; GFX12-NEXT:    s_wait_expcnt 0x0
 134 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 135 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 136 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 137 ; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 138 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 139   %fmul = fmul float %a, 2.0
 140   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 1.0, float 0.0)
 141   ret float %fmed
 142 }
 143
 144 ; ieee=true input is known non-SNaN but dx10_clamp=false
 ; Same IR body as test_fmed3_non_SNaN_input_ieee_true_dx10clamp_true, but with
 ; attribute group #4 ("amdgpu-ieee"="true", "amdgpu-dx10-clamp"="false").
 ; GFX10: the checks keep v_max_f32, v_min_f32 and a separate v_med3_f32, i.e.
 ; no clamp fold.
 ; NOTE(review): the GFX12 checks below still show the clamp modifier on
 ; v_min_num_f32, so only the GFX10 run is a negative case here. Presumably
 ; "amdgpu-dx10-clamp" has no effect on gfx1200 - confirm.
 145 define float @test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false(float %a) #4 {
 146 ; GFX10-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
 147 ; GFX10:       ; %bb.0:
 148 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 149 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 150 ; GFX10-NEXT:    v_min_f32_e32 v0, 0x41200000, v0
 151 ; GFX10-NEXT:    v_med3_f32 v0, v0, 0, 1.0
 152 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 153 ;
 154 ; GFX12-LABEL: test_fmed3_non_SNaN_input_ieee_true_dx10clamp_false:
 155 ; GFX12:       ; %bb.0:
 156 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 157 ; GFX12-NEXT:    s_wait_expcnt 0x0
 158 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 159 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 160 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 161 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 162 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 163 ; GFX12-NEXT:    v_min_num_f32_e64 v0, 0x41200000, v0 clamp
 164 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 165   %fmin = call float @llvm.minnum.f32(float %a, float 10.0)
 166   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmin, float 0.0, float 1.0)
 167   ret float %fmed
 168 }
 169
 170 ; ieee=true dx10_clamp=true but input may be SNaN, clamp requires third operand in fmed3 to be 0.0
 ; The call is fmed3(%fmul, 0.0, 1.0): the third operand is 1.0, not 0.0. The
 ; function uses attribute group #2 ("amdgpu-ieee"="true",
 ; "amdgpu-dx10-clamp"="true") and the call has no nnan flag.
 ; NOTE(review): although this test sits under "Negative patterns", the checks
 ; below show the fold into v_mul_f32 ... clamp on both GFX10 and GFX12.
 ; Presumably %fmul (the result of an fmul) is treated as known non-SNaN, so
 ; the "input may be SNaN" premise does not hold for this IR - confirm whether
 ; this test still exercises the intended negative case.
 171 define float @test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true(float %a) #2 {
 172 ; GFX10-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
 173 ; GFX10:       ; %bb.0:
 174 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 175 ; GFX10-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 176 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 177 ;
 178 ; GFX12-LABEL: test_fmed3_maybe_SNaN_input_ieee_true_dx10clamp_true:
 179 ; GFX12:       ; %bb.0:
 180 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 181 ; GFX12-NEXT:    s_wait_expcnt 0x0
 182 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 183 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 184 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 185 ; GFX12-NEXT:    v_mul_f32_e64 v0, v0, 2.0 clamp
 186 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 187   %fmul = fmul float %a, 2.0
 188   %fmed = call float @llvm.amdgcn.fmed3.f32(float %fmul, float 0.0, float 1.0)
 189   ret float %fmed
 190 }
 191
 ; Intrinsics called by the test functions in this file: the f16 and f32
 ; overloads of llvm.amdgcn.fmed3, and llvm.minnum.f32 (used to build the
 ; %fmin inputs).
 192 declare half @llvm.amdgcn.fmed3.f16(half, half, half)
 193 declare float @llvm.amdgcn.fmed3.f32(float, float, float)
 194 declare float @llvm.minnum.f32(float, float)
 195
 ; Attribute groups referenced by the test functions in this file:
 ;   #0 - ieee=true
 ;   #1 - ieee=false
 ;   #2 - ieee=true,  dx10-clamp=true
 ;   #3 - function-wide no-nans-fp-math (no ieee / dx10-clamp attribute set)
 ;   #4 - ieee=true,  dx10-clamp=false
 196 attributes #0 = {"amdgpu-ieee"="true"}
 197 attributes #1 = {"amdgpu-ieee"="false"}
 198 attributes #2 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="true"}
 199 attributes #3 = {"no-nans-fp-math"="true"}
 200 attributes #4 = {"amdgpu-ieee"="true" "amdgpu-dx10-clamp"="false"}