llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
   3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
   4 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX12 %s
   5
   6 define float @test_min_max_ValK0_K1_f32(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
   7 ; GFX10-LABEL: test_min_max_ValK0_K1_f32:
   8 ; GFX10:       ; %bb.0:
   9 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  10 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  11 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  12 ;
  13 ; GFX8-LABEL: test_min_max_ValK0_K1_f32:
  14 ; GFX8:       ; %bb.0:
  15 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  16 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  17 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  18 ;
  19 ; GFX12-LABEL: test_min_max_ValK0_K1_f32:
  20 ; GFX12:       ; %bb.0:
  21 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  22 ; GFX12-NEXT:    s_wait_expcnt 0x0
  23 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  24 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  25 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  26 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
  27 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  28   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) ; inner: max(%a, K0=2.0), constant is the second operand
  29   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer: min(max, K1=4.0); nnan on both calls and K0 <= K1, so the checks expect a single med3
  30   ret float %fmed
  31 }
  32
  33 define float @test_min_max_K0Val_K1_f32(float %a) #1 { ; #1 is "amdgpu-ieee"="false"
  34 ; GFX10-LABEL: test_min_max_K0Val_K1_f32:
  35 ; GFX10:       ; %bb.0:
  36 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  37 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  38 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  39 ;
  40 ; GFX8-LABEL: test_min_max_K0Val_K1_f32:
  41 ; GFX8:       ; %bb.0:
  42 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  43 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  44 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  45 ;
  46 ; GFX12-LABEL: test_min_max_K0Val_K1_f32:
  47 ; GFX12:       ; %bb.0:
  48 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  49 ; GFX12-NEXT:    s_wait_expcnt 0x0
  50 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  51 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  52 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  53 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
  54 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  55   %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a) ; inner: max(K0=2.0, %a), constant is the first operand
  56   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer: min(max, K1=4.0); nnan on both calls, so the checks expect a single med3
  57   ret float %fmed
  58 }
  59
  60 ; min-max patterns for ieee=true do not have to check for NaNs
  61 ; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
  62 define half @test_min_K1max_ValK0_f16(half %a) #0 { ; #0 is "amdgpu-ieee"="true"
  63 ; GFX10-LABEL: test_min_K1max_ValK0_f16:
  64 ; GFX10:       ; %bb.0:
  65 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  66 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
  67 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
  68 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  69 ;
  70 ; GFX8-LABEL: test_min_K1max_ValK0_f16:
  71 ; GFX8:       ; %bb.0:
  72 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  73 ; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
  74 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
  75 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
  76 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  77 ;
  78 ; GFX12-LABEL: test_min_K1max_ValK0_f16:
  79 ; GFX12:       ; %bb.0:
  80 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
  81 ; GFX12-NEXT:    s_wait_expcnt 0x0
  82 ; GFX12-NEXT:    s_wait_samplecnt 0x0
  83 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
  84 ; GFX12-NEXT:    s_wait_kmcnt 0x0
  85 ; GFX12-NEXT:    v_max_num_f16_e32 v0, v0, v0
  86 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
  87 ; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
  88 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
  89   %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0) ; inner: max(%a, K0=2.0); no nnan flag on either call
  90   %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum) ; outer: min(K1=4.0, max); checks expect a self-max then med3 on gfx1010/gfx1200, and separate max+min on gfx803
  91   ret half %fmed
  92 }
  93
  94 define half @test_min_K1max_K0Val_f16(half %a) #1 { ; #1 is "amdgpu-ieee"="false"
  95 ; GFX10-LABEL: test_min_K1max_K0Val_f16:
  96 ; GFX10:       ; %bb.0:
  97 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  98 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
  99 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 100 ;
 101 ; GFX8-LABEL: test_min_K1max_K0Val_f16:
 102 ; GFX8:       ; %bb.0:
 103 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 104 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
 105 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 106 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 107 ;
 108 ; GFX12-LABEL: test_min_K1max_K0Val_f16:
 109 ; GFX12:       ; %bb.0:
 110 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 111 ; GFX12-NEXT:    s_wait_expcnt 0x0
 112 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 113 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 114 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 115 ; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
 116 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 117   %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a) ; inner: max(K0=2.0, %a), nnan
 118   %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum) ; outer: min(K1=4.0, max), nnan; checks expect med3 on gfx1010/gfx1200 and max+min on gfx803
 119   ret half %fmed
 120 }
 121
 122 ; max-min patterns work only for non-NaN inputs
 123 define float @test_max_min_ValK1_K0_f32(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
 124 ; GFX10-LABEL: test_max_min_ValK1_K0_f32:
 125 ; GFX10:       ; %bb.0:
 126 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 127 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 128 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 129 ;
 130 ; GFX8-LABEL: test_max_min_ValK1_K0_f32:
 131 ; GFX8:       ; %bb.0:
 132 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 133 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 134 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 135 ;
 136 ; GFX12-LABEL: test_max_min_ValK1_K0_f32:
 137 ; GFX12:       ; %bb.0:
 138 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 139 ; GFX12-NEXT:    s_wait_expcnt 0x0
 140 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 141 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 142 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 143 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 144 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 145   %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0) ; inner: min(%a, K1=4.0), nnan
 146   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer: max(min, K0=2.0), nnan; K0 <= K1, so the checks expect a single med3
 147   ret float %fmed
 148 }
 149
 150 define float @test_max_min_K1Val_K0_f32(float %a) #1 { ; #1 is "amdgpu-ieee"="false"
 151 ; GFX10-LABEL: test_max_min_K1Val_K0_f32:
 152 ; GFX10:       ; %bb.0:
 153 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 154 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 155 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 156 ;
 157 ; GFX8-LABEL: test_max_min_K1Val_K0_f32:
 158 ; GFX8:       ; %bb.0:
 159 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 160 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 161 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 162 ;
 163 ; GFX12-LABEL: test_max_min_K1Val_K0_f32:
 164 ; GFX12:       ; %bb.0:
 165 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 166 ; GFX12-NEXT:    s_wait_expcnt 0x0
 167 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 168 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 169 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 170 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 171 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 172   %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a) ; inner: min(K1=4.0, %a), constant is the first operand, nnan
 173   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer: max(min, K0=2.0), nnan; the checks expect a single med3
 174   ret float %fmed
 175 }
 176
 177 define half @test_max_K0min_ValK1_f16(half %a) #0 { ; #0 is "amdgpu-ieee"="true"
 178 ; GFX10-LABEL: test_max_K0min_ValK1_f16:
 179 ; GFX10:       ; %bb.0:
 180 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 181 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
 182 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 183 ;
 184 ; GFX8-LABEL: test_max_K0min_ValK1_f16:
 185 ; GFX8:       ; %bb.0:
 186 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 187 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 188 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
 189 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 190 ;
 191 ; GFX12-LABEL: test_max_K0min_ValK1_f16:
 192 ; GFX12:       ; %bb.0:
 193 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 194 ; GFX12-NEXT:    s_wait_expcnt 0x0
 195 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 196 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 197 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 198 ; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
 199 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 200   %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0) ; inner: min(%a, K1=4.0), nnan
 201   %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) ; outer: max(K0=2.0, min), nnan; checks expect med3 on gfx1010/gfx1200 and min+max on gfx803
 202   ret half %fmed
 203 }
 204
 205 define half @test_max_K0min_K1Val_f16(half %a) #1 { ; #1 is "amdgpu-ieee"="false"
 206 ; GFX10-LABEL: test_max_K0min_K1Val_f16:
 207 ; GFX10:       ; %bb.0:
 208 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 209 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
 210 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 211 ;
 212 ; GFX8-LABEL: test_max_K0min_K1Val_f16:
 213 ; GFX8:       ; %bb.0:
 214 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 215 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 216 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
 217 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 218 ;
 219 ; GFX12-LABEL: test_max_K0min_K1Val_f16:
 220 ; GFX12:       ; %bb.0:
 221 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 222 ; GFX12-NEXT:    s_wait_expcnt 0x0
 223 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 224 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 225 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 226 ; GFX12-NEXT:    v_med3_num_f16 v0, v0, 2.0, 4.0
 227 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 228   %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a) ; inner: min(K1=4.0, %a), constant is the first operand, nnan
 229   %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) ; outer: max(K0=2.0, min), nnan; checks expect med3 on gfx1010/gfx1200 and min+max on gfx803
 230   ret half %fmed
 231 }
 232
 233 ; global nnan function attribute always forces fmed3 combine
 234
 235 define float @test_min_max_global_nnan(float %a) #2 { ; #2 is "no-nans-fp-math"="true"
 236 ; GFX10-LABEL: test_min_max_global_nnan:
 237 ; GFX10:       ; %bb.0:
 238 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 239 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 240 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 241 ;
 242 ; GFX8-LABEL: test_min_max_global_nnan:
 243 ; GFX8:       ; %bb.0:
 244 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 245 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 246 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 247 ;
 248 ; GFX12-LABEL: test_min_max_global_nnan:
 249 ; GFX12:       ; %bb.0:
 250 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 251 ; GFX12-NEXT:    s_wait_expcnt 0x0
 252 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 253 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 254 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 255 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 256 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 257   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) ; inner: max(%a, K0=2.0); no per-call nnan flag, only the function attribute
 258   %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer: min(max, K1=4.0); the checks expect a single med3 on all three targets
 259   ret float %fmed
 260 }
 261
 262 define float @test_max_min_global_nnan(float %a) #2 { ; #2 is "no-nans-fp-math"="true"
 263 ; GFX10-LABEL: test_max_min_global_nnan:
 264 ; GFX10:       ; %bb.0:
 265 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 266 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 267 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 268 ;
 269 ; GFX8-LABEL: test_max_min_global_nnan:
 270 ; GFX8:       ; %bb.0:
 271 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 272 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 273 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 274 ;
 275 ; GFX12-LABEL: test_max_min_global_nnan:
 276 ; GFX12:       ; %bb.0:
 277 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 278 ; GFX12-NEXT:    s_wait_expcnt 0x0
 279 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 280 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 281 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 282 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 283 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 284   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner: min(%a, K1=4.0); no per-call nnan flag, only the function attribute
 285   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer: max(min, K0=2.0); the checks expect a single med3 on all three targets
 286   ret float %fmed
 287 }
 288
 289 ; ------------------------------------------------------------------------------
 290 ; Negative patterns
 291 ; ------------------------------------------------------------------------------
 292
 293 ; min(max(Val, K0), K1) with K0 > K1; the fmed3 combine requires K0 <= K1
 294 define float @test_min_max_K0_gt_K1(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
 295 ; GFX10-LABEL: test_min_max_K0_gt_K1:
 296 ; GFX10:       ; %bb.0:
 297 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 298 ; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
 299 ; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
 300 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 301 ;
 302 ; GFX8-LABEL: test_min_max_K0_gt_K1:
 303 ; GFX8:       ; %bb.0:
 304 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 305 ; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
 306 ; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
 307 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 308 ;
 309 ; GFX12-LABEL: test_min_max_K0_gt_K1:
 310 ; GFX12:       ; %bb.0:
 311 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 312 ; GFX12-NEXT:    s_wait_expcnt 0x0
 313 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 314 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 315 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 316 ; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 4.0, 2.0
 317 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 318   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0) ; inner: max(%a, 4.0)
 319   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0) ; outer: min(max, 2.0); inner constant exceeds the outer one, so the checks expect no med3 (gfx1200 uses a fused maxmin)
 320   ret float %fmed
 321 }
 322
 323 ; max(min(Val, K1), K0) with K0 > K1; the fmed3 combine requires K0 <= K1
 324 define float @test_max_min_K0_gt_K1(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
 325 ; GFX10-LABEL: test_max_min_K0_gt_K1:
 326 ; GFX10:       ; %bb.0:
 327 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 328 ; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
 329 ; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
 330 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 331 ;
 332 ; GFX8-LABEL: test_max_min_K0_gt_K1:
 333 ; GFX8:       ; %bb.0:
 334 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 335 ; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
 336 ; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
 337 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 338 ;
 339 ; GFX12-LABEL: test_max_min_K0_gt_K1:
 340 ; GFX12:       ; %bb.0:
 341 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 342 ; GFX12-NEXT:    s_wait_expcnt 0x0
 343 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 344 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 345 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 346 ; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 2.0, 4.0
 347 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 348   %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0) ; inner: min(%a, 2.0)
 349   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0) ; outer: max(min, 4.0); outer constant exceeds the inner one, so the checks expect no med3 (gfx1200 uses a fused minmax)
 350   ret float %fmed
 351 }
 352
 353 ; non-inline constant
 354 define float @test_min_max_non_inline_const(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
 355 ; GFX10-LABEL: test_min_max_non_inline_const:
 356 ; GFX10:       ; %bb.0:
 357 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 358 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 359 ; GFX10-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
 360 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 361 ;
 362 ; GFX8-LABEL: test_min_max_non_inline_const:
 363 ; GFX8:       ; %bb.0:
 364 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 365 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 366 ; GFX8-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
 367 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 368 ;
 369 ; GFX12-LABEL: test_min_max_non_inline_const:
 370 ; GFX12:       ; %bb.0:
 371 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 372 ; GFX12-NEXT:    s_wait_expcnt 0x0
 373 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 374 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 375 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 376 ; GFX12-NEXT:    v_maxmin_num_f32 v0, v0, 2.0, 0x41000000
 377 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 378   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) ; inner: max(%a, 2.0)
 379   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0) ; outer: min(max, 8.0); the checks show 8.0 emitted as the literal 0x41000000 and no med3 on any target
 380   ret float %fmed
 381 }
 382
 383 ; there is no fmed3 for f64 or v2f16 types
 384
 385 define double @test_min_max_f64(double %a) #0 { ; #0 is "amdgpu-ieee"="true"
 386 ; GFX10-LABEL: test_min_max_f64:
 387 ; GFX10:       ; %bb.0:
 388 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 389 ; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
 390 ; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
 391 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 392 ;
 393 ; GFX8-LABEL: test_min_max_f64:
 394 ; GFX8:       ; %bb.0:
 395 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 396 ; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
 397 ; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
 398 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 399 ;
 400 ; GFX12-LABEL: test_min_max_f64:
 401 ; GFX12:       ; %bb.0:
 402 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 403 ; GFX12-NEXT:    s_wait_expcnt 0x0
 404 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 405 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 406 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 407 ; GFX12-NEXT:    v_max_num_f64_e32 v[0:1], 2.0, v[0:1]
 408 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 409 ; GFX12-NEXT:    v_min_num_f64_e32 v[0:1], 4.0, v[0:1]
 410 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 411   %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0) ; inner: max(%a, 2.0) on f64, nnan
 412   %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0) ; outer: min(max, 4.0) on f64, nnan; the checks expect separate max and min on every target
 413   ret double %fmed
 414 }
 415
 416 define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { ; #0 is "amdgpu-ieee"="true"
 417 ; GFX10-LABEL: test_min_max_v2f16:
 418 ; GFX10:       ; %bb.0:
 419 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 420 ; GFX10-NEXT:    v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
 421 ; GFX10-NEXT:    v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 422 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 423 ;
 424 ; GFX8-LABEL: test_min_max_v2f16:
 425 ; GFX8:       ; %bb.0:
 426 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 427 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4000
 428 ; GFX8-NEXT:    v_max_f16_e32 v1, 2.0, v0
 429 ; GFX8-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 430 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4400
 431 ; GFX8-NEXT:    v_min_f16_e32 v1, 4.0, v1
 432 ; GFX8-NEXT:    v_min_f16_sdwa v0, v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD
 433 ; GFX8-NEXT:    v_or_b32_e32 v0, v1, v0
 434 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 435 ;
 436 ; GFX12-LABEL: test_min_max_v2f16:
 437 ; GFX12:       ; %bb.0:
 438 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 439 ; GFX12-NEXT:    s_wait_expcnt 0x0
 440 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 441 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 442 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 443 ; GFX12-NEXT:    v_pk_max_num_f16 v0, v0, 2.0 op_sel_hi:[1,0]
 444 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 445 ; GFX12-NEXT:    v_pk_min_num_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 446 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 447   %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>) ; inner: max(%a, splat 2.0) on v2f16, nnan
 448   %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>) ; outer: min(max, splat 4.0), nnan; the checks expect packed max+min on gfx1010/gfx1200 and e32/sdwa max+min pairs on gfx803, never med3
 449   ret <2 x half> %fmed
 450 }
 451
 452 ; input that can be NaN
 453
 454 ; min-max patterns for ieee=false require known non-NaN input
 455 define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; #1 is "amdgpu-ieee"="false"
 456 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 457 ; GFX10:       ; %bb.0:
 458 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 459 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 460 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 461 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 462 ;
 463 ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 464 ; GFX8:       ; %bb.0:
 465 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 466 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 467 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 468 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 469 ;
 470 ; GFX12-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 471 ; GFX12:       ; %bb.0:
 472 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 473 ; GFX12-NEXT:    s_wait_expcnt 0x0
 474 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 475 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 476 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 477 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 478 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 479 ; GFX12-NEXT:    v_med3_num_f32 v0, v0, 2.0, 4.0
 480 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 481   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) ; inner: max(%a, K0=2.0); no nnan flag, so %a may be NaN
 482   %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer: min(max, K1=4.0); the checks expect max+min on gfx1010/gfx803, while gfx1200 expects a self-max followed by med3
 483   ret float %fmed
 484 }
 485
 486 ; max-min patterns always require known non-NaN input
 487
 488 define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; #1 is "amdgpu-ieee"="false"
 489 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 490 ; GFX10:       ; %bb.0:
 491 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 492 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 493 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 494 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 495 ;
 496 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 497 ; GFX8:       ; %bb.0:
 498 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 499 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 500 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 501 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 502 ;
 503 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 504 ; GFX12:       ; %bb.0:
 505 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 506 ; GFX12-NEXT:    s_wait_expcnt 0x0
 507 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 508 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 509 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 510 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 511 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 512 ; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
 513 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 514   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner: min(%a, K1=4.0); no nnan flag, so %a may be NaN
 515   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer: max(min, K0=2.0); the checks expect min+max on gfx1010/gfx803, while gfx1200 expects a self-max followed by a fused minmax
 516   ret float %fmed
 517 }
 518
 519 ; 'v_max_f32_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
 520 define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; #0 is "amdgpu-ieee"="true"
 521 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 522 ; GFX10:       ; %bb.0:
 523 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 524 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 525 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 526 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 527 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 528 ;
 529 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 530 ; GFX8:       ; %bb.0:
 531 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 532 ; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 533 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 534 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 535 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 536 ;
 537 ; GFX12-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 538 ; GFX12:       ; %bb.0:
 539 ; GFX12-NEXT:    s_wait_loadcnt_dscnt 0x0
 540 ; GFX12-NEXT:    s_wait_expcnt 0x0
 541 ; GFX12-NEXT:    s_wait_samplecnt 0x0
 542 ; GFX12-NEXT:    s_wait_bvhcnt 0x0
 543 ; GFX12-NEXT:    s_wait_kmcnt 0x0
 544 ; GFX12-NEXT:    v_max_num_f32_e32 v0, v0, v0
 545 ; GFX12-NEXT:    s_delay_alu instid0(VALU_DEP_1)
 546 ; GFX12-NEXT:    v_minmax_num_f32 v0, v0, 4.0, 2.0
 547 ; GFX12-NEXT:    s_setpc_b64 s[30:31]
 548   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner: min(%a, K1=4.0); no nnan flag, so %a may be NaN
 549   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer: max(min, K0=2.0); the checks expect no med3 on any target, and the gfx803 checks begin with a multiply by 1.0 instead of a self-max
 550   ret float %fmed
 551 }
 552
 553 declare half @llvm.minnum.f16(half, half) ; minnum/maxnum intrinsic declarations follow for f16, f32, f64 and v2f16
 554 declare half @llvm.maxnum.f16(half, half)
 555 declare float @llvm.minnum.f32(float, float)
 556 declare float @llvm.maxnum.f32(float, float)
 557 declare double @llvm.minnum.f64(double, double)
 558 declare double @llvm.maxnum.f64(double, double)
 559 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 560 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 561 attributes #0 = {"amdgpu-ieee"="true"} ; attached to the tests that exercise the ieee=true paths
 562 attributes #1 = {"amdgpu-ieee"="false"} ; attached to the tests that exercise the ieee=false paths
 563 attributes #2 = {"no-nans-fp-math"="true"} ; function-wide no-NaNs setting, used by the two *_global_nnan tests