llvm/test/CodeGen/AMDGPU/GlobalISel/fmed3-min-max-const-combine.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
   3 ; RUN: llc -global-isel -mtriple=amdgcn-amd-mesa3d -mcpu=gfx803 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX8 %s
   4
   5 define float @test_min_max_ValK0_K1_f32(float %a) #0 { ; min(max(%a, 2.0), 4.0) with nnan calls under #0; both run lines expect a single v_med3_f32
   6 ; GFX10-LABEL: test_min_max_ValK0_K1_f32:
   7 ; GFX10:       ; %bb.0:
   8 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
   9 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  10 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  11 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  12 ;
  13 ; GFX8-LABEL: test_min_max_ValK0_K1_f32:
  14 ; GFX8:       ; %bb.0:
  15 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  16 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  17 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  18   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) ; inner max: value first, lower bound 2.0 second
  19   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer min: upper bound 4.0 as second operand
  20   ret float %fmed
  21 }
  22
  23 define float @test_min_max_K0Val_K1_f32(float %a) #1 { ; min(max(2.0, %a), 4.0) with nnan calls under #1; both run lines expect a single v_med3_f32
  24 ; GFX10-LABEL: test_min_max_K0Val_K1_f32:
  25 ; GFX10:       ; %bb.0:
  26 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  27 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  28 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  29 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  30 ;
  31 ; GFX8-LABEL: test_min_max_K0Val_K1_f32:
  32 ; GFX8:       ; %bb.0:
  33 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  34 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  35 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  36   %maxnum = call nnan float @llvm.maxnum.f32(float 2.0, float %a) ; inner max: lower bound 2.0 first, value second (commuted form)
  37   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer min: upper bound 4.0 as second operand
  38   ret float %fmed
  39 }
  40
  41 ; min-max patterns for ieee=true do not have to check for NaNs
  42 ; 'v_max_f16_e32 v0, v0, v0' is from fcanonicalize of the input to fmin/fmax with ieee=true
  43 define half @test_min_K1max_ValK0_f16(half %a) #0 { ; min(4.0, max(%a, 2.0)) on half, no nnan flags, under #0; gfx1010 expects v_med3_f16, gfx803 expects separate max and min
  44 ; GFX10-LABEL: test_min_K1max_ValK0_f16:
  45 ; GFX10:       ; %bb.0:
  46 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  47 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  48 ; GFX10-NEXT:    v_max_f16_e32 v0, v0, v0
  49 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
  50 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  51 ;
  52 ; GFX8-LABEL: test_min_K1max_ValK0_f16:
  53 ; GFX8:       ; %bb.0:
  54 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  55 ; GFX8-NEXT:    v_max_f16_e32 v0, v0, v0
  56 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
  57 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
  58 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  59   %maxnum = call half @llvm.maxnum.f16(half %a, half 2.0) ; inner max: value first, lower bound 2.0 second; no nnan flag
  60   %fmed = call half @llvm.minnum.f16(half 4.0, half %maxnum) ; outer min: upper bound 4.0 as first operand; no nnan flag
  61   ret half %fmed
  62 }
  63
  64 define half @test_min_K1max_K0Val_f16(half %a) #1 { ; min(4.0, max(2.0, %a)) on half with nnan calls under #1; gfx1010 expects v_med3_f16, gfx803 expects separate max and min
  65 ; GFX10-LABEL: test_min_K1max_K0Val_f16:
  66 ; GFX10:       ; %bb.0:
  67 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  68 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  69 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
  70 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  71 ;
  72 ; GFX8-LABEL: test_min_K1max_K0Val_f16:
  73 ; GFX8:       ; %bb.0:
  74 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  75 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
  76 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
  77 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  78   %maxnum = call nnan half @llvm.maxnum.f16(half 2.0, half %a) ; inner max: lower bound 2.0 first, value second
  79   %fmed = call nnan half @llvm.minnum.f16(half 4.0, half %maxnum) ; outer min: upper bound 4.0 as first operand
  80   ret half %fmed
  81 }
  82
  83 ; max-min patterns work only for non-NaN inputs
  84 define float @test_max_min_ValK1_K0_f32(float %a) #0 { ; max(min(%a, 4.0), 2.0) with nnan calls under #0; both run lines expect a single v_med3_f32
  85 ; GFX10-LABEL: test_max_min_ValK1_K0_f32:
  86 ; GFX10:       ; %bb.0:
  87 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  88 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
  89 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  90 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
  91 ;
  92 ; GFX8-LABEL: test_max_min_ValK1_K0_f32:
  93 ; GFX8:       ; %bb.0:
  94 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
  95 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
  96 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
  97   %minnum = call nnan float @llvm.minnum.f32(float %a, float 4.0) ; inner min: value first, upper bound 4.0 second
  98   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer max: lower bound 2.0 as second operand
  99   ret float %fmed
 100 }
 101
 102 define float @test_max_min_K1Val_K0_f32(float %a) #1 { ; max(min(4.0, %a), 2.0) with nnan calls under #1; both run lines expect a single v_med3_f32
 103 ; GFX10-LABEL: test_max_min_K1Val_K0_f32:
 104 ; GFX10:       ; %bb.0:
 105 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 106 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 107 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 108 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 109 ;
 110 ; GFX8-LABEL: test_max_min_K1Val_K0_f32:
 111 ; GFX8:       ; %bb.0:
 112 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 113 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 114 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 115   %minnum = call nnan float @llvm.minnum.f32(float 4.0, float %a) ; inner min: upper bound 4.0 first, value second (commuted form)
 116   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer max: lower bound 2.0 as second operand
 117   ret float %fmed
 118 }
 119
 120 define half @test_max_K0min_ValK1_f16(half %a) #0 { ; max(2.0, min(%a, 4.0)) on half with nnan calls under #0; gfx1010 expects v_med3_f16, gfx803 expects separate min and max
 121 ; GFX10-LABEL: test_max_K0min_ValK1_f16:
 122 ; GFX10:       ; %bb.0:
 123 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 124 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 125 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
 126 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 127 ;
 128 ; GFX8-LABEL: test_max_K0min_ValK1_f16:
 129 ; GFX8:       ; %bb.0:
 130 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 131 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 132 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
 133 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 134   %minnum = call nnan half @llvm.minnum.f16(half %a, half 4.0) ; inner min: value first, upper bound 4.0 second
 135   %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) ; outer max: lower bound 2.0 as first operand
 136   ret half %fmed
 137 }
 138
 139 define half @test_max_K0min_K1Val_f16(half %a) #1 { ; max(2.0, min(4.0, %a)) on half with nnan calls under #1; gfx1010 expects v_med3_f16, gfx803 expects separate min and max
 140 ; GFX10-LABEL: test_max_K0min_K1Val_f16:
 141 ; GFX10:       ; %bb.0:
 142 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 143 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 144 ; GFX10-NEXT:    v_med3_f16 v0, v0, 2.0, 4.0
 145 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 146 ;
 147 ; GFX8-LABEL: test_max_K0min_K1Val_f16:
 148 ; GFX8:       ; %bb.0:
 149 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 150 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 151 ; GFX8-NEXT:    v_max_f16_e32 v0, 2.0, v0
 152 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 153   %minnum = call nnan half @llvm.minnum.f16(half 4.0, half %a) ; inner min: upper bound 4.0 first, value second
 154   %fmed = call nnan half @llvm.maxnum.f16(half 2.0, half %minnum) ; outer max: lower bound 2.0 as first operand
 155   ret half %fmed
 156 }
 157
 158 ; global nnan function attribute always forces fmed3 combine
 159
 160 define float @test_min_max_global_nnan(float %a) #2 { ; min(max(%a, 2.0), 4.0) without per-call nnan; #2 supplies "no-nans-fp-math"; both run lines expect v_med3_f32
 161 ; GFX10-LABEL: test_min_max_global_nnan:
 162 ; GFX10:       ; %bb.0:
 163 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 164 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 165 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 166 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 167 ;
 168 ; GFX8-LABEL: test_min_max_global_nnan:
 169 ; GFX8:       ; %bb.0:
 170 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 171 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 172 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 173   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) ; inner max against lower bound 2.0; deliberately no nnan flag
 174   %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer min against upper bound 4.0; deliberately no nnan flag
 175   ret float %fmed
 176 }
 177
 178 define float @test_max_min_global_nnan(float %a) #2 { ; max(min(%a, 4.0), 2.0) without per-call nnan; #2 supplies "no-nans-fp-math"; both run lines expect v_med3_f32
 179 ; GFX10-LABEL: test_max_min_global_nnan:
 180 ; GFX10:       ; %bb.0:
 181 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 182 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 183 ; GFX10-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 184 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 185 ;
 186 ; GFX8-LABEL: test_max_min_global_nnan:
 187 ; GFX8:       ; %bb.0:
 188 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 189 ; GFX8-NEXT:    v_med3_f32 v0, v0, 2.0, 4.0
 190 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 191   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner min against upper bound 4.0; deliberately no nnan flag
 192   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer max against lower bound 2.0; deliberately no nnan flag
 193   ret float %fmed
 194 }
 195
 196 ; ------------------------------------------------------------------------------
 197 ; Negative patterns
 198 ; ------------------------------------------------------------------------------
 199
 200 ; min(max(Val, K0), K1) K0 > K1, should be K0<=K1
 201 define float @test_min_max_K0_gt_K1(float %a) #0 { ; negative case: min(max(%a, 4.0), 2.0), bounds inverted; both run lines expect separate v_max_f32 and v_min_f32
 202 ; GFX10-LABEL: test_min_max_K0_gt_K1:
 203 ; GFX10:       ; %bb.0:
 204 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 205 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 206 ; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
 207 ; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
 208 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 209 ;
 210 ; GFX8-LABEL: test_min_max_K0_gt_K1:
 211 ; GFX8:       ; %bb.0:
 212 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 213 ; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
 214 ; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
 215 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 216   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 4.0) ; max constant is 4.0, larger than the min constant below
 217   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 2.0) ; min constant is 2.0
 218   ret float %fmed
 219 }
 220
 221 ; max(min(Val, K1), K0) K0 > K1, should be K0<=K1
 222 define float @test_max_min_K0_gt_K1(float %a) #0 { ; negative case: max(min(%a, 2.0), 4.0), bounds inverted; both run lines expect separate v_min_f32 and v_max_f32
 223 ; GFX10-LABEL: test_max_min_K0_gt_K1:
 224 ; GFX10:       ; %bb.0:
 225 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 226 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 227 ; GFX10-NEXT:    v_min_f32_e32 v0, 2.0, v0
 228 ; GFX10-NEXT:    v_max_f32_e32 v0, 4.0, v0
 229 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 230 ;
 231 ; GFX8-LABEL: test_max_min_K0_gt_K1:
 232 ; GFX8:       ; %bb.0:
 233 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 234 ; GFX8-NEXT:    v_min_f32_e32 v0, 2.0, v0
 235 ; GFX8-NEXT:    v_max_f32_e32 v0, 4.0, v0
 236 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 237   %minnum = call nnan float @llvm.minnum.f32(float %a, float 2.0) ; min constant is 2.0, smaller than the max constant below
 238   %fmed = call nnan float @llvm.maxnum.f32(float %minnum, float 4.0) ; max constant is 4.0
 239   ret float %fmed
 240 }
 241
 242 ; non-inline constant
 243 define float @test_min_max_non_inline_const(float %a) #0 { ; negative case: min(max(%a, 2.0), 8.0); both run lines expect separate max and min, with 8.0 emitted as literal 0x41000000
 244 ; GFX10-LABEL: test_min_max_non_inline_const:
 245 ; GFX10:       ; %bb.0:
 246 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 247 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 248 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 249 ; GFX10-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
 250 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 251 ;
 252 ; GFX8-LABEL: test_min_max_non_inline_const:
 253 ; GFX8:       ; %bb.0:
 254 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 255 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 256 ; GFX8-NEXT:    v_min_f32_e32 v0, 0x41000000, v0
 257 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 258   %maxnum = call nnan float @llvm.maxnum.f32(float %a, float 2.0) ; lower bound 2.0 appears as an inline operand in the expected output
 259   %fmed = call nnan float @llvm.minnum.f32(float %maxnum, float 8.0) ; upper bound 8.0 is the non-inline constant under test
 260   ret float %fmed
 261 }
 262
 263 ; there is no fmed3 for f64 or v2f16 types
 264
 265 define double @test_min_max_f64(double %a) #0 { ; negative case: min(max(%a, 2.0), 4.0) on double with nnan calls; both run lines expect separate v_max_f64 and v_min_f64
 266 ; GFX10-LABEL: test_min_max_f64:
 267 ; GFX10:       ; %bb.0:
 268 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 269 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 270 ; GFX10-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
 271 ; GFX10-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
 272 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 273 ;
 274 ; GFX8-LABEL: test_min_max_f64:
 275 ; GFX8:       ; %bb.0:
 276 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 277 ; GFX8-NEXT:    v_max_f64 v[0:1], v[0:1], 2.0
 278 ; GFX8-NEXT:    v_min_f64 v[0:1], v[0:1], 4.0
 279 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 280   %maxnum = call nnan double @llvm.maxnum.f64(double %a, double 2.0) ; inner max against lower bound 2.0
 281   %fmed = call nnan double @llvm.minnum.f64(double %maxnum, double 4.0) ; outer min against upper bound 4.0
 282   ret double %fmed
 283 }
 284
 285 define <2 x half> @test_min_max_v2f16(<2 x half> %a) #0 { ; negative case: min(max(%a, 2.0), 4.0) on <2 x half>; gfx1010 expects v_pk_max_f16/v_pk_min_f16, gfx803 expects per-half max/min plus a shift-and-or repack
 286 ; GFX10-LABEL: test_min_max_v2f16:
 287 ; GFX10:       ; %bb.0:
 288 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 289 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 290 ; GFX10-NEXT:    v_pk_max_f16 v0, v0, 2.0 op_sel_hi:[1,0]
 291 ; GFX10-NEXT:    v_pk_min_f16 v0, v0, 4.0 op_sel_hi:[1,0]
 292 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 293 ;
 294 ; GFX8-LABEL: test_min_max_v2f16:
 295 ; GFX8:       ; %bb.0:
 296 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 297 ; GFX8-NEXT:    v_mov_b32_e32 v2, 0x4000
 298 ; GFX8-NEXT:    v_max_f16_e32 v1, 2.0, v0
 299 ; GFX8-NEXT:    v_max_f16_sdwa v0, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD
 300 ; GFX8-NEXT:    v_min_f16_e32 v0, 4.0, v0
 301 ; GFX8-NEXT:    v_mov_b32_e32 v2, 16
 302 ; GFX8-NEXT:    v_min_f16_e32 v1, 4.0, v1
 303 ; GFX8-NEXT:    v_lshlrev_b32_sdwa v0, v2, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:WORD_0
 304 ; GFX8-NEXT:    v_or_b32_sdwa v0, v1, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
 305 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 306   %maxnum = call nnan <2 x half> @llvm.maxnum.v2f16(<2 x half> %a, <2 x half> <half 2.0, half 2.0>) ; inner max against a splat of 2.0
 307   %fmed = call nnan <2 x half> @llvm.minnum.v2f16(<2 x half> %maxnum, <2 x half> <half 4.0, half 4.0>) ; outer min against a splat of 4.0
 308   ret <2 x half> %fmed
 309 }
 310
 311 ; input that can be NaN
 312
 313 ; min-max patterns for ieee=false require known non-NaN input
 314 define float @test_min_max_maybe_NaN_input_ieee_false(float %a) #1 { ; negative case: min(max(%a, 2.0), 4.0) with no nnan information under #1; both run lines expect separate v_max_f32 and v_min_f32
 315 ; GFX10-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 316 ; GFX10:       ; %bb.0:
 317 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 318 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 319 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 320 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 321 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 322 ;
 323 ; GFX8-LABEL: test_min_max_maybe_NaN_input_ieee_false:
 324 ; GFX8:       ; %bb.0:
 325 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 326 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 327 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 328 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 329   %maxnum = call float @llvm.maxnum.f32(float %a, float 2.0) ; inner max against lower bound 2.0; %a may be NaN (no nnan flag)
 330   %fmed = call float @llvm.minnum.f32(float %maxnum, float 4.0) ; outer min against upper bound 4.0; no nnan flag
 331   ret float %fmed
 332 }
 333
 334 ; max-min patterns always require known non-NaN input
 335
 336 define float @test_max_min_maybe_NaN_input_ieee_false(float %a) #1 { ; negative case: max(min(%a, 4.0), 2.0) with no nnan information under #1; both run lines expect separate v_min_f32 and v_max_f32
 337 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 338 ; GFX10:       ; %bb.0:
 339 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 340 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 341 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 342 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 343 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 344 ;
 345 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_false:
 346 ; GFX8:       ; %bb.0:
 347 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 348 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 349 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 350 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 351   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner min against upper bound 4.0; %a may be NaN (no nnan flag)
 352   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer max against lower bound 2.0; no nnan flag
 353   ret float %fmed
 354 }
 355
 356 ; 'v_max_f32_e32 v0, v0, v0' (gfx1010) or 'v_mul_f32_e32 v0, 1.0, v0' (gfx803) is from fcanonicalize of the input to fmin/fmax with ieee=true
 357 define float @test_max_min_maybe_NaN_input_ieee_true(float %a) #0 { ; negative case: max(min(%a, 4.0), 2.0) with no nnan information under #0; both run lines expect one extra leading instruction, then separate v_min_f32 and v_max_f32
 358 ; GFX10-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 359 ; GFX10:       ; %bb.0:
 360 ; GFX10-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 361 ; GFX10-NEXT:    s_waitcnt_vscnt null, 0x0
 362 ; GFX10-NEXT:    v_max_f32_e32 v0, v0, v0
 363 ; GFX10-NEXT:    v_min_f32_e32 v0, 4.0, v0
 364 ; GFX10-NEXT:    v_max_f32_e32 v0, 2.0, v0
 365 ; GFX10-NEXT:    s_setpc_b64 s[30:31]
 366 ;
 367 ; GFX8-LABEL: test_max_min_maybe_NaN_input_ieee_true:
 368 ; GFX8:       ; %bb.0:
 369 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 370 ; GFX8-NEXT:    v_mul_f32_e32 v0, 1.0, v0
 371 ; GFX8-NEXT:    v_min_f32_e32 v0, 4.0, v0
 372 ; GFX8-NEXT:    v_max_f32_e32 v0, 2.0, v0
 373 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 374   %minnum = call float @llvm.minnum.f32(float %a, float 4.0) ; inner min against upper bound 4.0; %a may be NaN (no nnan flag)
 375   %fmed = call float @llvm.maxnum.f32(float %minnum, float 2.0) ; outer max against lower bound 2.0; no nnan flag
 376   ret float %fmed
 377 }
 378
 379 declare half @llvm.minnum.f16(half, half) ; minnum/maxnum intrinsic declarations for each type called in this file
 380 declare half @llvm.maxnum.f16(half, half)
 381 declare float @llvm.minnum.f32(float, float)
 382 declare float @llvm.maxnum.f32(float, float)
 383 declare double @llvm.minnum.f64(double, double)
 384 declare double @llvm.maxnum.f64(double, double)
 385 declare <2 x half> @llvm.minnum.v2f16(<2 x half>, <2 x half>)
 386 declare <2 x half> @llvm.maxnum.v2f16(<2 x half>, <2 x half>)
 387 attributes #0 = {"amdgpu-ieee"="true"} ; attached to the functions the comments in this file call "ieee=true"
 388 attributes #1 = {"amdgpu-ieee"="false"} ; attached to the functions the comments in this file call "ieee=false"
 389 attributes #2 = {"no-nans-fp-math"="true"} ; function-wide no-NaNs, used by the *_global_nnan tests instead of per-call nnan