llvm/test/CodeGen/AMDGPU/GlobalISel/llvm.amdgcn.ds.fmax.ll

   1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
   2 ; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
   3 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8 %s
   4 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9 %s
   5
   6 ; Make sure the memory operand information is preserved.
   7 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=tonga -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX8-MIR %s
   8 ; RUN: llc -global-isel -mtriple=amdgcn-mesa-mesa3d -mcpu=gfx900 -stop-after=instruction-select -verify-machineinstrs < %s | FileCheck -enable-var-scope -check-prefix=GFX9-MIR %s
   9
  10
  11 define amdgpu_ps float @ds_fmax_f32_ss(float addrspace(3)* inreg %ptr, float inreg %val) {
     ; Returning variant with both the pointer and the value passed `inreg`.
     ; The checks below expect both operands to arrive in SGPRs (s2/s3) and be
     ; copied into VGPRs before the DS max. Only the GFX8 expectations contain
     ; an m0 initialization (s_mov_b32 m0, -1); the GFX9 ones have none.
  12 ; GFX8-LABEL: ds_fmax_f32_ss:
  13 ; GFX8:       ; %bb.0:
  14 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
  15 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
  16 ; GFX8-NEXT:    s_mov_b32 m0, -1
  17 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
  18 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
  19 ; GFX8-NEXT:    ; return to shader part epilog
  20 ;
  21 ; GFX9-LABEL: ds_fmax_f32_ss:
  22 ; GFX9:       ; %bb.0:
  23 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
  24 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
  25 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
  26 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
  27 ; GFX9-NEXT:    ; return to shader part epilog
  28   ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss
  29   ; GFX8-MIR: bb.1 (%ir-block.0):
  30   ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  31   ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  32   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  33   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  34   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  35   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  36   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  37   ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  38   ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  39   ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss
  40   ; GFX9-MIR: bb.1 (%ir-block.0):
  41   ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  42   ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  43   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  44   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  45   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  46   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
  47   ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  48   ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
       ; The last argument is `i1 false`; the MIR checks above expect a plain
       ; (non-volatile) `load store (s32)` memory operand on %ir.ptr.
  49   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
  50   ret float %ret
  51 }
  52
  53 define amdgpu_ps float @ds_fmax_f32_ss_offset(float addrspace(3)* inreg %ptr, float inreg %val) {
     ; Returning variant with `inreg` operands and a constant pointer offset.
     ; The checks expect the GEP to be folded into the DS instruction as
     ; `offset:512` (immediate 512 in MIR) rather than a separate add, and the
     ; memory operand to be attached to %ir.gep.
  54 ; GFX8-LABEL: ds_fmax_f32_ss_offset:
  55 ; GFX8:       ; %bb.0:
  56 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
  57 ; GFX8-NEXT:    v_mov_b32_e32 v1, s2
  58 ; GFX8-NEXT:    s_mov_b32 m0, -1
  59 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
  60 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
  61 ; GFX8-NEXT:    ; return to shader part epilog
  62 ;
  63 ; GFX9-LABEL: ds_fmax_f32_ss_offset:
  64 ; GFX9:       ; %bb.0:
  65 ; GFX9-NEXT:    v_mov_b32_e32 v0, s3
  66 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
  67 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
  68 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
  69 ; GFX9-NEXT:    ; return to shader part epilog
  70   ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset
  71   ; GFX8-MIR: bb.1 (%ir-block.0):
  72   ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
  73   ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  74   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  75   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  76   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
  77   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  78   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  79   ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
  80   ; GFX8-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
  81   ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset
  82   ; GFX9-MIR: bb.1 (%ir-block.0):
  83   ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
  84   ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
  85   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
  86   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
  87   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
  88   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
  89   ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
  90   ; GFX9-MIR:   SI_RETURN_TO_EPILOG implicit $vgpr0
       ; Index 128 over `float` elements (128 x 4 bytes) corresponds to the
       ; 512 offset expected in the checks above.
  91   %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
  92   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
  93   ret float %ret
  94 }
  95
  96 define amdgpu_ps void @ds_fmax_f32_ss_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
     ; "No return" variant with `inreg` operands: the function returns void and
     ; the intrinsic result is never used. The checks below nevertheless expect
     ; the returning form of the instruction (ds_max_rtn_f32 / DS_MAX_RTN_F32),
     ; followed directly by s_endpgm with no s_waitcnt in between.
  97 ; GFX8-LABEL: ds_fmax_f32_ss_nortn:
  98 ; GFX8:       ; %bb.0:
  99 ; GFX8-NEXT:    v_mov_b32_e32 v0, s2
 100 ; GFX8-NEXT:    v_mov_b32_e32 v1, s3
 101 ; GFX8-NEXT:    s_mov_b32 m0, -1
 102 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
 103 ; GFX8-NEXT:    s_endpgm
 104 ;
 105 ; GFX9-LABEL: ds_fmax_f32_ss_nortn:
 106 ; GFX9:       ; %bb.0:
 107 ; GFX9-NEXT:    v_mov_b32_e32 v0, s2
 108 ; GFX9-NEXT:    v_mov_b32_e32 v1, s3
 109 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
 110 ; GFX9-NEXT:    s_endpgm
 111   ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_nortn
 112   ; GFX8-MIR: bb.1 (%ir-block.0):
 113   ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
 114   ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
 115   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 116   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 117   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
 118   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 119   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY2]], [[COPY3]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 120   ; GFX8-MIR:   S_ENDPGM 0
 121   ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_nortn
 122   ; GFX9-MIR: bb.1 (%ir-block.0):
 123   ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
 124   ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
 125   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 126   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 127   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
 128   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY2]], [[COPY3]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 129   ; GFX9-MIR:   S_ENDPGM 0
       ; %unused is deliberately left without a use.
 130   %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 131   ret void
 132 }
 133
 134 define amdgpu_ps void @ds_fmax_f32_ss_offset_nortn(float addrspace(3)* inreg %ptr, float inreg %val) {
     ; "No return" variant with `inreg` operands and a constant pointer offset.
     ; The checks expect the GEP folded into the instruction as `offset:512`
     ; and, although the result is unused, still the returning instruction form
     ; (ds_max_rtn_f32 / DS_MAX_RTN_F32).
 135 ; GFX8-LABEL: ds_fmax_f32_ss_offset_nortn:
 136 ; GFX8:       ; %bb.0:
 137 ; GFX8-NEXT:    v_mov_b32_e32 v0, s3
 138 ; GFX8-NEXT:    v_mov_b32_e32 v1, s2
 139 ; GFX8-NEXT:    s_mov_b32 m0, -1
 140 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
 141 ; GFX8-NEXT:    s_endpgm
 142 ;
 143 ; GFX9-LABEL: ds_fmax_f32_ss_offset_nortn:
 144 ; GFX9:       ; %bb.0:
 145 ; GFX9-NEXT:    v_mov_b32_e32 v0, s3
 146 ; GFX9-NEXT:    v_mov_b32_e32 v1, s2
 147 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v1, v0 offset:512
 148 ; GFX9-NEXT:    s_endpgm
 149   ; GFX8-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
 150   ; GFX8-MIR: bb.1 (%ir-block.0):
 151   ; GFX8-MIR:   liveins: $sgpr2, $sgpr3
 152   ; GFX8-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
 153   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 154   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
 155   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 156   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 157   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY3]], [[COPY2]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 158   ; GFX8-MIR:   S_ENDPGM 0
 159   ; GFX9-MIR-LABEL: name: ds_fmax_f32_ss_offset_nortn
 160   ; GFX9-MIR: bb.1 (%ir-block.0):
 161   ; GFX9-MIR:   liveins: $sgpr2, $sgpr3
 162   ; GFX9-MIR:   [[COPY:%[0-9]+]]:sreg_32 = COPY $sgpr2
 163   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:sreg_32 = COPY $sgpr3
 164   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:vgpr_32 = COPY [[COPY1]]
 165   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:vgpr_32 = COPY [[COPY]]
 166   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY3]], [[COPY2]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 167   ; GFX9-MIR:   S_ENDPGM 0
       ; Index 128 over `float` elements (128 x 4 bytes) corresponds to the
       ; 512 offset expected in the checks above; %unused has no use.
 168   %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 169   %unused = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 170   ret void
 171 }
 172
 173 define float @ds_fmax_f32_vv(float addrspace(3)* %ptr, float %val) {
     ; Returning variant without `amdgpu_ps` / `inreg`: the checks show both
     ; operands already live in VGPRs ($vgpr0, $vgpr1), so no SGPR-to-VGPR
     ; copies are expected, and the function returns via s_setpc_b64 s[30:31].
     ; Only the GFX8 expectations contain the m0 initialization.
 174 ; GFX8-LABEL: ds_fmax_f32_vv:
 175 ; GFX8:       ; %bb.0:
 176 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 177 ; GFX8-NEXT:    s_mov_b32 m0, -1
 178 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
 179 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 180 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 181 ;
 182 ; GFX9-LABEL: ds_fmax_f32_vv:
 183 ; GFX9:       ; %bb.0:
 184 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 185 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
 186 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 187 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 188   ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv
 189   ; GFX8-MIR: bb.1 (%ir-block.0):
 190   ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 191   ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 192   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 193   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 194   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 195   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 196   ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
 197   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 198   ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 199   ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv
 200   ; GFX9-MIR: bb.1 (%ir-block.0):
 201   ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 202   ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 203   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 204   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 205   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 206   ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
 207   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 208   ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
       ; The last argument is `i1 false`; the MIR checks above expect a plain
       ; (non-volatile) `load store (s32)` memory operand on %ir.ptr.
 209   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 210   ret float %ret
 211 }
 212
 213 define float @ds_fmax_f32_vv_offset(float addrspace(3)* %ptr, float %val) {
     ; Returning variant with operands in VGPRs ($vgpr0, $vgpr1 in the MIR
     ; liveins) and a constant pointer offset. The checks expect the GEP folded
     ; into the instruction as `offset:512` (immediate 512 in MIR), with the
     ; memory operand attached to %ir.gep.
 214 ; GFX8-LABEL: ds_fmax_f32_vv_offset:
 215 ; GFX8:       ; %bb.0:
 216 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 217 ; GFX8-NEXT:    s_mov_b32 m0, -1
 218 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
 219 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 220 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 221 ;
 222 ; GFX9-LABEL: ds_fmax_f32_vv_offset:
 223 ; GFX9:       ; %bb.0:
 224 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 225 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
 226 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 227 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 228   ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset
 229   ; GFX8-MIR: bb.1 (%ir-block.0):
 230   ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 231   ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 232   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 233   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 234   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 235   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 236   ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
 237   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 238   ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 239   ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset
 240   ; GFX9-MIR: bb.1 (%ir-block.0):
 241   ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 242   ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 243   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 244   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 245   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 246   ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
 247   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 248   ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
       ; Index 128 over `float` elements (128 x 4 bytes) corresponds to the
       ; 512 offset expected in the checks above.
 249   %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 250   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 251   ret float %ret
 252 }
 253
 254 define void @ds_fmax_f32_vv_nortn(float addrspace(3)* %ptr, float %val) {
     ; "No return" variant with operands in VGPRs: the function returns void
     ; and the intrinsic result is never used. The checks still expect the
     ; returning instruction form (ds_max_rtn_f32 / DS_MAX_RTN_F32) and an
     ; s_waitcnt lgkmcnt(0) before the s_setpc_b64 return.
 255 ; GFX8-LABEL: ds_fmax_f32_vv_nortn:
 256 ; GFX8:       ; %bb.0:
 257 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 258 ; GFX8-NEXT:    s_mov_b32 m0, -1
 259 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
 260 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 261 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 262 ;
 263 ; GFX9-LABEL: ds_fmax_f32_vv_nortn:
 264 ; GFX9:       ; %bb.0:
 265 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 266 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
 267 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 268 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 269   ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 270   ; GFX8-MIR: bb.1 (%ir-block.0):
 271   ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 272   ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 273   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 274   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 275   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 276   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 277   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 278   ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
 279   ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_nortn
 280   ; GFX9-MIR: bb.1 (%ir-block.0):
 281   ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 282   ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 283   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 284   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 285   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (load store (s32) on %ir.ptr, addrspace 3)
 286   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 287   ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
       ; Despite its name, %ret is not returned or otherwise used; the
       ; function returns void.
 288   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 false)
 289   ret void
 290 }
 291
 292 define void @ds_fmax_f32_vv_offset_nortn(float addrspace(3)* %ptr, float %val) {
     ; "No return" variant with operands in VGPRs and a constant pointer
     ; offset. The checks expect the GEP folded into the instruction as
     ; `offset:512` and, although the result is unused, still the returning
     ; instruction form (ds_max_rtn_f32 / DS_MAX_RTN_F32).
 293 ; GFX8-LABEL: ds_fmax_f32_vv_offset_nortn:
 294 ; GFX8:       ; %bb.0:
 295 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 296 ; GFX8-NEXT:    s_mov_b32 m0, -1
 297 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
 298 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 299 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 300 ;
 301 ; GFX9-LABEL: ds_fmax_f32_vv_offset_nortn:
 302 ; GFX9:       ; %bb.0:
 303 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 304 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1 offset:512
 305 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 306 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 307   ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 308   ; GFX8-MIR: bb.1 (%ir-block.0):
 309   ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 310   ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 311   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 312   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 313   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 314   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 512, 0, implicit $m0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 315   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 316   ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]]
 317   ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_offset_nortn
 318   ; GFX9-MIR: bb.1 (%ir-block.0):
 319   ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 320   ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 321   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 322   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 323   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 512, 0, implicit $exec :: (load store (s32) on %ir.gep, addrspace 3)
 324   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 325   ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]]
       ; Index 128 over `float` elements (128 x 4 bytes) corresponds to the
       ; 512 offset expected in the checks above. Despite its name, %ret is
       ; not returned or otherwise used; the function returns void.
 326   %gep = getelementptr float, float addrspace(3)* %ptr, i32 128
 327   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %gep, float %val, i32 0, i32 0, i1 false)
 328   ret void
 329 }
 330
 331 define float @ds_fmax_f32_vv_volatile(float addrspace(3)* %ptr, float %val) {
     ; Volatile variant with operands in VGPRs. The expected assembly is the
     ; plain ds_max_rtn_f32 sequence; the difference this test pins down is in
     ; the MIR checks, which expect the memory operand to be marked
     ; `volatile load store (s32)`.
 332 ; GFX8-LABEL: ds_fmax_f32_vv_volatile:
 333 ; GFX8:       ; %bb.0:
 334 ; GFX8-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 335 ; GFX8-NEXT:    s_mov_b32 m0, -1
 336 ; GFX8-NEXT:    ds_max_rtn_f32 v0, v0, v1
 337 ; GFX8-NEXT:    s_waitcnt lgkmcnt(0)
 338 ; GFX8-NEXT:    s_setpc_b64 s[30:31]
 339 ;
 340 ; GFX9-LABEL: ds_fmax_f32_vv_volatile:
 341 ; GFX9:       ; %bb.0:
 342 ; GFX9-NEXT:    s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
 343 ; GFX9-NEXT:    ds_max_rtn_f32 v0, v0, v1
 344 ; GFX9-NEXT:    s_waitcnt lgkmcnt(0)
 345 ; GFX9-NEXT:    s_setpc_b64 s[30:31]
 346   ; GFX8-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 347   ; GFX8-MIR: bb.1 (%ir-block.0):
 348   ; GFX8-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 349   ; GFX8-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 350   ; GFX8-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 351   ; GFX8-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 352   ; GFX8-MIR:   $m0 = S_MOV_B32 -1
 353   ; GFX8-MIR:   [[DS_MAX_RTN_F32_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32 [[COPY]], [[COPY1]], 0, 0, implicit $m0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
 354   ; GFX8-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_]]
 355   ; GFX8-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 356   ; GFX8-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
 357   ; GFX9-MIR-LABEL: name: ds_fmax_f32_vv_volatile
 358   ; GFX9-MIR: bb.1 (%ir-block.0):
 359   ; GFX9-MIR:   liveins: $vgpr0, $vgpr1, $sgpr30_sgpr31
 360   ; GFX9-MIR:   [[COPY:%[0-9]+]]:vgpr_32 = COPY $vgpr0
 361   ; GFX9-MIR:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY $vgpr1
 362   ; GFX9-MIR:   [[COPY2:%[0-9]+]]:sgpr_64 = COPY $sgpr30_sgpr31
 363   ; GFX9-MIR:   [[DS_MAX_RTN_F32_gfx9_:%[0-9]+]]:vgpr_32 = DS_MAX_RTN_F32_gfx9 [[COPY]], [[COPY1]], 0, 0, implicit $exec :: (volatile load store (s32) on %ir.ptr, addrspace 3)
 364   ; GFX9-MIR:   $vgpr0 = COPY [[DS_MAX_RTN_F32_gfx9_]]
 365   ; GFX9-MIR:   [[COPY3:%[0-9]+]]:ccr_sgpr_64 = COPY [[COPY2]]
 366   ; GFX9-MIR:   S_SETPC_B64_return [[COPY3]], implicit $vgpr0
       ; The last argument is `i1 true` here; the MIR checks above expect the
       ; memory operand on %ir.ptr to carry the `volatile` flag.
 367   %ret = call float @llvm.amdgcn.ds.fmax(float addrspace(3)* %ptr, float %val, i32 0, i32 0, i1 true)
 368   ret float %ret
 369 }
 370
 371 declare float @llvm.amdgcn.ds.fmax(float addrspace(3)* nocapture, float, i32 immarg, i32 immarg, i1 immarg) #0
 372
 373 attributes #0 = { argmemonly nounwind willreturn }