llvm/test/CodeGen/AMDGPU/indirect-addressing-si-noopt.ll

   1 ; RUN: llc -O0 -march=amdgcn -mcpu=tahiti -verify-machineinstrs < %s | FileCheck %s
   2
   3 ; FIXME: Merge into indirect-addressing-si.ll
   4
   5 ; Make sure that the TwoAddressInstruction pass keeps src0 as subregister
   6 ; sub0 of the tied implicit use and def of the super register.
   7
   8 ; CHECK-LABEL: {{^}}insert_wo_offset:
   9 ; CHECK: s_load_dword [[IN:s[0-9]+]]
  10 ; CHECK: s_mov_b32 m0, [[IN]]
  11 ; CHECK: v_movreld_b32_e32 v[[ELT0:[0-9]+]]
  12 ; CHECK: buffer_store_dwordx4
  13 ; CHECK: buffer_store_dwordx4
  14 ; CHECK: buffer_store_dwordx4
  15 ; CHECK: buffer_store_dwordx4
  16 define amdgpu_kernel void @insert_wo_offset(ptr addrspace(1) %out, i32 %in) { ; Inserts 17.0 into a constant <16 x float> at index %in and stores the result through %out.
  17 entry:
  18   %ins = insertelement <16 x float> <float 1.0, float 2.0, float 3.0, float 4.0, float 5.0, float 6.0, float 7.0, float 8.0, float 9.0, float 10.0, float 11.0, float 12.0, float 13.0, float 14.0, float 15.0, float 16.0>, float 17.0, i32 %in ; index is the kernel argument %in, not a constant; the checks before this function expect it to be moved into m0 ahead of a v_movreld_b32
  19   store <16 x float> %ins, ptr addrspace(1) %out ; whole 16-element vector is stored; the checks before this function expect four buffer_store_dwordx4
  20   ret void
  21 }
  22
  23 ; Make sure we don't hit use of undefined register errors when expanding an
  24 ; extract with undef index.
  25
  26 ; CHECK-LABEL: {{^}}extract_adjacent_blocks:
  27 ; CHECK: s_load_dword [[ARG:s[0-9]+]]
  28 ; CHECK: s_cmp_lg_u32
  29 ; CHECK: s_cbranch_scc1 [[BB4:.LBB[0-9]+_[0-9]+]]
  30
  31 ; CHECK: buffer_load_dwordx4
  32
  33 ; CHECK: s_branch [[ENDBB:.LBB[0-9]+_[0-9]+]]
  34
  35 ; CHECK: [[BB4]]:
  36 ; CHECK: buffer_load_dwordx4
  37
  38 ; CHECK: [[ENDBB]]:
  39 ; CHECK: buffer_store_dword
  40 ; CHECK: s_endpgm
  41
  42 define amdgpu_kernel void @extract_adjacent_blocks(i32 %arg) #0 { ; Branches on %arg to one of two blocks that each load a <4 x float> and extract an element at an undef index, then stores the merged result.
  43 bb:
  44   %tmp = icmp eq i32 %arg, 0 ; true when %arg is zero
  45   br i1 %tmp, label %bb1, label %bb4 ; %arg == 0 goes to bb1, anything else to bb4
  46
  47 bb1:
  48   %tmp2 = load volatile <4 x float>, ptr addrspace(1) undef ; volatile load from an undef address
  49   %tmp3 = extractelement <4 x float> %tmp2, i32 undef ; extract with an undef index, the case this test is about
  50   call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp2) #0 ; Side-effecting asm that takes %tmp2 as its operand; keeps this block from being optimized out
  51   br label %bb7
  52
  53 bb4:
  54   %tmp5 = load volatile <4 x float>, ptr addrspace(1) undef ; same shape as bb1: volatile load from an undef address
  55   %tmp6 = extractelement <4 x float> %tmp5, i32 undef ; second extract with an undef index
  56   call void asm sideeffect "; reg use $0", "v"(<4 x float> %tmp5) #0 ; Side-effecting asm that takes %tmp5 as its operand; keeps this block from being optimized out
  57   br label %bb7
  58
  59 bb7:
  60   %tmp8 = phi float [ %tmp3, %bb1 ], [ %tmp6, %bb4 ] ; merge the extracted element from whichever block ran
  61   store volatile float %tmp8, ptr addrspace(1) undef ; volatile store of the merged value to an undef address
  62   ret void
  63 }