llvm/test/CodeGen/AMDGPU/GlobalISel/flat-scratch-init.ll

   1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
   2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
   3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
   4
   5 ; Make sure flat_scratch_init is set up for kernels that use scratch, and is not requested at all with architected flat scratch.
   6
   7 ; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
   8 ; RW-FLAT:     s_add_u32 flat_scratch_lo, s4, s7
   9 ; RW-FLAT:     s_addc_u32 flat_scratch_hi, s5, 0
  10 ; RO-FLAT-NOT: flat_scratch
  11 ; GCN:         flat_store_dword
  12 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
  13 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
  14 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
  15 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset
  16 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
  17 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
  18 ; RO-FLAT:     .amdhsa_enable_private_segment 1
  19 ; GCN-NOT:     .amdhsa_reserve_flat_scratch
  20 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
  21 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
  22 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
  23 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() { ; kernel that stores to its own stack object through an addrspacecast
  24   %alloca = alloca i32, addrspace(5)
  25   %cast = addrspacecast ptr addrspace(5) %alloca to ptr ; the store below uses this casted pointer; the checks expect a flat_store_dword
  26   store volatile i32 0, ptr %cast ; volatile, presumably so the store is not optimized away
  27   ret void
  28 }
  29
  30 ; TODO: Could optimize out in this case (presumably the flat_scratch_init user SGPR, which the run without architected flat scratch still enables although the store is a buffer_store_dword - confirm)
  31 ; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
  32 ; RO-FLAT-NOT: flat_scratch
  33 ; RW-FLAT:     buffer_store_dword
  34 ; RO-FLAT:     scratch_store_dword
  35 ; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
  36 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
  37 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
  38 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
  39 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
  40 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
  41 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
  42 ; RO-FLAT:     .amdhsa_enable_private_segment 1
  43 ; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
  44 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
  45 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
  46 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
  47 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
  48 define amdgpu_kernel void @stack_object_in_kernel_no_calls() { ; kernel that stores to its own stack object directly, without any addrspacecast
  49   %alloca = alloca i32, addrspace(5)
  50   store volatile i32 0, ptr addrspace(5) %alloca ; the checks expect buffer_store_dword on the first run and scratch_store_dword on the other two
  51   ret void
  52 }
  53
  54 ; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
  55 ; GCN-NOT:     flat_scratch
  56 ; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
  57 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
  58 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 0
  59 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
  60 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 0
  61 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
  62 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
  63 ; RO-FLAT:     .amdhsa_enable_private_segment 0
  64 ; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
  65 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
  66 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
  67 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 4
  68 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
  69 define amdgpu_kernel void @kernel_no_calls_no_stack() { ; empty kernel: no stack object and no calls, so the checks expect scratch to stay disabled
  70   ret void
  71 }