1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
4 ; Make sure flat_scratch_init is set
; The kernel below stores to an addrspace(5) alloca through a pointer produced
; by addrspacecast, and the store is checked as flat_store_dword for both runs.
; Directives using the RW-FLAT prefix belong to the plain gfx900 run; directives
; using the RO-FLAT prefix belong to the +architected-flat-scratch run.
6 ; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
; Only the RW-FLAT run is expected to set up flat_scratch_lo/hi with scalar adds;
; the RO-FLAT run must not mention flat_scratch before the store.
7 ; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
8 ; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
9 ; RO-FLAT-NOT: flat_scratch
10 ; GCN: flat_store_dword
; Kernel descriptor directives: the RW-FLAT run expects flat_scratch_init 1 and
; the private segment wavefront offset directive, while the RO-FLAT run expects
; none of those and instead expects .amdhsa_enable_private_segment 1.
11 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
12 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
13 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
14 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset
15 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
16 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
17 ; RO-FLAT: .amdhsa_enable_private_segment 1
; Neither run may print .amdhsa_reserve_flat_scratch for this kernel.
18 ; GCN-NOT: .amdhsa_reserve_flat_scratch
; Scratch is enabled in both runs; the expected user SGPR count is 6 for the
; RW-FLAT run and 0 for the RO-FLAT run.
19 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
20 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
21 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
22 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
; Stack object allocated in address space 5.
23 %alloca = alloca i32, addrspace(5)
; Cast the addrspace(5) pointer to an unqualified i32* so the store below does
; not address the alloca directly.
24 %cast = addrspacecast i32 addrspace(5)* %alloca to i32*
; NOTE(review): volatile is presumably there so the store is not optimized
; away - confirm.
25 store volatile i32 0, i32* %cast
29 ; TODO: Could optimize out in this case
; NOTE(review): the TODO presumably refers to flat_scratch_init, which the
; RW-FLAT run still expects to be 1 below even though the store is checked as
; buffer_store_dword rather than a flat instruction - confirm.
; This kernel stores directly to the addrspace(5) alloca, with no addrspacecast.
30 ; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
; The store is expected as buffer_store_dword in the RW-FLAT run and as
; scratch_store_dword in the RO-FLAT run.
31 ; RO-FLAT-NOT: flat_scratch
32 ; RW-FLAT: buffer_store_dword
33 ; RO-FLAT: scratch_store_dword
; Kernel descriptor directives: the RW-FLAT run expects the private segment
; buffer, flat_scratch_init and wavefront offset all set to 1; the RO-FLAT run
; expects none of those directives and .amdhsa_enable_private_segment 1 instead.
34 ; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
35 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
36 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
37 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
38 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
39 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
40 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
41 ; RO-FLAT: .amdhsa_enable_private_segment 1
; .amdhsa_reserve_flat_scratch is expected with value 0 in the RW-FLAT run and
; must be absent entirely in the RO-FLAT run.
42 ; RW-FLAT: .amdhsa_reserve_flat_scratch 0
43 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
; Scratch is enabled in both runs; the expected user SGPR count is 6 for the
; RW-FLAT run and 0 for the RO-FLAT run.
44 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
45 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
46 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
47 define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
; Stack object allocated in address space 5.
48 %alloca = alloca i32, addrspace(5)
; Store straight through the addrspace(5) pointer.
; NOTE(review): volatile is presumably there so the store is not optimized
; away - confirm.
49 store volatile i32 0, i32 addrspace(5)* %alloca
53 ; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
54 ; GCN-NOT: flat_scratch
55 ; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
56 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
57 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 0
58 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
59 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
60 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
61 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
62 ; RO-FLAT: .amdhsa_enable_private_segment 0
63 ; RW-FLAT: .amdhsa_reserve_flat_scratch 0
64 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
65 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
66 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 4
67 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
68 define amdgpu_kernel void @kernel_no_calls_no_stack() {