1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
5 ; Make sure flat_scratch_init is set
7 ; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
8 ; RW-FLAT: s_add_u32 flat_scratch_lo, s4, s7
9 ; RW-FLAT: s_addc_u32 flat_scratch_hi, s5, 0
10 ; RO-FLAT-NOT: flat_scratch
11 ; GCN: flat_store_dword
12 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
13 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
14 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
15 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset
16 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
17 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
18 ; RO-FLAT: .amdhsa_enable_private_segment 1
19 ; GCN-NOT: .amdhsa_reserve_flat_scratch
20 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
21 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
22 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
; Test kernel: allocates a single i32 in addrspace(5), casts that pointer to
; the default address space, and stores through the cast pointer. The check
; lines preceding this function expect the store to show up as
; flat_store_dword in every run configuration.
23 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
24 %alloca = alloca i32, addrspace(5)
; The cast makes the stack slot reachable through a plain `ptr`.
25 %cast = addrspacecast ptr addrspace(5) %alloca to ptr
; Volatile store of 0 through the cast pointer.
26 store volatile i32 0, ptr %cast
30 ; TODO: Could optimize out the flat_scratch_init setup in this case, since the stack object is not accessed through a flat pointer
31 ; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
32 ; RO-FLAT-NOT: flat_scratch
33 ; RW-FLAT: buffer_store_dword
34 ; RO-FLAT: scratch_store_dword
35 ; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
36 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
37 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 1
38 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
39 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 1
40 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
41 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
42 ; RO-FLAT: .amdhsa_enable_private_segment 1
43 ; RW-FLAT: .amdhsa_reserve_flat_scratch 0
44 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
45 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
46 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 6
47 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
; Test kernel: allocates a single i32 in addrspace(5) and stores to it
; directly through the addrspace(5) pointer, with no address space cast.
; The check lines preceding this function expect either buffer_store_dword
; or scratch_store_dword, depending on the run configuration.
48 define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
49 %alloca = alloca i32, addrspace(5)
; Volatile store of 0 straight to the addrspace(5) stack slot.
50 store volatile i32 0, ptr addrspace(5) %alloca
54 ; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
55 ; GCN-NOT: flat_scratch
56 ; RW-FLAT: .amdhsa_user_sgpr_private_segment_buffer 1
57 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
58 ; RW-FLAT: .amdhsa_user_sgpr_flat_scratch_init 0
59 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
60 ; RW-FLAT: .amdhsa_system_sgpr_private_segment_wavefront_offset 0
61 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
62 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
63 ; RO-FLAT: .amdhsa_enable_private_segment 0
64 ; RW-FLAT: .amdhsa_reserve_flat_scratch 0
65 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
66 ; GCN: COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
67 ; RW-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 4
68 ; RO-FLAT: COMPUTE_PGM_RSRC2:USER_SGPR: 0
69 define amdgpu_kernel void @kernel_no_calls_no_stack() {