Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / GlobalISel / flat-scratch-init.ll
bloba8aa6c780b86a56386a1a4bfb421452a9a00c449
1 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 < %s | FileCheck -check-prefixes=GCN,RW-FLAT %s
2 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx900 -mattr=+architected-flat-scratch < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
3 ; RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx940 < %s | FileCheck -check-prefixes=GCN,RO-FLAT %s
5 ; Make sure flat_scratch_init is set when it is needed, and is not requested in the RO-FLAT runs
; Kernel with no calls whose only stack object is addrspacecast to a flat
; pointer and written through that pointer with a volatile store.
; RW-FLAT run (plain gfx900): flat_scratch_lo/hi are set up with
; s_add_u32/s_addc_u32 before the flat store, and the flat_scratch_init user
; SGPR and the private segment wavefront offset SGPR are requested.
; RO-FLAT runs (+architected-flat-scratch, gfx940): no flat_scratch reference
; before the flat store, no flat_scratch_init or wavefront offset directive,
; and .amdhsa_enable_private_segment 1 is emitted instead.
7 ; GCN-LABEL: {{^}}stack_object_addrspacecast_in_kernel_no_calls:
8 ; RW-FLAT:     s_add_u32 flat_scratch_lo, s4, s7
9 ; RW-FLAT:     s_addc_u32 flat_scratch_hi, s5, 0
10 ; RO-FLAT-NOT: flat_scratch
11 ; GCN:         flat_store_dword
12 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
13 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
14 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
15 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset
16 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
17 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
18 ; RO-FLAT:     .amdhsa_enable_private_segment 1
19 ; GCN-NOT:     .amdhsa_reserve_flat_scratch
20 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
21 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
22 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
23 define amdgpu_kernel void @stack_object_addrspacecast_in_kernel_no_calls() {
24   %alloca = alloca i32, addrspace(5)
25   %cast = addrspacecast ptr addrspace(5) %alloca to ptr
26   store volatile i32 0, ptr %cast
27   ret void
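30 ; TODO: Could optimize out flat_scratch_init in this case (presumably because the stack object is only accessed with buffer_store_dword, never through a flat pointer)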
; Kernel with no calls that writes its stack object directly through the
; addrspace(5) pointer with a volatile store (no cast to a flat pointer).
; RW-FLAT run (plain gfx900): the store is a buffer_store_dword, and the
; private segment buffer, flat_scratch_init and wavefront offset SGPRs are all
; requested; .amdhsa_reserve_flat_scratch is 0.
; RO-FLAT runs (+architected-flat-scratch, gfx940): no flat_scratch reference
; before the scratch_store_dword, none of those SGPR directives, no
; .amdhsa_reserve_flat_scratch, and .amdhsa_enable_private_segment 1.
31 ; GCN-LABEL: {{^}}stack_object_in_kernel_no_calls:
32 ; RO-FLAT-NOT: flat_scratch
33 ; RW-FLAT:     buffer_store_dword
34 ; RO-FLAT:     scratch_store_dword
35 ; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
36 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
37 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 1
38 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
39 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 1
40 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
41 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
42 ; RO-FLAT:     .amdhsa_enable_private_segment 1
43 ; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
44 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
45 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 1
46 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 6
47 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
48 define amdgpu_kernel void @stack_object_in_kernel_no_calls() {
49   %alloca = alloca i32, addrspace(5)
50   store volatile i32 0, ptr addrspace(5) %alloca
51   ret void
; Checks for a kernel with no calls and no stack usage.
; All runs: no flat_scratch reference after the label, and scratch stays
; disabled (SCRATCH_EN is 0).
; RW-FLAT run (plain gfx900): the private segment buffer SGPR is still
; requested, while flat_scratch_init, the wavefront offset and
; .amdhsa_reserve_flat_scratch are all 0.
; RO-FLAT runs (+architected-flat-scratch, gfx940): none of those directives
; are emitted and .amdhsa_enable_private_segment is 0.
; The negative check on .amdhsa_reserve_flat_scratch carries no value so that
; the directive is rejected whatever its value, not only when it is 0.
54 ; GCN-LABEL: {{^}}kernel_no_calls_no_stack:
55 ; GCN-NOT:    flat_scratch
56 ; RW-FLAT:     .amdhsa_user_sgpr_private_segment_buffer 1
57 ; RO-FLAT-NOT: .amdhsa_user_sgpr_private_segment_buffer
58 ; RW-FLAT:     .amdhsa_user_sgpr_flat_scratch_init 0
59 ; RO-FLAT-NOT: .amdhsa_user_sgpr_flat_scratch_init
60 ; RW-FLAT:     .amdhsa_system_sgpr_private_segment_wavefront_offset 0
61 ; RW-FLAT-NOT: .amdhsa_enable_private_segment
62 ; RO-FLAT-NOT: .amdhsa_system_sgpr_private_segment_wavefront_offset
63 ; RO-FLAT:     .amdhsa_enable_private_segment 0
64 ; RW-FLAT:     .amdhsa_reserve_flat_scratch 0
65 ; RO-FLAT-NOT: .amdhsa_reserve_flat_scratch
66 ; GCN:         COMPUTE_PGM_RSRC2:SCRATCH_EN: 0
67 ; RW-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 4
68 ; RO-FLAT:     COMPUTE_PGM_RSRC2:USER_SGPR: 0
69 define amdgpu_kernel void @kernel_no_calls_no_stack() {
70   ret void