1 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX908 %s
; Test: kernel that passes nine constants through "a"-constrained inline asm
; operands and then keeps ten loaded i32 values live across a "v"-constrained
; inline asm, compiled with attribute group #0.
;
; What the directives below expect in the generated code for this kernel:
;   * no s_mov_b32 of SCRATCH_RSRC_DWORD0 / SCRATCH_RSRC_DWORD1,
;   * a v_accvgpr_write_b32 from a VGPR into some AGPR (captured as A_REG),
;   * no buffer_store_dword of a VGPR,
;   * a v_mov_b32_e32 reading V_REG followed by a v_accvgpr_read_b32 of A_REG
;     into that same V_REG,
;   * reported NumVgprs 10, ScratchSize 0, VGPRBlocks 2 and
;     NumVGPRsForWavesPerEU 10.
; Presumably this means the value that does not fit in VGPRs is parked in an
; AGPR instead of scratch memory -- confirm against the backend's spilling code.
;
; The pattern v{{[0-9]}} matches "v" followed by a single digit.
3 ; GFX908-LABEL: {{^}}max_11_vgprs_used_9a:
4 ; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
5 ; GFX908-NOT: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
6 ; GFX908-DAG: v_accvgpr_write_b32 [[A_REG:a[0-9]+]], v{{[0-9]}}
7 ; GFX908-NOT: buffer_store_dword v{{[0-9]}},
9 ; GFX908: v_mov_b32_e32 v{{[0-9]}}, [[V_REG:v[0-9]+]]
10 ; GFX908: v_accvgpr_read_b32 [[V_REG]], [[A_REG]]
13 ; GFX908: NumVgprs: 10
14 ; GFX908: ScratchSize: 0
15 ; GFX908: VGPRBlocks: 2
16 ; GFX908: NumVGPRsForWavesPerEU: 10
; NOTE(review): no terminator (ret) or closing brace is visible for this
; function in the reviewed text -- confirm they are present in the real file.
17 define amdgpu_kernel void @max_11_vgprs_used_9a(ptr addrspace(1) %p) #0 {
; Index used for the first GEP; loaded volatile from an undef pointer.
18 %tid = load volatile i32, ptr addrspace(1) undef
; Nine "a" operands fed with the constants 1..9.
19 call void asm sideeffect "", "a,a,a,a,a,a,a,a,a"(i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9)
; Chain of ten pointers: each GEP is based on the previous result, so the
; element offsets (4, 8, ..., 36) accumulate.
20 %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
21 %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
22 %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
23 %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
24 %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
25 %p6 = getelementptr inbounds i32, ptr addrspace(1) %p5, i32 20
26 %p7 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 24
27 %p8 = getelementptr inbounds i32, ptr addrspace(1) %p7, i32 28
28 %p9 = getelementptr inbounds i32, ptr addrspace(1) %p8, i32 32
29 %p10 = getelementptr inbounds i32, ptr addrspace(1) %p9, i32 36
; Ten volatile i32 loads, one per pointer.
30 %v1 = load volatile i32, ptr addrspace(1) %p1
31 %v2 = load volatile i32, ptr addrspace(1) %p2
32 %v3 = load volatile i32, ptr addrspace(1) %p3
33 %v4 = load volatile i32, ptr addrspace(1) %p4
34 %v5 = load volatile i32, ptr addrspace(1) %p5
35 %v6 = load volatile i32, ptr addrspace(1) %p6
36 %v7 = load volatile i32, ptr addrspace(1) %p7
37 %v8 = load volatile i32, ptr addrspace(1) %p8
38 %v9 = load volatile i32, ptr addrspace(1) %p9
39 %v10 = load volatile i32, ptr addrspace(1) %p10
; All ten loaded values are "v" operands of one inline asm.
40 call void asm sideeffect "", "v,v,v,v,v,v,v,v,v,v"(i32 %v1, i32 %v2, i32 %v3, i32 %v4, i32 %v5, i32 %v6, i32 %v7, i32 %v8, i32 %v9, i32 %v10)
; The same ten values are used again after the asm by volatile stores to an
; undef pointer.
41 store volatile i32 %v1, ptr addrspace(1) undef
42 store volatile i32 %v2, ptr addrspace(1) undef
43 store volatile i32 %v3, ptr addrspace(1) undef
44 store volatile i32 %v4, ptr addrspace(1) undef
45 store volatile i32 %v5, ptr addrspace(1) undef
46 store volatile i32 %v6, ptr addrspace(1) undef
47 store volatile i32 %v7, ptr addrspace(1) undef
48 store volatile i32 %v8, ptr addrspace(1) undef
49 store volatile i32 %v9, ptr addrspace(1) undef
50 store volatile i32 %v10, ptr addrspace(1) undef
; Test: kernel that uses a single "a"-constrained inline asm operand and keeps
; five loaded i64 values live across a "v"-constrained inline asm, compiled
; with attribute group #0.
;
; What the directives below expect in the generated code for this kernel
; (the DAG-style directives may match in any order):
;   * s_mov_b32 of SCRATCH_RSRC_DWORD0 and SCRATCH_RSRC_DWORD1,
;   * v_accvgpr_write_b32 of the immediate 1 into a0,
;   * v_accvgpr_write_b32 from a VGPR into each of a1..a10,
;   * a buffer_store_dword and a buffer_load_dword of a VGPR,
;   * v_accvgpr_read_b32 into a VGPR from each of a0..a10,
;   * reported NumVgprs 10, ScratchSize 12, VGPRBlocks 2 and
;     NumVGPRsForWavesPerEU 11.
; Presumably this is the "partial" case in the function name: some values go
; to AGPRs and the remainder to scratch memory -- confirm against the
; backend's spilling code.
;
; The pattern v{{[0-9]}} matches "v" followed by a single digit.
54 ; GFX908-LABEL: {{^}}max_11_vgprs_used_1a_partial_spill:
55 ; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD0
56 ; GFX908-DAG: s_mov_b32 s{{[0-9]+}}, SCRATCH_RSRC_DWORD1
57 ; GFX908-DAG: v_accvgpr_write_b32 a0, 1
58 ; GFX908-DAG: buffer_store_dword v{{[0-9]}},
59 ; GFX908-DAG: v_accvgpr_write_b32 a1, v{{[0-9]}}
60 ; GFX908-DAG: v_accvgpr_write_b32 a2, v{{[0-9]}}
61 ; GFX908-DAG: v_accvgpr_write_b32 a3, v{{[0-9]}}
62 ; GFX908-DAG: v_accvgpr_write_b32 a4, v{{[0-9]}}
63 ; GFX908-DAG: v_accvgpr_write_b32 a5, v{{[0-9]}}
64 ; GFX908-DAG: v_accvgpr_write_b32 a6, v{{[0-9]}}
65 ; GFX908-DAG: v_accvgpr_write_b32 a7, v{{[0-9]}}
66 ; GFX908-DAG: v_accvgpr_write_b32 a8, v{{[0-9]}}
67 ; GFX908-DAG: v_accvgpr_write_b32 a9, v{{[0-9]}}
68 ; GFX908-DAG: v_accvgpr_write_b32 a10, v{{[0-9]}}
69 ; GFX908-DAG: buffer_load_dword v{{[0-9]}},
70 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a0
71 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a1
72 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a2
73 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a3
74 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a4
75 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a5
76 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a6
77 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a7
78 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a8
79 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a9
80 ; GFX908-DAG: v_accvgpr_read_b32 v{{[0-9]}}, a10
82 ; GFX908: NumVgprs: 10
83 ; GFX908: ScratchSize: 12
84 ; GFX908: VGPRBlocks: 2
85 ; GFX908: NumVGPRsForWavesPerEU: 11
; NOTE(review): no terminator (ret) or closing brace is visible for this
; function in the reviewed text -- confirm they are present in the real file.
86 define amdgpu_kernel void @max_11_vgprs_used_1a_partial_spill(ptr addrspace(1) %p) #0 {
; Index used for the first GEP; loaded volatile from an undef pointer.
87 %tid = load volatile i32, ptr addrspace(1) undef
; A single "a" operand fed with the constant 1 (matches the "a0, 1" directive).
88 call void asm sideeffect "", "a"(i32 1)
; Chain of five i64 pointers: each GEP is based on the previous result, so the
; element offsets (8, 16, 24, 32) accumulate.
89 %p1 = getelementptr inbounds i64, ptr addrspace(1) %p, i32 %tid
90 %p2 = getelementptr inbounds i64, ptr addrspace(1) %p1, i32 8
91 %p3 = getelementptr inbounds i64, ptr addrspace(1) %p2, i32 16
92 %p4 = getelementptr inbounds i64, ptr addrspace(1) %p3, i32 24
93 %p5 = getelementptr inbounds i64, ptr addrspace(1) %p4, i32 32
; Five volatile i64 loads, one per pointer.
94 %v1 = load volatile i64, ptr addrspace(1) %p1
95 %v2 = load volatile i64, ptr addrspace(1) %p2
96 %v3 = load volatile i64, ptr addrspace(1) %p3
97 %v4 = load volatile i64, ptr addrspace(1) %p4
98 %v5 = load volatile i64, ptr addrspace(1) %p5
; All five i64 values are "v" operands of one inline asm.
99 call void asm sideeffect "", "v,v,v,v,v"(i64 %v1, i64 %v2, i64 %v3, i64 %v4, i64 %v5)
; Stores are rotated by one slot: v1->p2, v2->p3, v3->p4, v4->p5, v5->p1, so
; both the values and all five pointers are still used after the asm.
100 store volatile i64 %v1, ptr addrspace(1) %p2
101 store volatile i64 %v2, ptr addrspace(1) %p3
102 store volatile i64 %v3, ptr addrspace(1) %p4
103 store volatile i64 %v4, ptr addrspace(1) %p5
104 store volatile i64 %v5, ptr addrspace(1) %p1
; Attribute group #0, referenced by both kernels in this file: nounwind plus
; the string attribute "amdgpu-num-vgpr" set to "11". Presumably this caps the
; VGPR budget at 11, which the kernel names (max_11_vgprs_...) also suggest --
; confirm against the AMDGPU backend documentation.
108 attributes #0 = { nounwind "amdgpu-num-vgpr"="11" }