; Checks how AGPRs (the a-registers named in the inline asm below) are handled
; in callees and across calls. The same IR is compiled twice, once for gfx90a
; and once for gfx908; each compile is matched against the shared GCN prefix
; plus its own subtarget-specific prefix.
1 ; RUN: llc -march=amdgcn -mcpu=gfx90a -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX90A %s
2 ; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX908 %s
; Baseline callee: no AGPR clobber is visible for it in this excerpt. It is the
; callee used by test_call_empty.
; NOTE(review): the function body is not shown here - confirm it is empty.
4 ; GCN-LABEL: {{^}}func_empty:
8 define void @func_empty() #0 {
; Callee whose inline asm declares a3 as clobbered. It is the callee used by
; test_call_areg4.
; NOTE(review): the "_4" suffix presumably means "touches the first 4 AGPRs
; (a0-a3)" - confirm.
12 ; GCN-LABEL: {{^}}func_areg_4:
19 define void @func_areg_4() #0 {
; Side-effecting asm that emits only a comment; its sole purpose is the ~{a3}
; clobber.
20 call void asm sideeffect "; use agpr3", "~{a3}" ()
; Callee whose inline asm declares a31 as clobbered. It is the callee used by
; test_call_areg32. No save/restore checks for a31 are visible in this excerpt.
24 ; GCN-LABEL: {{^}}func_areg_32:
31 define void @func_areg_32() #0 {
; Side-effecting asm that emits only a comment; its sole purpose is the ~{a31}
; clobber.
32 call void asm sideeffect "; use agpr31", "~{a31}" ()
; Callee whose inline asm declares a32 as clobbered. The gfx90a-only checks
; expect a32 to be copied out to v0 and later written back from v0.
; NOTE(review): this looks like a callee-save/restore of a32 through a VGPR
; rather than memory - confirm against the calling convention.
36 ; GCN-LABEL: {{^}}func_areg_33:
38 ; GFX90A: v_accvgpr_read_b32 v0, a32 ; Reload Reuse
42 ; GFX90A: v_accvgpr_write_b32 a32, v0 ; Reload Reuse
45 define void @func_areg_33() #0 {
; Side-effecting asm that emits only a comment; its sole purpose is the ~{a32}
; clobber.
46 call void asm sideeffect "; use agpr32", "~{a32}" ()
; Callee whose inline asm declares a63 as clobbered. It is the callee used by
; test_call_areg64. The gfx90a-only checks expect a63 to be copied out to v0
; and later written back from v0.
; NOTE(review): presumably a callee-save/restore of a63 - confirm.
50 ; GCN-LABEL: {{^}}func_areg_64:
53 ; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
55 ; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
58 define void @func_areg_64() #0 {
; Side-effecting asm that emits only a comment; its sole purpose is the ~{a63}
; clobber.
59 call void asm sideeffect "; use agpr63", "~{a63}" ()
; Callee whose inline asm declares both a31 and a63 as clobbered. It is the
; callee used by test_call_areg31_63.
;  - gfx908: no v_accvgpr instruction may appear around the asm text.
;  - gfx90a: a63 is copied out to v0 before the asm text and written back from
;    v0 after it; the checks expect no such copy for a31.
; The asm text itself is matched under the shared prefix, so it anchors the
; ordering of the surrounding checks for both subtargets.
63 ; GCN-LABEL: {{^}}func_areg_31_63:
65 ; GFX908-NOT: v_accvgpr
67 ; GFX90A: v_accvgpr_read_b32 v0, a63 ; Reload Reuse
68 ; GCN: use agpr31, agpr63
69 ; GFX90A: v_accvgpr_write_b32 a63, v0 ; Reload Reuse
70 ; GFX908-NOT: v_accvgpr
73 define void @func_areg_31_63() #0 {
; Side-effecting asm that emits only a comment; its sole purpose is the pair of
; clobbers ~{a31} and ~{a63}.
74 call void asm sideeffect "; use agpr31, agpr63", "~{a31},~{a63}" ()
; External callee: declared only, with no body in this file. It is the callee
; used by test_call_unknown.
; NOTE(review): presumably stands in for a callee whose register usage the
; compiler cannot see - confirm.
78 declare void @func_unknown() #0
; Kernel that defines a <32 x float> value through inline asm, calls
; func_empty, and then stores the value.
;  - gfx908: expects 8 v_accvgpr_read_b32 matches and 8 global_store_dwordx4
;    whose data operand is a v[...] range.
;  - gfx90a: expects no v_accvgpr instruction in the two checked gaps and 8
;    global_store_dwordx4 whose data operand is an a[...] range.
; NOTE(review): the checks that sit between the directives below are not
; visible in this excerpt, so the exact position of the call relative to them
; cannot be stated from here.
80 ; GCN-LABEL: {{^}}test_call_empty:
84 ; GFX908-COUNT-8: v_accvgpr_read_b32
85 ; GFX90A-NOT: v_accvgpr
89 ; GFX90A-NOT: v_accvgpr
90 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
91 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
93 define amdgpu_kernel void @test_call_empty() #0 {
; The "=a" output constraint asks for the result in a-registers; the asm emits
; only a "def" comment naming the assigned register range.
95 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
96 call void @func_empty()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
97 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Kernel that defines a <32 x float> value through inline asm, calls
; func_areg_4 (which clobbers a3), and then stores the value.
;  - gfx908: the value is expected in a[0:31], followed by 8
;    v_accvgpr_read_b32 matches and 8 stores from v[...] ranges.
;  - gfx90a: the value is expected in a[4:35], i.e. starting just above the
;    a3 clobbered by the callee, with no v_accvgpr instruction in the checked
;    gaps and 8 stores taken from a[...] ranges.
101 ; GCN-LABEL: {{^}}test_call_areg4:
104 ; GFX908: def a[0:31]
105 ; GFX90A: def a[4:35]
106 ; GFX908-COUNT-8: v_accvgpr_read_b32
107 ; GFX90A-NOT: v_accvgpr
111 ; GFX90A-NOT: v_accvgpr
112 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
113 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
115 define amdgpu_kernel void @test_call_areg4() #0 {
; The "=a" output constraint asks for the result in a-registers; the "def"
; comment emitted here is what the def a[...] checks above match.
117 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
118 call void @func_areg_4()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
119 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Kernel that defines a <32 x float> value through inline asm, calls
; func_areg_32 (which clobbers a31), and then stores the value.
;  - gfx908: the value is expected in a[0:31], followed by 8
;    v_accvgpr_read_b32 matches and 8 stores from v[...] ranges.
;  - gfx90a: the value is expected in a[32:63], i.e. starting just above the
;    a31 clobbered by the callee, with no v_accvgpr instruction in the checked
;    gaps and 8 stores taken from a[...] ranges.
123 ; GCN-LABEL: {{^}}test_call_areg32:
126 ; GFX908: def a[0:31]
127 ; GFX90A: def a[32:63]
128 ; GFX908-COUNT-8: v_accvgpr_read_b32
129 ; GFX90A-NOT: v_accvgpr
133 ; GFX90A-NOT: v_accvgpr
134 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
135 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
137 define amdgpu_kernel void @test_call_areg32() #0 {
; The "=a" output constraint asks for the result in a-registers; the "def"
; comment emitted here is what the def a[...] checks above match.
139 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
140 call void @func_areg_32()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
141 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Kernel that defines a <32 x float> value through inline asm, calls
; func_areg_64 (which clobbers a63), and then stores the value.
;  - gfx908: expects 8 v_accvgpr_read_b32 matches and 8 stores from v[...]
;    ranges.
;  - gfx90a: expects no v_accvgpr instruction in the checked gaps and 8 stores
;    taken from a[...] ranges.
; NOTE(review): no "def a[...]" range checks are visible for this kernel in
; this excerpt - confirm whether the full file pins the allocated range.
145 ; GCN-LABEL: {{^}}test_call_areg64:
149 ; GFX908-COUNT-8: v_accvgpr_read_b32
150 ; GFX90A-NOT: v_accvgpr
154 ; GFX90A-NOT: v_accvgpr
155 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
156 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
158 define amdgpu_kernel void @test_call_areg64() #0 {
; The "=a" output constraint asks for the result in a-registers; the asm emits
; only a "def" comment naming the assigned register range.
160 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
161 call void @func_areg_64()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
162 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Kernel that defines a <32 x float> value through inline asm, calls
; func_areg_31_63 (which clobbers a31 and a63), and then stores the value.
;  - gfx908: the value is expected in a[0:31], followed by 8
;    v_accvgpr_read_b32 matches and 8 stores from v[...] ranges.
;  - gfx90a: the value is expected in a[32:63], with no v_accvgpr instruction
;    in the checked gaps and 8 stores taken from a[...] ranges.
; NOTE(review): a[32:63] includes a63, which the callee clobbers; presumably
; this relies on the callee preserving a63 on gfx90a (see the checks on
; func_areg_31_63) - confirm.
166 ; GCN-LABEL: {{^}}test_call_areg31_63:
169 ; GFX908: def a[0:31]
170 ; GFX90A: def a[32:63]
171 ; GFX908-COUNT-8: v_accvgpr_read_b32
172 ; GFX90A-NOT: v_accvgpr
176 ; GFX90A-NOT: v_accvgpr
177 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
178 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
180 define amdgpu_kernel void @test_call_areg31_63() #0 {
; The "=a" output constraint asks for the result in a-registers; the "def"
; comment emitted here is what the def a[...] checks above match.
182 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
183 call void @func_areg_31_63()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
184 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Kernel that defines a <32 x float> value through inline asm, calls the
; external func_unknown (declaration only, no body in this file), and then
; stores the value.
;  - gfx908: the value is expected in a[0:31], followed by 8
;    v_accvgpr_read_b32 matches and 8 stores from v[...] ranges.
;  - gfx90a: the value is expected in a[32:63], with no v_accvgpr instruction
;    in the checked gaps and 8 stores taken from a[...] ranges.
188 ; GCN-LABEL: {{^}}test_call_unknown:
191 ; GFX908: def a[0:31]
192 ; GFX90A: def a[32:63]
193 ; GFX908-COUNT-8: v_accvgpr_read_b32
194 ; GFX90A-NOT: v_accvgpr
198 ; GFX90A-NOT: v_accvgpr
199 ; GFX908-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], v[{{[0-9:]+}}]
200 ; GFX90A-COUNT-8: global_store_dwordx4 v[{{[0-9:]+}}], a[{{[0-9:]+}}]
202 define amdgpu_kernel void @test_call_unknown() #0 {
; The "=a" output constraint asks for the result in a-registers; the "def"
; comment emitted here is what the def a[...] checks above match.
204 %reg = call <32 x float> asm sideeffect "; def $0", "=a"()
205 call void @func_unknown()
; Volatile store keeps %reg in use after the call; the destination address is
; undef because only the data registers matter to the checks.
206 store volatile <32 x float> %reg, ptr addrspace(1) undef
; Attribute group shared by every function and declaration in this file.
; noinline keeps each call in the kernels as a real call, so the callee's
; register usage matters at the call site. The flat work-group size is set to
; the range 1,512.
; NOTE(review): the work-group size range presumably controls the register
; budget available to the allocator - confirm.
210 attributes #0 = { nounwind noinline "amdgpu-flat-work-group-size"="1,512" }