1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 4
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -o - %s | FileCheck -check-prefix=CHECK %s
; Compute-shader function carrying "amdgpu-num-vgpr"="6". Five volatile loads
; (%v1..%v5) must stay live across an inline asm that clobbers v[0:4], and are
; then written back with volatile stores. The expected code below uses only
; v0-v5: the first loaded value is held in v5, while the other four are spilled
; to scratch at offsets 0, 4, 8 and 12 and reloaded one at a time into v0
; before each store.
4 define amdgpu_cs void @max_6_vgprs(ptr addrspace(1) %p) "amdgpu-num-vgpr"="6" {
5 ; CHECK-LABEL: max_6_vgprs:
7 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off scope:SCOPE_SYS
8 ; CHECK-NEXT: s_wait_loadcnt 0x0
9 ; CHECK-NEXT: v_ashrrev_i32_e32 v3, 31, v2
10 ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
11 ; CHECK-NEXT: v_lshlrev_b64_e32 v[2:3], 2, v[2:3]
12 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
13 ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
14 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
15 ; CHECK-NEXT: global_load_b32 v5, v[0:1], off scope:SCOPE_SYS
16 ; CHECK-NEXT: s_wait_loadcnt 0x0
17 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 scope:SCOPE_SYS
18 ; CHECK-NEXT: s_wait_loadcnt 0x0
19 ; CHECK-NEXT: scratch_store_b32 off, v2, off ; 4-byte Folded Spill
20 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 scope:SCOPE_SYS
21 ; CHECK-NEXT: s_wait_loadcnt 0x0
22 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:4 ; 4-byte Folded Spill
23 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 scope:SCOPE_SYS
24 ; CHECK-NEXT: s_wait_loadcnt 0x0
25 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:8 ; 4-byte Folded Spill
26 ; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:160 scope:SCOPE_SYS
27 ; CHECK-NEXT: s_wait_loadcnt 0x0
28 ; CHECK-NEXT: scratch_store_b32 off, v0, off offset:12 ; 4-byte Folded Spill
29 ; CHECK-NEXT: ;;#ASMSTART
30 ; CHECK-NEXT: ;;#ASMEND
31 ; CHECK-NEXT: s_wait_storecnt 0x0
32 ; CHECK-NEXT: global_store_b32 v[0:1], v5, off scope:SCOPE_SYS
33 ; CHECK-NEXT: s_wait_storecnt 0x0
34 ; CHECK-NEXT: scratch_load_b32 v0, off, off th:TH_LOAD_LU ; 4-byte Folded Reload
35 ; CHECK-NEXT: s_wait_loadcnt 0x0
36 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
37 ; CHECK-NEXT: s_wait_storecnt 0x0
38 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
39 ; CHECK-NEXT: s_wait_loadcnt 0x0
40 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
41 ; CHECK-NEXT: s_wait_storecnt 0x0
42 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
43 ; CHECK-NEXT: s_wait_loadcnt 0x0
44 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
45 ; CHECK-NEXT: s_wait_storecnt 0x0
46 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
47 ; CHECK-NEXT: s_wait_loadcnt 0x0
48 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
49 ; CHECK-NEXT: s_wait_storecnt 0x0
50 ; CHECK-NEXT: s_endpgm
; Index value read through a volatile load from an undef pointer.
51 %tid = load volatile i32, ptr addrspace(1) undef
; %p1 = %p + %tid elements. Each later GEP is relative to the previous one, so
; %p2..%p5 sit 4, 12, 24 and 40 i32 elements past %p1, i.e. the byte offsets
; 16, 48, 96 and 160 that appear in the expected loads above.
52 %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
53 %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
54 %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
55 %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
56 %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
; Five values loaded before the asm and consumed only after it.
57 %v1 = load volatile i32, ptr addrspace(1) %p1
58 %v2 = load volatile i32, ptr addrspace(1) %p2
59 %v3 = load volatile i32, ptr addrspace(1) %p3
60 %v4 = load volatile i32, ptr addrspace(1) %p4
61 %v5 = load volatile i32, ptr addrspace(1) %p5
; Empty inline asm whose only effect is declaring v0-v4 clobbered.
62 call void asm sideeffect "", "~{v[0:4]}" ()
; Each loaded value is used by a volatile store after the asm.
63 store volatile i32 %v1, ptr addrspace(1) undef
64 store volatile i32 %v2, ptr addrspace(1) undef
65 store volatile i32 %v3, ptr addrspace(1) undef
66 store volatile i32 %v4, ptr addrspace(1) undef
67 store volatile i32 %v5, ptr addrspace(1) undef
; Compute-shader function carrying "amdgpu-num-vgpr"="11", with a two-way
; branch on %tmp. Four values (%v7..%v10) are loaded before the branch; each
; arm then loads six more values, runs an inline asm clobbering v[0:9], and
; stores its six values plus %v7 and %v8. %v9 and %v10 are stored last.
; In the expected code below, the pre-branch values are spilled to scratch
; (%v7 -> offset 8, %v8 -> offset 12, %v9 -> offset 0, %v10 -> offset 4), and
; in each arm the first loaded value is held in v10 while the other five are
; spilled to offsets 16..32.
71 define amdgpu_cs void @max_11_vgprs_branch(ptr addrspace(1) %p, i32 %tmp) "amdgpu-num-vgpr"="11" {
72 ; CHECK-LABEL: max_11_vgprs_branch:
73 ; CHECK: ; %bb.0: ; %.entry
74 ; CHECK-NEXT: global_load_b32 v3, v[0:1], off scope:SCOPE_SYS
75 ; CHECK-NEXT: s_wait_loadcnt 0x0
76 ; CHECK-NEXT: s_mov_b32 s0, exec_lo
77 ; CHECK-NEXT: v_ashrrev_i32_e32 v4, 31, v3
78 ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_1) | instskip(NEXT) | instid1(VALU_DEP_1)
79 ; CHECK-NEXT: v_lshlrev_b64_e32 v[3:4], 2, v[3:4]
80 ; CHECK-NEXT: v_add_co_u32 v0, vcc_lo, v0, v3
81 ; CHECK-NEXT: s_delay_alu instid0(VALU_DEP_2)
82 ; CHECK-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v4, vcc_lo
83 ; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:336 scope:SCOPE_SYS
84 ; CHECK-NEXT: s_wait_loadcnt 0x0
85 ; CHECK-NEXT: scratch_store_b32 off, v3, off offset:8 ; 4-byte Folded Spill
86 ; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:448 scope:SCOPE_SYS
87 ; CHECK-NEXT: s_wait_loadcnt 0x0
88 ; CHECK-NEXT: scratch_store_b32 off, v3, off offset:12 ; 4-byte Folded Spill
89 ; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:576 scope:SCOPE_SYS
90 ; CHECK-NEXT: s_wait_loadcnt 0x0
91 ; CHECK-NEXT: scratch_store_b32 off, v3, off ; 4-byte Folded Spill
92 ; CHECK-NEXT: global_load_b32 v3, v[0:1], off offset:720 scope:SCOPE_SYS
93 ; CHECK-NEXT: s_wait_loadcnt 0x0
94 ; CHECK-NEXT: scratch_store_b32 off, v3, off offset:4 ; 4-byte Folded Spill
95 ; CHECK-NEXT: v_cmpx_eq_u32_e32 0, v2
96 ; CHECK-NEXT: s_xor_b32 s0, exec_lo, s0
97 ; CHECK-NEXT: s_cbranch_execz .LBB1_2
98 ; CHECK-NEXT: ; %bb.1: ; %.false
99 ; CHECK-NEXT: global_load_b32 v10, v[0:1], off scope:SCOPE_SYS
100 ; CHECK-NEXT: s_wait_loadcnt 0x0
101 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 scope:SCOPE_SYS
102 ; CHECK-NEXT: s_wait_loadcnt 0x0
103 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:16 ; 4-byte Folded Spill
104 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 scope:SCOPE_SYS
105 ; CHECK-NEXT: s_wait_loadcnt 0x0
106 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:20 ; 4-byte Folded Spill
107 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 scope:SCOPE_SYS
108 ; CHECK-NEXT: s_wait_loadcnt 0x0
109 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:24 ; 4-byte Folded Spill
110 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:160 scope:SCOPE_SYS
111 ; CHECK-NEXT: s_wait_loadcnt 0x0
112 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:28 ; 4-byte Folded Spill
113 ; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:240 scope:SCOPE_SYS
114 ; CHECK-NEXT: s_wait_loadcnt 0x0
115 ; CHECK-NEXT: scratch_store_b32 off, v0, off offset:32 ; 4-byte Folded Spill
116 ; CHECK-NEXT: ;;#ASMSTART
117 ; CHECK-NEXT: ;;#ASMEND
118 ; CHECK-NEXT: s_wait_storecnt 0x0
119 ; CHECK-NEXT: global_store_b32 v[0:1], v10, off scope:SCOPE_SYS
120 ; CHECK-NEXT: s_wait_storecnt 0x0
121 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 th:TH_LOAD_LU ; 4-byte Folded Reload
122 ; CHECK-NEXT: s_wait_loadcnt 0x0
123 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
124 ; CHECK-NEXT: s_wait_storecnt 0x0
125 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 th:TH_LOAD_LU ; 4-byte Folded Reload
126 ; CHECK-NEXT: s_wait_loadcnt 0x0
127 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
128 ; CHECK-NEXT: s_wait_storecnt 0x0
129 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 th:TH_LOAD_LU ; 4-byte Folded Reload
130 ; CHECK-NEXT: s_wait_loadcnt 0x0
131 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
132 ; CHECK-NEXT: s_wait_storecnt 0x0
133 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 th:TH_LOAD_LU ; 4-byte Folded Reload
134 ; CHECK-NEXT: s_wait_loadcnt 0x0
135 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
136 ; CHECK-NEXT: s_wait_storecnt 0x0
137 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 th:TH_LOAD_LU ; 4-byte Folded Reload
138 ; CHECK-NEXT: s_wait_loadcnt 0x0
139 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
140 ; CHECK-NEXT: s_wait_storecnt 0x0
141 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
142 ; CHECK-NEXT: s_wait_loadcnt 0x0
143 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
144 ; CHECK-NEXT: s_wait_storecnt 0x0
145 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
146 ; CHECK-NEXT: s_wait_loadcnt 0x0
147 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
148 ; CHECK-NEXT: s_wait_storecnt 0x0
149 ; CHECK-NEXT: ; implicit-def: $vgpr0
150 ; CHECK-NEXT: ; kill: killed $vgpr0
151 ; CHECK-NEXT: ; implicit-def: $vgpr0
152 ; CHECK-NEXT: ; kill: killed $vgpr0
153 ; CHECK-NEXT: ; implicit-def: $vgpr0_vgpr1
154 ; CHECK-NEXT: .LBB1_2: ; %Flow
155 ; CHECK-NEXT: s_and_not1_saveexec_b32 s0, s0
156 ; CHECK-NEXT: s_cbranch_execz .LBB1_4
157 ; CHECK-NEXT: ; %bb.3: ; %.true
158 ; CHECK-NEXT: global_load_b32 v10, v[0:1], off scope:SCOPE_SYS
159 ; CHECK-NEXT: s_wait_loadcnt 0x0
160 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:16 scope:SCOPE_SYS
161 ; CHECK-NEXT: s_wait_loadcnt 0x0
162 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:16 ; 4-byte Folded Spill
163 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:48 scope:SCOPE_SYS
164 ; CHECK-NEXT: s_wait_loadcnt 0x0
165 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:20 ; 4-byte Folded Spill
166 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:96 scope:SCOPE_SYS
167 ; CHECK-NEXT: s_wait_loadcnt 0x0
168 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:24 ; 4-byte Folded Spill
169 ; CHECK-NEXT: global_load_b32 v2, v[0:1], off offset:160 scope:SCOPE_SYS
170 ; CHECK-NEXT: s_wait_loadcnt 0x0
171 ; CHECK-NEXT: scratch_store_b32 off, v2, off offset:28 ; 4-byte Folded Spill
172 ; CHECK-NEXT: global_load_b32 v0, v[0:1], off offset:240 scope:SCOPE_SYS
173 ; CHECK-NEXT: s_wait_loadcnt 0x0
174 ; CHECK-NEXT: scratch_store_b32 off, v0, off offset:32 ; 4-byte Folded Spill
175 ; CHECK-NEXT: ;;#ASMSTART
176 ; CHECK-NEXT: ;;#ASMEND
177 ; CHECK-NEXT: s_wait_storecnt 0x0
178 ; CHECK-NEXT: global_store_b32 v[0:1], v10, off scope:SCOPE_SYS
179 ; CHECK-NEXT: s_wait_storecnt 0x0
180 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:16 th:TH_LOAD_LU ; 4-byte Folded Reload
181 ; CHECK-NEXT: s_wait_loadcnt 0x0
182 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
183 ; CHECK-NEXT: s_wait_storecnt 0x0
184 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:20 th:TH_LOAD_LU ; 4-byte Folded Reload
185 ; CHECK-NEXT: s_wait_loadcnt 0x0
186 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
187 ; CHECK-NEXT: s_wait_storecnt 0x0
188 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:24 th:TH_LOAD_LU ; 4-byte Folded Reload
189 ; CHECK-NEXT: s_wait_loadcnt 0x0
190 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
191 ; CHECK-NEXT: s_wait_storecnt 0x0
192 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:28 th:TH_LOAD_LU ; 4-byte Folded Reload
193 ; CHECK-NEXT: s_wait_loadcnt 0x0
194 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
195 ; CHECK-NEXT: s_wait_storecnt 0x0
196 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:32 th:TH_LOAD_LU ; 4-byte Folded Reload
197 ; CHECK-NEXT: s_wait_loadcnt 0x0
198 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
199 ; CHECK-NEXT: s_wait_storecnt 0x0
200 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:8 th:TH_LOAD_LU ; 4-byte Folded Reload
201 ; CHECK-NEXT: s_wait_loadcnt 0x0
202 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
203 ; CHECK-NEXT: s_wait_storecnt 0x0
204 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:12 th:TH_LOAD_LU ; 4-byte Folded Reload
205 ; CHECK-NEXT: s_wait_loadcnt 0x0
206 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
207 ; CHECK-NEXT: s_wait_storecnt 0x0
208 ; CHECK-NEXT: .LBB1_4: ; %.exit
209 ; CHECK-NEXT: s_or_b32 exec_lo, exec_lo, s0
210 ; CHECK-NEXT: scratch_load_b32 v0, off, off th:TH_LOAD_LU ; 4-byte Folded Reload
211 ; CHECK-NEXT: s_wait_loadcnt 0x0
212 ; CHECK-NEXT: s_wait_storecnt 0x0
213 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
214 ; CHECK-NEXT: s_wait_storecnt 0x0
215 ; CHECK-NEXT: scratch_load_b32 v0, off, off offset:4 th:TH_LOAD_LU ; 4-byte Folded Reload
216 ; CHECK-NEXT: s_wait_loadcnt 0x0
217 ; CHECK-NEXT: global_store_b32 v[0:1], v0, off scope:SCOPE_SYS
218 ; CHECK-NEXT: s_wait_storecnt 0x0
219 ; CHECK-NEXT: s_endpgm
; Index value read through a volatile load from an undef pointer.
221 %tid = load volatile i32, ptr addrspace(1) undef
; %p1 = %p + %tid elements. Each later GEP is relative to the previous one, so
; %p2..%p10 sit 4, 12, 24, 40, 60, 84, 112, 144 and 180 i32 elements past %p1,
; i.e. byte offsets 16, 48, 96, 160, 240, 336, 448, 576 and 720 as seen in the
; expected loads above.
222 %p1 = getelementptr inbounds i32, ptr addrspace(1) %p, i32 %tid
223 %p2 = getelementptr inbounds i32, ptr addrspace(1) %p1, i32 4
224 %p3 = getelementptr inbounds i32, ptr addrspace(1) %p2, i32 8
225 %p4 = getelementptr inbounds i32, ptr addrspace(1) %p3, i32 12
226 %p5 = getelementptr inbounds i32, ptr addrspace(1) %p4, i32 16
227 %p6 = getelementptr inbounds i32, ptr addrspace(1) %p5, i32 20
228 %p7 = getelementptr inbounds i32, ptr addrspace(1) %p6, i32 24
229 %p8 = getelementptr inbounds i32, ptr addrspace(1) %p7, i32 28
230 %p9 = getelementptr inbounds i32, ptr addrspace(1) %p8, i32 32
231 %p10 = getelementptr inbounds i32, ptr addrspace(1) %p9, i32 36
; Values loaded before the branch; all four are used only after an inline asm.
232 %v7 = load volatile i32, ptr addrspace(1) %p7
233 %v8 = load volatile i32, ptr addrspace(1) %p8
234 %v9 = load volatile i32, ptr addrspace(1) %p9
235 %v10 = load volatile i32, ptr addrspace(1) %p10
; Branch to %.true when %tmp is non-zero, otherwise to %.false.
236 %cmp = icmp ne i32 %tmp, 0
237 br i1 %cmp, label %.true, label %.false
; "_t" values: six loads from %p1..%p6, an empty inline asm declaring v0-v9
; clobbered, then volatile stores of the six values followed by %v7 and %v8.
240 %v1_t = load volatile i32, ptr addrspace(1) %p1
241 %v2_t = load volatile i32, ptr addrspace(1) %p2
242 %v3_t = load volatile i32, ptr addrspace(1) %p3
243 %v4_t = load volatile i32, ptr addrspace(1) %p4
244 %v5_t = load volatile i32, ptr addrspace(1) %p5
245 %v6_t = load volatile i32, ptr addrspace(1) %p6
246 call void asm sideeffect "", "~{v[0:9]}" ()
247 store volatile i32 %v1_t, ptr addrspace(1) undef
248 store volatile i32 %v2_t, ptr addrspace(1) undef
249 store volatile i32 %v3_t, ptr addrspace(1) undef
250 store volatile i32 %v4_t, ptr addrspace(1) undef
251 store volatile i32 %v5_t, ptr addrspace(1) undef
252 store volatile i32 %v6_t, ptr addrspace(1) undef
253 store volatile i32 %v7, ptr addrspace(1) undef
254 store volatile i32 %v8, ptr addrspace(1) undef
; "_f" values: the same load / asm / store sequence as the "_t" group.
259 %v1_f = load volatile i32, ptr addrspace(1) %p1
260 %v2_f = load volatile i32, ptr addrspace(1) %p2
261 %v3_f = load volatile i32, ptr addrspace(1) %p3
262 %v4_f = load volatile i32, ptr addrspace(1) %p4
263 %v5_f = load volatile i32, ptr addrspace(1) %p5
264 %v6_f = load volatile i32, ptr addrspace(1) %p6
265 call void asm sideeffect "", "~{v[0:9]}" ()
266 store volatile i32 %v1_f, ptr addrspace(1) undef
267 store volatile i32 %v2_f, ptr addrspace(1) undef
268 store volatile i32 %v3_f, ptr addrspace(1) undef
269 store volatile i32 %v4_f, ptr addrspace(1) undef
270 store volatile i32 %v5_f, ptr addrspace(1) undef
271 store volatile i32 %v6_f, ptr addrspace(1) undef
272 store volatile i32 %v7, ptr addrspace(1) undef
273 store volatile i32 %v8, ptr addrspace(1) undef
; %v9 and %v10 are stored last; the expected code reloads them from scratch
; offsets 0 and 4.
278 store volatile i32 %v9, ptr addrspace(1) undef
279 store volatile i32 %v10, ptr addrspace(1) undef