1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
4 ; RUN: llc -mtriple=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
5 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
6 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 < %s | FileCheck %s -check-prefixes=GFX12
; Loop whose only memory access is a volatile i32 load from %gep, which is
; %ptr (addrspace(4), inreg) plus a constant byte offset of 400. %gep is formed
; ahead of the loop's phi; the counter starts at %val, is decremented once per
; iteration, and the loop exits when the decremented value equals 0. The loaded
; value is never used.
; The expected output has no address add ahead of the loop for any prefix; the
; offset appears as the s_load immediate instead: 0x64 (= 400 / 4) for the
; GFX67 prefix and 0x190 (= 400) for the GFX89 and GFX12 prefixes.
8 define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
9 ; GFX67-LABEL: test_sink_smem_offset_400:
10 ; GFX67: ; %bb.0: ; %entry
11 ; GFX67-NEXT: .LBB0_1: ; %loop
12 ; GFX67-NEXT: ; =>This Inner Loop Header: Depth=1
13 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
14 ; GFX67-NEXT: s_load_dword s3, s[0:1], 0x64
15 ; GFX67-NEXT: s_add_i32 s2, s2, -1
16 ; GFX67-NEXT: s_cmp_lg_u32 s2, 0
17 ; GFX67-NEXT: s_cbranch_scc1 .LBB0_1
18 ; GFX67-NEXT: ; %bb.2: ; %end
19 ; GFX67-NEXT: s_endpgm
21 ; GFX89-LABEL: test_sink_smem_offset_400:
22 ; GFX89: ; %bb.0: ; %entry
23 ; GFX89-NEXT: .LBB0_1: ; %loop
24 ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
25 ; GFX89-NEXT: s_waitcnt lgkmcnt(0)
26 ; GFX89-NEXT: s_load_dword s3, s[0:1], 0x190
27 ; GFX89-NEXT: s_add_i32 s2, s2, -1
28 ; GFX89-NEXT: s_cmp_lg_u32 s2, 0
29 ; GFX89-NEXT: s_cbranch_scc1 .LBB0_1
30 ; GFX89-NEXT: ; %bb.2: ; %end
31 ; GFX89-NEXT: s_endpgm
33 ; GFX12-LABEL: test_sink_smem_offset_400:
34 ; GFX12: ; %bb.0: ; %entry
35 ; GFX12-NEXT: .LBB0_1: ; %loop
36 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
37 ; GFX12-NEXT: s_wait_kmcnt 0x0
38 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x190
39 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
40 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
41 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
42 ; GFX12-NEXT: s_cbranch_scc1 .LBB0_1
43 ; GFX12-NEXT: ; %bb.2: ; %end
44 ; GFX12-NEXT: s_endpgm
46 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
50 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
51 %dec = sub i32 %count, 1
52 %load = load volatile i32, ptr addrspace(4) %gep
53 %cond = icmp eq i32 %dec, 0
54 br i1 %cond, label %end, label %loop
; Same loop shape with a constant byte offset of 4000 (0xfa0) on the
; addrspace(4) pointer: volatile i32 load from %gep on every iteration, counter
; seeded from %val and decremented until it reaches 0.
; Expected output per prefix:
;  - GFX6 prefix: the offset is added to s[0:1] ahead of the loop with
;    s_add_u32 / s_addc_u32, and the load uses immediate 0x0.
;  - GFX7 prefix: no add; the load immediate is 0x3e8 (= 4000 / 4).
;  - GFX89 and GFX12 prefixes: no add; the load immediate is 0xfa0 (= 4000).
60 define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
61 ; GFX6-LABEL: test_sink_smem_offset_4000:
62 ; GFX6: ; %bb.0: ; %entry
63 ; GFX6-NEXT: s_add_u32 s0, s0, 0xfa0
64 ; GFX6-NEXT: s_addc_u32 s1, s1, 0
65 ; GFX6-NEXT: .LBB1_1: ; %loop
66 ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1
67 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
68 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x0
69 ; GFX6-NEXT: s_add_i32 s2, s2, -1
70 ; GFX6-NEXT: s_cmp_lg_u32 s2, 0
71 ; GFX6-NEXT: s_cbranch_scc1 .LBB1_1
72 ; GFX6-NEXT: ; %bb.2: ; %end
75 ; GFX7-LABEL: test_sink_smem_offset_4000:
76 ; GFX7: ; %bb.0: ; %entry
77 ; GFX7-NEXT: .LBB1_1: ; %loop
78 ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
79 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
80 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x3e8
81 ; GFX7-NEXT: s_add_i32 s2, s2, -1
82 ; GFX7-NEXT: s_cmp_lg_u32 s2, 0
83 ; GFX7-NEXT: s_cbranch_scc1 .LBB1_1
84 ; GFX7-NEXT: ; %bb.2: ; %end
87 ; GFX89-LABEL: test_sink_smem_offset_4000:
88 ; GFX89: ; %bb.0: ; %entry
89 ; GFX89-NEXT: .LBB1_1: ; %loop
90 ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
91 ; GFX89-NEXT: s_waitcnt lgkmcnt(0)
92 ; GFX89-NEXT: s_load_dword s3, s[0:1], 0xfa0
93 ; GFX89-NEXT: s_add_i32 s2, s2, -1
94 ; GFX89-NEXT: s_cmp_lg_u32 s2, 0
95 ; GFX89-NEXT: s_cbranch_scc1 .LBB1_1
96 ; GFX89-NEXT: ; %bb.2: ; %end
97 ; GFX89-NEXT: s_endpgm
99 ; GFX12-LABEL: test_sink_smem_offset_4000:
100 ; GFX12: ; %bb.0: ; %entry
101 ; GFX12-NEXT: .LBB1_1: ; %loop
102 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
103 ; GFX12-NEXT: s_wait_kmcnt 0x0
104 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0xfa0
105 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
106 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
107 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
108 ; GFX12-NEXT: s_cbranch_scc1 .LBB1_1
109 ; GFX12-NEXT: ; %bb.2: ; %end
110 ; GFX12-NEXT: s_endpgm
112 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
116 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
117 %dec = sub i32 %count, 1
118 %load = load volatile i32, ptr addrspace(4) %gep
119 %cond = icmp eq i32 %dec, 0
120 br i1 %cond, label %end, label %loop
; Same loop shape with a constant byte offset of 4000000 (0x3d0900) on the
; addrspace(4) pointer: volatile i32 load from %gep on every iteration, counter
; seeded from %val and decremented until it reaches 0.
; Expected output per prefix:
;  - GFX689 prefix: the offset is added to s[0:1] ahead of the loop with
;    s_add_u32 / s_addc_u32, and the load uses immediate 0x0.
;  - GFX7 prefix: no add; the load immediate is 0xf4240 (= 4000000 / 4).
;  - GFX12 prefix: no add; the load immediate is 0x3d0900 (= 4000000).
126 define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
127 ; GFX689-LABEL: test_sink_smem_offset_4000000:
128 ; GFX689: ; %bb.0: ; %entry
129 ; GFX689-NEXT: s_add_u32 s0, s0, 0x3d0900
130 ; GFX689-NEXT: s_addc_u32 s1, s1, 0
131 ; GFX689-NEXT: .LBB2_1: ; %loop
132 ; GFX689-NEXT: ; =>This Inner Loop Header: Depth=1
133 ; GFX689-NEXT: s_waitcnt lgkmcnt(0)
134 ; GFX689-NEXT: s_load_dword s3, s[0:1], 0x0
135 ; GFX689-NEXT: s_add_i32 s2, s2, -1
136 ; GFX689-NEXT: s_cmp_lg_u32 s2, 0
137 ; GFX689-NEXT: s_cbranch_scc1 .LBB2_1
138 ; GFX689-NEXT: ; %bb.2: ; %end
139 ; GFX689-NEXT: s_endpgm
141 ; GFX7-LABEL: test_sink_smem_offset_4000000:
142 ; GFX7: ; %bb.0: ; %entry
143 ; GFX7-NEXT: .LBB2_1: ; %loop
144 ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
145 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
146 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0xf4240
147 ; GFX7-NEXT: s_add_i32 s2, s2, -1
148 ; GFX7-NEXT: s_cmp_lg_u32 s2, 0
149 ; GFX7-NEXT: s_cbranch_scc1 .LBB2_1
150 ; GFX7-NEXT: ; %bb.2: ; %end
151 ; GFX7-NEXT: s_endpgm
153 ; GFX12-LABEL: test_sink_smem_offset_4000000:
154 ; GFX12: ; %bb.0: ; %entry
155 ; GFX12-NEXT: .LBB2_1: ; %loop
156 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
157 ; GFX12-NEXT: s_wait_kmcnt 0x0
158 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x3d0900
159 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
160 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
161 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
162 ; GFX12-NEXT: s_cbranch_scc1 .LBB2_1
163 ; GFX12-NEXT: ; %bb.2: ; %end
164 ; GFX12-NEXT: s_endpgm
166 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
170 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
171 %dec = sub i32 %count, 1
172 %load = load volatile i32, ptr addrspace(4) %gep
173 %cond = icmp eq i32 %dec, 0
174 br i1 %cond, label %end, label %loop
; Same loop shape with a constant byte offset of 40000000 (0x2625a00) on the
; addrspace(4) pointer: volatile i32 load from %gep on every iteration, counter
; seeded from %val and decremented until it reaches 0.
; Expected output per prefix:
;  - GFX689 prefix: the offset is added to s[0:1] ahead of the loop with
;    s_add_u32 / s_addc_u32, and the load uses immediate 0x0.
;  - GFX7 prefix: no add; the load immediate is 0x989680 (= 40000000 / 4).
;  - GFX12 prefix: the offset is added ahead of the loop with a single
;    s_add_nc_u64 taking 0x2625a00 as a literal, and the load uses 0x0.
180 define amdgpu_cs void @test_sink_smem_offset_40000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
181 ; GFX689-LABEL: test_sink_smem_offset_40000000:
182 ; GFX689: ; %bb.0: ; %entry
183 ; GFX689-NEXT: s_add_u32 s0, s0, 0x2625a00
184 ; GFX689-NEXT: s_addc_u32 s1, s1, 0
185 ; GFX689-NEXT: .LBB3_1: ; %loop
186 ; GFX689-NEXT: ; =>This Inner Loop Header: Depth=1
187 ; GFX689-NEXT: s_waitcnt lgkmcnt(0)
188 ; GFX689-NEXT: s_load_dword s3, s[0:1], 0x0
189 ; GFX689-NEXT: s_add_i32 s2, s2, -1
190 ; GFX689-NEXT: s_cmp_lg_u32 s2, 0
191 ; GFX689-NEXT: s_cbranch_scc1 .LBB3_1
192 ; GFX689-NEXT: ; %bb.2: ; %end
193 ; GFX689-NEXT: s_endpgm
195 ; GFX7-LABEL: test_sink_smem_offset_40000000:
196 ; GFX7: ; %bb.0: ; %entry
197 ; GFX7-NEXT: .LBB3_1: ; %loop
198 ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
199 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
200 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x989680
201 ; GFX7-NEXT: s_add_i32 s2, s2, -1
202 ; GFX7-NEXT: s_cmp_lg_u32 s2, 0
203 ; GFX7-NEXT: s_cbranch_scc1 .LBB3_1
204 ; GFX7-NEXT: ; %bb.2: ; %end
205 ; GFX7-NEXT: s_endpgm
207 ; GFX12-LABEL: test_sink_smem_offset_40000000:
208 ; GFX12: ; %bb.0: ; %entry
209 ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x2625a00
210 ; GFX12-NEXT: .LBB3_1: ; %loop
211 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
212 ; GFX12-NEXT: s_wait_kmcnt 0x0
213 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
214 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
215 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
216 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
217 ; GFX12-NEXT: s_cbranch_scc1 .LBB3_1
218 ; GFX12-NEXT: ; %bb.2: ; %end
219 ; GFX12-NEXT: s_endpgm
221 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000
225 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
226 %dec = sub i32 %count, 1
227 %load = load volatile i32, ptr addrspace(4) %gep
228 %cond = icmp eq i32 %dec, 0
229 br i1 %cond, label %end, label %loop
; Same loop shape with a constant byte offset of 40000000000 (0x9502f9000),
; which does not fit in 32 bits: volatile i32 load from %gep on every
; iteration, counter seeded from %val and decremented until it reaches 0.
; Expected output: no prefix uses a non-zero load immediate here.
;  - GFX6789 prefix: s_add_u32 adds the low half 0x502f9000 and s_addc_u32
;    adds the high half 9 ahead of the loop; the load uses immediate 0x0.
;  - GFX12 prefix: the two halves are first moved into s[4:5], then added
;    with s_add_nc_u64 ahead of the loop; the load uses immediate 0x0.
235 define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
236 ; GFX6789-LABEL: test_sink_smem_offset_40000000000:
237 ; GFX6789: ; %bb.0: ; %entry
238 ; GFX6789-NEXT: s_add_u32 s0, s0, 0x502f9000
239 ; GFX6789-NEXT: s_addc_u32 s1, s1, 9
240 ; GFX6789-NEXT: .LBB4_1: ; %loop
241 ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
242 ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
243 ; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
244 ; GFX6789-NEXT: s_add_i32 s2, s2, -1
245 ; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
246 ; GFX6789-NEXT: s_cbranch_scc1 .LBB4_1
247 ; GFX6789-NEXT: ; %bb.2: ; %end
248 ; GFX6789-NEXT: s_endpgm
250 ; GFX12-LABEL: test_sink_smem_offset_40000000000:
251 ; GFX12: ; %bb.0: ; %entry
252 ; GFX12-NEXT: s_mov_b32 s4, 0x502f9000
253 ; GFX12-NEXT: s_mov_b32 s5, 9
254 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
255 ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
256 ; GFX12-NEXT: .LBB4_1: ; %loop
257 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
258 ; GFX12-NEXT: s_wait_kmcnt 0x0
259 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
260 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
261 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
262 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
263 ; GFX12-NEXT: s_cbranch_scc1 .LBB4_1
264 ; GFX12-NEXT: ; %bb.2: ; %end
265 ; GFX12-NEXT: s_endpgm
267 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
271 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
272 %dec = sub i32 %count, 1
273 %load = load volatile i32, ptr addrspace(4) %gep
274 %cond = icmp eq i32 %dec, 0
275 br i1 %cond, label %end, label %loop
; Same loop shape with a negative constant byte offset of -400 on the
; addrspace(4) pointer: volatile i32 load from %gep on every iteration, counter
; seeded from %val and decremented until it reaches 0.
; Expected output: no prefix uses a non-zero load immediate here.
;  - GFX6789 prefix: s_add_u32 adds 0xfffffe70 (the low 32 bits of -400) and
;    s_addc_u32 adds -1 ahead of the loop; the load uses immediate 0x0.
;  - GFX12 prefix: s[4:5] is set up with s_movk_i32 0xfe70 and s_mov_b32 -1,
;    then added with s_add_nc_u64 ahead of the loop; the load uses 0x0.
281 define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
282 ; GFX6789-LABEL: test_sink_smem_offset_neg400:
283 ; GFX6789: ; %bb.0: ; %entry
284 ; GFX6789-NEXT: s_add_u32 s0, s0, 0xfffffe70
285 ; GFX6789-NEXT: s_addc_u32 s1, s1, -1
286 ; GFX6789-NEXT: .LBB5_1: ; %loop
287 ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
288 ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
289 ; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
290 ; GFX6789-NEXT: s_add_i32 s2, s2, -1
291 ; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
292 ; GFX6789-NEXT: s_cbranch_scc1 .LBB5_1
293 ; GFX6789-NEXT: ; %bb.2: ; %end
294 ; GFX6789-NEXT: s_endpgm
296 ; GFX12-LABEL: test_sink_smem_offset_neg400:
297 ; GFX12: ; %bb.0: ; %entry
298 ; GFX12-NEXT: s_movk_i32 s4, 0xfe70
299 ; GFX12-NEXT: s_mov_b32 s5, -1
300 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
301 ; GFX12-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[4:5]
302 ; GFX12-NEXT: .LBB5_1: ; %loop
303 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
304 ; GFX12-NEXT: s_wait_kmcnt 0x0
305 ; GFX12-NEXT: s_load_b32 s3, s[0:1], 0x0
306 ; GFX12-NEXT: s_add_co_i32 s2, s2, -1
307 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
308 ; GFX12-NEXT: s_cmp_lg_u32 s2, 0
309 ; GFX12-NEXT: s_cbranch_scc1 .LBB5_1
310 ; GFX12-NEXT: ; %bb.2: ; %end
311 ; GFX12-NEXT: s_endpgm
313 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
317 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
318 %dec = sub i32 %count, 1
319 %load = load volatile i32, ptr addrspace(4) %gep
320 %cond = icmp eq i32 %dec, 0
321 br i1 %cond, label %end, label %loop
327 ; Same as @test_sink_smem_offset_neg400, but %ptr is in address space 6 (constant 32-bit).
; The loop is unchanged: volatile i32 load from %gep (%ptr - 400 bytes) on
; every iteration, counter seeded from %val and decremented until it reaches 0.
; Expected output for both the GFX6789 and GFX12 prefixes: a single 32-bit add
; of 0xfffffe70 to s0 forms the low half of the address in s2, s3 is set to 0
; as the high half, and the load reads from s[2:3] with immediate 0x0. The loop
; counter is in s1 here because the 32-bit pointer only occupies s0.
328 define amdgpu_cs void @test_sink_smem_offset_neg400_32bit(ptr addrspace(6) inreg %ptr, i32 inreg %val) {
329 ; GFX6789-LABEL: test_sink_smem_offset_neg400_32bit:
330 ; GFX6789: ; %bb.0: ; %entry
331 ; GFX6789-NEXT: s_add_i32 s2, s0, 0xfffffe70
332 ; GFX6789-NEXT: s_mov_b32 s3, 0
333 ; GFX6789-NEXT: .LBB6_1: ; %loop
334 ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
335 ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
336 ; GFX6789-NEXT: s_load_dword s0, s[2:3], 0x0
337 ; GFX6789-NEXT: s_add_i32 s1, s1, -1
338 ; GFX6789-NEXT: s_cmp_lg_u32 s1, 0
339 ; GFX6789-NEXT: s_cbranch_scc1 .LBB6_1
340 ; GFX6789-NEXT: ; %bb.2: ; %end
341 ; GFX6789-NEXT: s_endpgm
343 ; GFX12-LABEL: test_sink_smem_offset_neg400_32bit:
344 ; GFX12: ; %bb.0: ; %entry
345 ; GFX12-NEXT: s_add_co_i32 s2, s0, 0xfffffe70
346 ; GFX12-NEXT: s_mov_b32 s3, 0
347 ; GFX12-NEXT: .LBB6_1: ; %loop
348 ; GFX12-NEXT: ; =>This Inner Loop Header: Depth=1
349 ; GFX12-NEXT: s_wait_kmcnt 0x0
350 ; GFX12-NEXT: s_load_b32 s0, s[2:3], 0x0
351 ; GFX12-NEXT: s_add_co_i32 s1, s1, -1
352 ; GFX12-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
353 ; GFX12-NEXT: s_cmp_lg_u32 s1, 0
354 ; GFX12-NEXT: s_cbranch_scc1 .LBB6_1
355 ; GFX12-NEXT: ; %bb.2: ; %end
356 ; GFX12-NEXT: s_endpgm
358 %gep = getelementptr i8, ptr addrspace(6) %ptr, i64 -400
362 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
363 %dec = sub i32 %count, 1
364 %load = load volatile i32, ptr addrspace(6) %gep
365 %cond = icmp eq i32 %dec, 0
366 br i1 %cond, label %end, label %loop
372 ;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: