1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 2
2 ; RUN: llc -march=amdgcn -mcpu=gfx600 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX67,GFX6
3 ; RUN: llc -march=amdgcn -mcpu=gfx700 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX67,GFX7
4 ; RUN: llc -march=amdgcn -mcpu=gfx801 < %s | FileCheck %s -check-prefixes=GFX6789,GFX678,GFX689,GFX89
5 ; RUN: llc -march=amdgcn -mcpu=gfx900 < %s | FileCheck %s -check-prefixes=GFX6789,GFX689,GFX89,GFX9
; Loop that issues a volatile i32 load from %ptr + 400 bytes on every
; iteration; %val is the initial counter, decremented until it reaches zero.
; The checks below expect no address arithmetic ahead of the loop on any of
; the four RUN targets: the constant shows up as the s_load_dword immediate,
; 0x64 on gfx600/gfx700 and 0x190 on gfx801/gfx900.
; NOTE(review): 0x64 = 100 = 400 / 4, so the older targets presumably encode
; the immediate in dwords rather than bytes -- confirm against the ISA docs.
7 define amdgpu_cs void @test_sink_smem_offset_400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
8 ; GFX67-LABEL: test_sink_smem_offset_400:
9 ; GFX67: ; %bb.0: ; %entry
10 ; GFX67-NEXT: .LBB0_1: ; %loop
11 ; GFX67-NEXT: ; =>This Inner Loop Header: Depth=1
12 ; GFX67-NEXT: s_waitcnt lgkmcnt(0)
13 ; GFX67-NEXT: s_load_dword s3, s[0:1], 0x64
14 ; GFX67-NEXT: s_add_i32 s2, s2, -1
15 ; GFX67-NEXT: s_cmp_lg_u32 s2, 0
16 ; GFX67-NEXT: s_cbranch_scc1 .LBB0_1
17 ; GFX67-NEXT: ; %bb.2: ; %end
18 ; GFX67-NEXT: s_endpgm
20 ; GFX89-LABEL: test_sink_smem_offset_400:
21 ; GFX89: ; %bb.0: ; %entry
22 ; GFX89-NEXT: .LBB0_1: ; %loop
23 ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
24 ; GFX89-NEXT: s_waitcnt lgkmcnt(0)
25 ; GFX89-NEXT: s_load_dword s3, s[0:1], 0x190
26 ; GFX89-NEXT: s_add_i32 s2, s2, -1
27 ; GFX89-NEXT: s_cmp_lg_u32 s2, 0
28 ; GFX89-NEXT: s_cbranch_scc1 .LBB0_1
29 ; GFX89-NEXT: ; %bb.2: ; %end
30 ; GFX89-NEXT: s_endpgm
; Constant byte offset (+400) applied to the incoming pointer.
32 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 400
; Down-counter: %val on the first pass, the decremented value afterwards.
36 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
37 %dec = sub i32 %count, 1
; Volatile load through the offset pointer, executed on every iteration.
38 %load = load volatile i32, ptr addrspace(4) %gep
; Leave the loop once the decremented counter is zero.
39 %cond = icmp eq i32 %dec, 0
40 br i1 %cond, label %end, label %loop
; Same loop shape as the +400 case, with a byte offset of 4000 (0xfa0).
; Expected code differs per RUN target:
;   gfx600        - s_add_u32/s_addc_u32 add 0xfa0 to s[0:1] ahead of the
;                   loop and the load uses offset 0x0;
;   gfx700        - the load carries the immediate 0x3e8 (= 1000 = 4000 / 4);
;   gfx801/gfx900 - the load carries the immediate 0xfa0.
; NOTE(review): presumably 1000 dwords no longer fits the gfx600 immediate
; field while gfx700 can still encode it -- confirm against the ISA docs.
46 define amdgpu_cs void @test_sink_smem_offset_4000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
47 ; GFX6-LABEL: test_sink_smem_offset_4000:
48 ; GFX6: ; %bb.0: ; %entry
49 ; GFX6-NEXT: s_add_u32 s0, s0, 0xfa0
50 ; GFX6-NEXT: s_addc_u32 s1, s1, 0
51 ; GFX6-NEXT: .LBB1_1: ; %loop
52 ; GFX6-NEXT: ; =>This Inner Loop Header: Depth=1
53 ; GFX6-NEXT: s_waitcnt lgkmcnt(0)
54 ; GFX6-NEXT: s_load_dword s3, s[0:1], 0x0
55 ; GFX6-NEXT: s_add_i32 s2, s2, -1
56 ; GFX6-NEXT: s_cmp_lg_u32 s2, 0
57 ; GFX6-NEXT: s_cbranch_scc1 .LBB1_1
58 ; GFX6-NEXT: ; %bb.2: ; %end
61 ; GFX7-LABEL: test_sink_smem_offset_4000:
62 ; GFX7: ; %bb.0: ; %entry
63 ; GFX7-NEXT: .LBB1_1: ; %loop
64 ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
65 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
66 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0x3e8
67 ; GFX7-NEXT: s_add_i32 s2, s2, -1
68 ; GFX7-NEXT: s_cmp_lg_u32 s2, 0
69 ; GFX7-NEXT: s_cbranch_scc1 .LBB1_1
70 ; GFX7-NEXT: ; %bb.2: ; %end
73 ; GFX89-LABEL: test_sink_smem_offset_4000:
74 ; GFX89: ; %bb.0: ; %entry
75 ; GFX89-NEXT: .LBB1_1: ; %loop
76 ; GFX89-NEXT: ; =>This Inner Loop Header: Depth=1
77 ; GFX89-NEXT: s_waitcnt lgkmcnt(0)
78 ; GFX89-NEXT: s_load_dword s3, s[0:1], 0xfa0
79 ; GFX89-NEXT: s_add_i32 s2, s2, -1
80 ; GFX89-NEXT: s_cmp_lg_u32 s2, 0
81 ; GFX89-NEXT: s_cbranch_scc1 .LBB1_1
82 ; GFX89-NEXT: ; %bb.2: ; %end
83 ; GFX89-NEXT: s_endpgm
; Constant byte offset (+4000) applied to the incoming pointer.
85 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000
; Down-counter: %val on the first pass, the decremented value afterwards.
89 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
90 %dec = sub i32 %count, 1
; Volatile load through the offset pointer, executed on every iteration.
91 %load = load volatile i32, ptr addrspace(4) %gep
; Leave the loop once the decremented counter is zero.
92 %cond = icmp eq i32 %dec, 0
93 br i1 %cond, label %end, label %loop
; Same loop shape with a byte offset of 4000000 (0x3d0900).
; Expected code per RUN target:
;   gfx600/gfx801/gfx900 - s_add_u32/s_addc_u32 add 0x3d0900 to s[0:1] ahead
;                          of the loop and the load uses offset 0x0;
;   gfx700               - the load carries the immediate 0xf4240
;                          (= 1000000 = 4000000 / 4).
; NOTE(review): presumably only gfx700 can encode an offset this large
; directly in the load -- confirm against the ISA docs.
99 define amdgpu_cs void @test_sink_smem_offset_4000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
100 ; GFX689-LABEL: test_sink_smem_offset_4000000:
101 ; GFX689: ; %bb.0: ; %entry
102 ; GFX689-NEXT: s_add_u32 s0, s0, 0x3d0900
103 ; GFX689-NEXT: s_addc_u32 s1, s1, 0
104 ; GFX689-NEXT: .LBB2_1: ; %loop
105 ; GFX689-NEXT: ; =>This Inner Loop Header: Depth=1
106 ; GFX689-NEXT: s_waitcnt lgkmcnt(0)
107 ; GFX689-NEXT: s_load_dword s3, s[0:1], 0x0
108 ; GFX689-NEXT: s_add_i32 s2, s2, -1
109 ; GFX689-NEXT: s_cmp_lg_u32 s2, 0
110 ; GFX689-NEXT: s_cbranch_scc1 .LBB2_1
111 ; GFX689-NEXT: ; %bb.2: ; %end
112 ; GFX689-NEXT: s_endpgm
114 ; GFX7-LABEL: test_sink_smem_offset_4000000:
115 ; GFX7: ; %bb.0: ; %entry
116 ; GFX7-NEXT: .LBB2_1: ; %loop
117 ; GFX7-NEXT: ; =>This Inner Loop Header: Depth=1
118 ; GFX7-NEXT: s_waitcnt lgkmcnt(0)
119 ; GFX7-NEXT: s_load_dword s3, s[0:1], 0xf4240
120 ; GFX7-NEXT: s_add_i32 s2, s2, -1
121 ; GFX7-NEXT: s_cmp_lg_u32 s2, 0
122 ; GFX7-NEXT: s_cbranch_scc1 .LBB2_1
123 ; GFX7-NEXT: ; %bb.2: ; %end
124 ; GFX7-NEXT: s_endpgm
; Constant byte offset (+4000000) applied to the incoming pointer.
126 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 4000000
; Down-counter: %val on the first pass, the decremented value afterwards.
130 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
131 %dec = sub i32 %count, 1
; Volatile load through the offset pointer, executed on every iteration.
132 %load = load volatile i32, ptr addrspace(4) %gep
; Leave the loop once the decremented counter is zero.
133 %cond = icmp eq i32 %dec, 0
134 br i1 %cond, label %end, label %loop
; Same loop shape with a byte offset of 40000000000, which needs more than
; 32 bits (0x9_502f9000). All four RUN targets share one set of checks: the
; offset is added to s[0:1] ahead of the loop as a 64-bit add (low word
; 0x502f9000 via s_add_u32, high word 9 via s_addc_u32) and the load itself
; uses offset 0x0.
140 define amdgpu_cs void @test_sink_smem_offset_40000000000(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
141 ; GFX6789-LABEL: test_sink_smem_offset_40000000000:
142 ; GFX6789: ; %bb.0: ; %entry
143 ; GFX6789-NEXT: s_add_u32 s0, s0, 0x502f9000
144 ; GFX6789-NEXT: s_addc_u32 s1, s1, 9
145 ; GFX6789-NEXT: .LBB3_1: ; %loop
146 ; GFX6789-NEXT: ; =>This Inner Loop Header: Depth=1
147 ; GFX6789-NEXT: s_waitcnt lgkmcnt(0)
148 ; GFX6789-NEXT: s_load_dword s3, s[0:1], 0x0
149 ; GFX6789-NEXT: s_add_i32 s2, s2, -1
150 ; GFX6789-NEXT: s_cmp_lg_u32 s2, 0
151 ; GFX6789-NEXT: s_cbranch_scc1 .LBB3_1
152 ; GFX6789-NEXT: ; %bb.2: ; %end
153 ; GFX6789-NEXT: s_endpgm
; Constant byte offset (+40000000000) applied to the incoming pointer.
155 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 40000000000
; Down-counter: %val on the first pass, the decremented value afterwards.
159 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
160 %dec = sub i32 %count, 1
; Volatile load through the offset pointer, executed on every iteration.
161 %load = load volatile i32, ptr addrspace(4) %gep
; Leave the loop once the decremented counter is zero.
162 %cond = icmp eq i32 %dec, 0
163 br i1 %cond, label %end, label %loop
; Same loop shape with a negative byte offset of -400.
; Expected code per RUN target:
;   gfx600/gfx700/gfx801 - s_add_u32/s_addc_u32 add the 64-bit value
;                          0xffffffff_fffffe70 (-400) to s[0:1] ahead of the
;                          loop and the load uses offset 0x0;
;   gfx900               - the load carries the immediate -0x190.
; NOTE(review): presumably only gfx900 accepts a negative immediate on this
; load -- confirm against the ISA docs.
169 define amdgpu_cs void @test_sink_smem_offset_neg400(ptr addrspace(4) inreg %ptr, i32 inreg %val) {
170 ; GFX678-LABEL: test_sink_smem_offset_neg400:
171 ; GFX678: ; %bb.0: ; %entry
172 ; GFX678-NEXT: s_add_u32 s0, s0, 0xfffffe70
173 ; GFX678-NEXT: s_addc_u32 s1, s1, -1
174 ; GFX678-NEXT: .LBB4_1: ; %loop
175 ; GFX678-NEXT: ; =>This Inner Loop Header: Depth=1
176 ; GFX678-NEXT: s_waitcnt lgkmcnt(0)
177 ; GFX678-NEXT: s_load_dword s3, s[0:1], 0x0
178 ; GFX678-NEXT: s_add_i32 s2, s2, -1
179 ; GFX678-NEXT: s_cmp_lg_u32 s2, 0
180 ; GFX678-NEXT: s_cbranch_scc1 .LBB4_1
181 ; GFX678-NEXT: ; %bb.2: ; %end
182 ; GFX678-NEXT: s_endpgm
184 ; GFX9-LABEL: test_sink_smem_offset_neg400:
185 ; GFX9: ; %bb.0: ; %entry
186 ; GFX9-NEXT: .LBB4_1: ; %loop
187 ; GFX9-NEXT: ; =>This Inner Loop Header: Depth=1
188 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
189 ; GFX9-NEXT: s_load_dword s3, s[0:1], -0x190
190 ; GFX9-NEXT: s_add_i32 s2, s2, -1
191 ; GFX9-NEXT: s_cmp_lg_u32 s2, 0
192 ; GFX9-NEXT: s_cbranch_scc1 .LBB4_1
193 ; GFX9-NEXT: ; %bb.2: ; %end
194 ; GFX9-NEXT: s_endpgm
; Constant byte offset (-400) applied to the incoming pointer.
196 %gep = getelementptr i8, ptr addrspace(4) %ptr, i64 -400
; Down-counter: %val on the first pass, the decremented value afterwards.
200 %count = phi i32 [ %dec, %loop ], [ %val, %entry ]
201 %dec = sub i32 %count, 1
; Volatile load through the offset pointer, executed on every iteration.
202 %load = load volatile i32, ptr addrspace(4) %gep
; Leave the loop once the decremented counter is zero.
203 %cond = icmp eq i32 %dec, 0
204 br i1 %cond, label %end, label %loop