1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=bonaire < %s | FileCheck --check-prefixes=OPT,OPT-GFX7 %s
3 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=tonga < %s | FileCheck --check-prefixes=OPT,OPT-GFX8 %s
4 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx900 < %s | FileCheck --check-prefixes=OPT,OPT-GFX9 %s
5 ; RUN: opt -S -passes='require<profile-summary>,function(codegenprepare)' -mtriple=amdgcn-unknown-unknown -mcpu=gfx1030 < %s | FileCheck --check-prefixes=OPT,OPT-GFX10 %s
7 ; RUN: llc -mtriple=amdgcn -mcpu=bonaire < %s | FileCheck --check-prefix=GFX7 %s
8 ; RUN: llc -mtriple=amdgcn -mcpu=tonga < %s | FileCheck --check-prefix=GFX8 %s
9 ; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck --check-prefix=GFX9 %s
10 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1030 < %s | FileCheck --check-prefix=GFX10 %s
; Flat load from %in at a small constant offset (7 x i32 = 28 bytes) that is
; only executed in the %if block, followed by a flat store to %out at a large
; constant offset (999999 x i32 = 0x3d08fc bytes).
;
; What the checks below record:
;  - bonaire and tonga keep the %in GEP in the entry block and the ISA adds 28
;    to the pointer before a flat_load_dword with no immediate offset.
;  - gfx900 and gfx1030 get the address recomputed next to the load as an
;    i8 GEP of 28 (SUNKADDR) and the ISA folds it into "offset:28".
;  - The %out GEP stays in the entry block on every target; gfx900 and gfx1030
;    split 0x3d08fc into an add plus a store immediate offset
;    (0x3d0000 + 2300 and 0x3d0800 + 252 respectively).
12 define void @test_sinkable_flat_small_offset_i32(ptr %out, ptr %in, i32 %cond) {
13 ; OPT-GFX7-LABEL: @test_sinkable_flat_small_offset_i32(
14 ; OPT-GFX7-NEXT: entry:
15 ; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
16 ; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
17 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
18 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
20 ; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
21 ; OPT-GFX7-NEXT: br label [[ENDIF]]
23 ; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
24 ; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
25 ; OPT-GFX7-NEXT: br label [[DONE:%.*]]
27 ; OPT-GFX7-NEXT: ret void
29 ; OPT-GFX8-LABEL: @test_sinkable_flat_small_offset_i32(
30 ; OPT-GFX8-NEXT: entry:
31 ; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
32 ; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
33 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
34 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
36 ; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i32, ptr [[IN_GEP]], align 4
37 ; OPT-GFX8-NEXT: br label [[ENDIF]]
39 ; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
40 ; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
41 ; OPT-GFX8-NEXT: br label [[DONE:%.*]]
43 ; OPT-GFX8-NEXT: ret void
45 ; OPT-GFX9-LABEL: @test_sinkable_flat_small_offset_i32(
46 ; OPT-GFX9-NEXT: entry:
47 ; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
48 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
49 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
51 ; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
52 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
53 ; OPT-GFX9-NEXT: br label [[ENDIF]]
55 ; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
56 ; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
57 ; OPT-GFX9-NEXT: ret void
59 ; OPT-GFX10-LABEL: @test_sinkable_flat_small_offset_i32(
60 ; OPT-GFX10-NEXT: entry:
61 ; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
62 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
63 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
65 ; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 28
66 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr [[SUNKADDR]], align 4
67 ; OPT-GFX10-NEXT: br label [[ENDIF]]
69 ; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
70 ; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
71 ; OPT-GFX10-NEXT: ret void
73 ; GFX7-LABEL: test_sinkable_flat_small_offset_i32:
74 ; GFX7: ; %bb.0: ; %entry
75 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
76 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
77 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
78 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
79 ; GFX7-NEXT: s_cbranch_execz .LBB0_2
80 ; GFX7-NEXT: ; %bb.1: ; %if
81 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 28, v2
82 ; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
83 ; GFX7-NEXT: flat_load_dword v4, v[2:3]
84 ; GFX7-NEXT: .LBB0_2: ; %endif
85 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
86 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
87 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
88 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
89 ; GFX7-NEXT: flat_store_dword v[0:1], v4
90 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
91 ; GFX7-NEXT: s_setpc_b64 s[30:31]
93 ; GFX8-LABEL: test_sinkable_flat_small_offset_i32:
94 ; GFX8: ; %bb.0: ; %entry
95 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
96 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
97 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
98 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
99 ; GFX8-NEXT: s_cbranch_execz .LBB0_2
100 ; GFX8-NEXT: ; %bb.1: ; %if
101 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
102 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
103 ; GFX8-NEXT: flat_load_dword v4, v[2:3]
104 ; GFX8-NEXT: .LBB0_2: ; %endif
105 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
106 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
107 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
108 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
109 ; GFX8-NEXT: flat_store_dword v[0:1], v4
110 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
111 ; GFX8-NEXT: s_setpc_b64 s[30:31]
113 ; GFX9-LABEL: test_sinkable_flat_small_offset_i32:
114 ; GFX9: ; %bb.0: ; %entry
115 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
116 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
117 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
118 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
119 ; GFX9-NEXT: s_cbranch_execz .LBB0_2
120 ; GFX9-NEXT: ; %bb.1: ; %if
121 ; GFX9-NEXT: flat_load_dword v4, v[2:3] offset:28
122 ; GFX9-NEXT: .LBB0_2: ; %endif
123 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
124 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
125 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
126 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
127 ; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
128 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
129 ; GFX9-NEXT: s_setpc_b64 s[30:31]
131 ; GFX10-LABEL: test_sinkable_flat_small_offset_i32:
132 ; GFX10: ; %bb.0: ; %entry
133 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
134 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
135 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
136 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
137 ; GFX10-NEXT: s_cbranch_execz .LBB0_2
138 ; GFX10-NEXT: ; %bb.1: ; %if
139 ; GFX10-NEXT: flat_load_dword v4, v[2:3] offset:28
140 ; GFX10-NEXT: .LBB0_2: ; %endif
141 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
142 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
143 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
144 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
145 ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
146 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
147 ; GFX10-NEXT: s_setpc_b64 s[30:31]
; Both addresses are computed up front; only %in.gep has its single use (the
; load) in a different block, which makes it the candidate for sinking.
149 %out.gep = getelementptr i32, ptr %out, i64 999999
150 %in.gep = getelementptr i32, ptr %in, i64 7
151 %cmp0 = icmp eq i32 %cond, 0
152 br i1 %cmp0, label %endif, label %if
155 %load = load i32, ptr %in.gep
; %x is the loaded value when %if ran, otherwise 0.
159 %x = phi i32 [ %load, %if ], [ 0, %entry ]
160 store i32 %x, ptr %out.gep
; Same shape as the small-offset flat test, except the 28-byte GEP result is
; addrspacecast from a flat pointer to addrspace(1) in the entry block and the
; conditional load goes through the cast pointer.
;
; What the checks below record:
;  - bonaire, gfx900 and gfx1030 get both the cast and an i8 GEP of 28 placed
;    next to the load (TMP0 / SUNKADDR). The ISA uses "offset:28" on a
;    buffer_load_dword (bonaire, addr64) or a global_load_dword
;    (gfx900, gfx1030).
;  - tonga keeps the GEP in the entry block; only the addrspacecast appears in
;    the %if block, and the ISA adds 28 explicitly before a flat_load_dword.
;  - The store to %out is still a flat store at 0x3d08fc bytes on all targets.
167 define void @test_sink_noop_addrspacecast_flat_to_global_i32(ptr %out, ptr %in, i32 %cond) {
168 ; OPT-GFX7-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
169 ; OPT-GFX7-NEXT: entry:
170 ; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
171 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
172 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
174 ; OPT-GFX7-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
175 ; OPT-GFX7-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
176 ; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
177 ; OPT-GFX7-NEXT: br label [[ENDIF]]
179 ; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
180 ; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
181 ; OPT-GFX7-NEXT: ret void
183 ; OPT-GFX8-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
184 ; OPT-GFX8-NEXT: entry:
185 ; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
186 ; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i32, ptr [[IN:%.*]], i64 7
187 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
188 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
190 ; OPT-GFX8-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN_GEP]] to ptr addrspace(1)
191 ; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[TMP0]], align 4
192 ; OPT-GFX8-NEXT: br label [[ENDIF]]
194 ; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
195 ; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
196 ; OPT-GFX8-NEXT: ret void
198 ; OPT-GFX9-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
199 ; OPT-GFX9-NEXT: entry:
200 ; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
201 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
202 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
204 ; OPT-GFX9-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
205 ; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
206 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
207 ; OPT-GFX9-NEXT: br label [[ENDIF]]
209 ; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
210 ; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
211 ; OPT-GFX9-NEXT: ret void
213 ; OPT-GFX10-LABEL: @test_sink_noop_addrspacecast_flat_to_global_i32(
214 ; OPT-GFX10-NEXT: entry:
215 ; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
216 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
217 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
219 ; OPT-GFX10-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(1)
220 ; OPT-GFX10-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(1) [[TMP0]], i64 28
221 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(1) [[SUNKADDR]], align 4
222 ; OPT-GFX10-NEXT: br label [[ENDIF]]
224 ; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
225 ; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
226 ; OPT-GFX10-NEXT: ret void
228 ; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
229 ; GFX7: ; %bb.0: ; %entry
230 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
231 ; GFX7-NEXT: s_mov_b32 s6, 0
232 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
233 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
234 ; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
235 ; GFX7-NEXT: s_cbranch_execz .LBB1_2
236 ; GFX7-NEXT: ; %bb.1: ; %if
237 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
238 ; GFX7-NEXT: s_mov_b32 s4, s6
239 ; GFX7-NEXT: s_mov_b32 s5, s6
240 ; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
241 ; GFX7-NEXT: .LBB1_2: ; %endif
242 ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
243 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
244 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
245 ; GFX7-NEXT: s_waitcnt vmcnt(0)
246 ; GFX7-NEXT: flat_store_dword v[0:1], v4
247 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
248 ; GFX7-NEXT: s_setpc_b64 s[30:31]
250 ; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
251 ; GFX8: ; %bb.0: ; %entry
252 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
254 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
255 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
256 ; GFX8-NEXT: s_cbranch_execz .LBB1_2
257 ; GFX8-NEXT: ; %bb.1: ; %if
258 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
259 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
260 ; GFX8-NEXT: flat_load_dword v4, v[2:3]
261 ; GFX8-NEXT: .LBB1_2: ; %endif
262 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
263 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
264 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
265 ; GFX8-NEXT: s_waitcnt vmcnt(0)
266 ; GFX8-NEXT: flat_store_dword v[0:1], v4
267 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
268 ; GFX8-NEXT: s_setpc_b64 s[30:31]
270 ; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
271 ; GFX9: ; %bb.0: ; %entry
272 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
274 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
275 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
276 ; GFX9-NEXT: s_cbranch_execz .LBB1_2
277 ; GFX9-NEXT: ; %bb.1: ; %if
278 ; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
279 ; GFX9-NEXT: .LBB1_2: ; %endif
280 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
281 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
282 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
283 ; GFX9-NEXT: s_waitcnt vmcnt(0)
284 ; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
285 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
286 ; GFX9-NEXT: s_setpc_b64 s[30:31]
288 ; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_global_i32:
289 ; GFX10: ; %bb.0: ; %entry
290 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
291 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
292 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
293 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
294 ; GFX10-NEXT: s_cbranch_execz .LBB1_2
295 ; GFX10-NEXT: ; %bb.1: ; %if
296 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
297 ; GFX10-NEXT: .LBB1_2: ; %endif
298 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
299 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
300 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
301 ; GFX10-NEXT: s_waitcnt vmcnt(0)
302 ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
303 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
304 ; GFX10-NEXT: s_setpc_b64 s[30:31]
306 %out.gep = getelementptr i32, ptr %out, i64 999999
; The GEP is done on the flat pointer and only then cast to addrspace(1), so
; sinking has to look through the cast to reach the constant offset.
307 %in.gep = getelementptr i32, ptr %in, i64 7
308 %cast = addrspacecast ptr %in.gep to ptr addrspace(1)
309 %cmp0 = icmp eq i32 %cond, 0
310 br i1 %cmp0, label %endif, label %if
313 %load = load i32, ptr addrspace(1) %cast
; %x is the loaded value when %if ran, otherwise 0.
317 %x = phi i32 [ %load, %if ], [ 0, %entry ]
318 store i32 %x, ptr %out.gep
; Variant of the flat-to-global addrspacecast test where the cast target is
; addrspace(4) (the "constant" space, going by the test name).
;
; What the checks below record:
;  - The opt output is identical for all four -mcpu values (single shared OPT
;    prefix): the cast and an i8 GEP of 28 are both emitted next to the load.
;  - The ISA still differs per target: bonaire uses buffer_load_dword with
;    "offset:28", gfx900 and gfx1030 use global_load_dword with "offset:28",
;    and tonga adds 28 explicitly before a flat_load_dword.
325 define void @test_sink_noop_addrspacecast_flat_to_constant_i32(ptr %out, ptr %in, i32 %cond) {
326 ; OPT-LABEL: @test_sink_noop_addrspacecast_flat_to_constant_i32(
328 ; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 999999
329 ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[COND:%.*]], 0
330 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
332 ; OPT-NEXT: [[TMP0:%.*]] = addrspacecast ptr [[IN:%.*]] to ptr addrspace(4)
333 ; OPT-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr addrspace(4) [[TMP0]], i64 28
334 ; OPT-NEXT: [[LOAD:%.*]] = load i32, ptr addrspace(4) [[SUNKADDR]], align 4
335 ; OPT-NEXT: br label [[ENDIF]]
337 ; OPT-NEXT: [[X:%.*]] = phi i32 [ [[LOAD]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
338 ; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
341 ; GFX7-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
342 ; GFX7: ; %bb.0: ; %entry
343 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344 ; GFX7-NEXT: s_mov_b32 s6, 0
345 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
346 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
347 ; GFX7-NEXT: s_and_saveexec_b64 s[8:9], vcc
348 ; GFX7-NEXT: s_cbranch_execz .LBB2_2
349 ; GFX7-NEXT: ; %bb.1: ; %if
350 ; GFX7-NEXT: s_mov_b32 s7, 0xf000
351 ; GFX7-NEXT: s_mov_b32 s4, s6
352 ; GFX7-NEXT: s_mov_b32 s5, s6
353 ; GFX7-NEXT: buffer_load_dword v4, v[2:3], s[4:7], 0 addr64 offset:28
354 ; GFX7-NEXT: .LBB2_2: ; %endif
355 ; GFX7-NEXT: s_or_b64 exec, exec, s[8:9]
356 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x3d08fc, v0
357 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
358 ; GFX7-NEXT: s_waitcnt vmcnt(0)
359 ; GFX7-NEXT: flat_store_dword v[0:1], v4
360 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
361 ; GFX7-NEXT: s_setpc_b64 s[30:31]
363 ; GFX8-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
364 ; GFX8: ; %bb.0: ; %entry
365 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
366 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
367 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
368 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
369 ; GFX8-NEXT: s_cbranch_execz .LBB2_2
370 ; GFX8-NEXT: ; %bb.1: ; %if
371 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 28, v2
372 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
373 ; GFX8-NEXT: flat_load_dword v4, v[2:3]
374 ; GFX8-NEXT: .LBB2_2: ; %endif
375 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
376 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x3d08fc, v0
377 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
378 ; GFX8-NEXT: s_waitcnt vmcnt(0)
379 ; GFX8-NEXT: flat_store_dword v[0:1], v4
380 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
381 ; GFX8-NEXT: s_setpc_b64 s[30:31]
383 ; GFX9-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
384 ; GFX9: ; %bb.0: ; %entry
385 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
386 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v4
387 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
388 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
389 ; GFX9-NEXT: s_cbranch_execz .LBB2_2
390 ; GFX9-NEXT: ; %bb.1: ; %if
391 ; GFX9-NEXT: global_load_dword v4, v[2:3], off offset:28
392 ; GFX9-NEXT: .LBB2_2: ; %endif
393 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
394 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3d0000, v0
395 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
396 ; GFX9-NEXT: s_waitcnt vmcnt(0)
397 ; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2300
398 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
399 ; GFX9-NEXT: s_setpc_b64 s[30:31]
401 ; GFX10-LABEL: test_sink_noop_addrspacecast_flat_to_constant_i32:
402 ; GFX10: ; %bb.0: ; %entry
403 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
404 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
405 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
406 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
407 ; GFX10-NEXT: s_cbranch_execz .LBB2_2
408 ; GFX10-NEXT: ; %bb.1: ; %if
409 ; GFX10-NEXT: global_load_dword v4, v[2:3], off offset:28
410 ; GFX10-NEXT: .LBB2_2: ; %endif
411 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
412 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3d0800, v0
413 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
414 ; GFX10-NEXT: s_waitcnt vmcnt(0)
415 ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:252
416 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
417 ; GFX10-NEXT: s_setpc_b64 s[30:31]
419 %out.gep = getelementptr i32, ptr %out, i64 999999
; The GEP is done on the flat pointer and only then cast to addrspace(4).
420 %in.gep = getelementptr i32, ptr %in, i64 7
421 %cast = addrspacecast ptr %in.gep to ptr addrspace(4)
422 %cmp0 = icmp eq i32 %cond, 0
423 br i1 %cmp0, label %endif, label %if
426 %load = load i32, ptr addrspace(4) %cast
; %x is the loaded value when %if ran, otherwise 0.
430 %x = phi i32 [ %load, %if ], [ 0, %entry ]
431 store i32 %x, ptr %out.gep
; Conditional sign-extending i8 flat load at byte offset 4095 from %in; the
; branch condition comes from llvm.amdgcn.mbcnt.lo instead of an argument.
; The result is stored to %out at 1024 x i32 = 0x1000 bytes.
;
; What the checks below record:
;  - Only gfx900 gets the address recomputed next to the load (SUNKADDR), and
;    its ISA folds the whole 4095 into "offset:4095".
;  - bonaire, tonga and gfx1030 keep the GEP in the entry block. bonaire and
;    tonga add 0xfff before the load; gfx1030 adds 0x800 and uses
;    "offset:2047" for the remainder.
;  - The 0x1000 store offset is materialised with an add on every target.
438 define void @test_sink_flat_small_max_flat_offset(ptr %out, ptr %in) #1 {
439 ; OPT-GFX7-LABEL: @test_sink_flat_small_max_flat_offset(
440 ; OPT-GFX7-NEXT: entry:
441 ; OPT-GFX7-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
442 ; OPT-GFX7-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
443 ; OPT-GFX7-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
444 ; OPT-GFX7-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
445 ; OPT-GFX7-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
447 ; OPT-GFX7-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
448 ; OPT-GFX7-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
449 ; OPT-GFX7-NEXT: br label [[ENDIF]]
451 ; OPT-GFX7-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
452 ; OPT-GFX7-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
453 ; OPT-GFX7-NEXT: br label [[DONE:%.*]]
455 ; OPT-GFX7-NEXT: ret void
457 ; OPT-GFX8-LABEL: @test_sink_flat_small_max_flat_offset(
458 ; OPT-GFX8-NEXT: entry:
459 ; OPT-GFX8-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
460 ; OPT-GFX8-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
461 ; OPT-GFX8-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
462 ; OPT-GFX8-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
463 ; OPT-GFX8-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
465 ; OPT-GFX8-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
466 ; OPT-GFX8-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
467 ; OPT-GFX8-NEXT: br label [[ENDIF]]
469 ; OPT-GFX8-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
470 ; OPT-GFX8-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
471 ; OPT-GFX8-NEXT: br label [[DONE:%.*]]
473 ; OPT-GFX8-NEXT: ret void
475 ; OPT-GFX9-LABEL: @test_sink_flat_small_max_flat_offset(
476 ; OPT-GFX9-NEXT: entry:
477 ; OPT-GFX9-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
478 ; OPT-GFX9-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
479 ; OPT-GFX9-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
480 ; OPT-GFX9-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
482 ; OPT-GFX9-NEXT: [[SUNKADDR:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
483 ; OPT-GFX9-NEXT: [[LOAD:%.*]] = load i8, ptr [[SUNKADDR]], align 1
484 ; OPT-GFX9-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
485 ; OPT-GFX9-NEXT: br label [[ENDIF]]
487 ; OPT-GFX9-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
488 ; OPT-GFX9-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
489 ; OPT-GFX9-NEXT: ret void
491 ; OPT-GFX10-LABEL: @test_sink_flat_small_max_flat_offset(
492 ; OPT-GFX10-NEXT: entry:
493 ; OPT-GFX10-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
494 ; OPT-GFX10-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4095
495 ; OPT-GFX10-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
496 ; OPT-GFX10-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
497 ; OPT-GFX10-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
499 ; OPT-GFX10-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
500 ; OPT-GFX10-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
501 ; OPT-GFX10-NEXT: br label [[ENDIF]]
503 ; OPT-GFX10-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
504 ; OPT-GFX10-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
505 ; OPT-GFX10-NEXT: br label [[DONE:%.*]]
507 ; OPT-GFX10-NEXT: ret void
509 ; GFX7-LABEL: test_sink_flat_small_max_flat_offset:
510 ; GFX7: ; %bb.0: ; %entry
511 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
512 ; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v5, -1, 0
513 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
514 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
515 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
516 ; GFX7-NEXT: s_cbranch_execz .LBB3_2
517 ; GFX7-NEXT: ; %bb.1: ; %if
518 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0xfff, v2
519 ; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
520 ; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
521 ; GFX7-NEXT: .LBB3_2: ; %endif
522 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
523 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
524 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
525 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
526 ; GFX7-NEXT: flat_store_dword v[0:1], v4
527 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
528 ; GFX7-NEXT: s_setpc_b64 s[30:31]
530 ; GFX8-LABEL: test_sink_flat_small_max_flat_offset:
531 ; GFX8: ; %bb.0: ; %entry
532 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
534 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
535 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
536 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
537 ; GFX8-NEXT: s_cbranch_execz .LBB3_2
538 ; GFX8-NEXT: ; %bb.1: ; %if
539 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0xfff, v2
540 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
541 ; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
542 ; GFX8-NEXT: .LBB3_2: ; %endif
543 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
544 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
545 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
546 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
547 ; GFX8-NEXT: flat_store_dword v[0:1], v4
548 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
549 ; GFX8-NEXT: s_setpc_b64 s[30:31]
551 ; GFX9-LABEL: test_sink_flat_small_max_flat_offset:
552 ; GFX9: ; %bb.0: ; %entry
553 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
554 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
555 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
556 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
557 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
558 ; GFX9-NEXT: s_cbranch_execz .LBB3_2
559 ; GFX9-NEXT: ; %bb.1: ; %if
560 ; GFX9-NEXT: flat_load_sbyte v4, v[2:3] offset:4095
561 ; GFX9-NEXT: .LBB3_2: ; %endif
562 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
563 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
564 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
565 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
566 ; GFX9-NEXT: flat_store_dword v[0:1], v4
567 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
568 ; GFX9-NEXT: s_setpc_b64 s[30:31]
570 ; GFX10-LABEL: test_sink_flat_small_max_flat_offset:
571 ; GFX10: ; %bb.0: ; %entry
572 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
573 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
574 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
575 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
576 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
577 ; GFX10-NEXT: s_cbranch_execz .LBB3_2
578 ; GFX10-NEXT: ; %bb.1: ; %if
579 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x800, v2
580 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
581 ; GFX10-NEXT: flat_load_sbyte v4, v[2:3] offset:2047
582 ; GFX10-NEXT: .LBB3_2: ; %endif
583 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
584 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
585 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
586 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
587 ; GFX10-NEXT: flat_store_dword v[0:1], v4
588 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
589 ; GFX10-NEXT: s_setpc_b64 s[30:31]
591 %out.gep = getelementptr i32, ptr %out, i32 1024
; 4095 is the byte offset under test; the sibling "max_plus_1" test uses 4096.
592 %in.gep = getelementptr i8, ptr %in, i64 4095
593 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
594 %cmp0 = icmp eq i32 %tid, 0
595 br i1 %cmp0, label %endif, label %if
598 %load = load i8, ptr %in.gep
599 %cast = sext i8 %load to i32
; %x is the sign-extended byte when %if ran, otherwise 0.
603 %x = phi i32 [ %cast, %if ], [ 0, %entry ]
604 store i32 %x, ptr %out.gep
; Conditional sign-extending i8 flat load at byte offset 4096 from %in, one
; byte past the 4095 used by test_sink_flat_small_max_flat_offset. The result
; is stored to %out at 99999 x i32 = 0x61a7c bytes.
;
; What the checks below record:
;  - The opt output is identical for all four -mcpu values (single shared OPT
;    prefix): the %in GEP stays in the entry block, i.e. nothing is sunk.
;  - Every target adds 0x1000 to the pointer and issues flat_load_sbyte with
;    no immediate offset.
;  - For the store, bonaire and tonga add the full 0x61a7c; gfx900 uses
;    0x61000 + "offset:2684" and gfx1030 uses 0x61800 + "offset:636".
611 define void @test_sink_flat_small_max_plus_1_flat_offset(ptr %out, ptr %in) #1 {
612 ; OPT-LABEL: @test_sink_flat_small_max_plus_1_flat_offset(
614 ; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i64 99999
615 ; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 4096
616 ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3:[0-9]+]]
617 ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
618 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
620 ; OPT-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
621 ; OPT-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
622 ; OPT-NEXT: br label [[ENDIF]]
624 ; OPT-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
625 ; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
626 ; OPT-NEXT: br label [[DONE:%.*]]
630 ; GFX7-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
631 ; GFX7: ; %bb.0: ; %entry
632 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
633 ; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v5, -1, 0
634 ; GFX7-NEXT: v_mov_b32_e32 v4, 0
635 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
636 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
637 ; GFX7-NEXT: s_cbranch_execz .LBB4_2
638 ; GFX7-NEXT: ; %bb.1: ; %if
639 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, 0x1000, v2
640 ; GFX7-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
641 ; GFX7-NEXT: flat_load_sbyte v4, v[2:3]
642 ; GFX7-NEXT: .LBB4_2: ; %endif
643 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
644 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x61a7c, v0
645 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
646 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
647 ; GFX7-NEXT: flat_store_dword v[0:1], v4
648 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
649 ; GFX7-NEXT: s_setpc_b64 s[30:31]
651 ; GFX8-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
652 ; GFX8: ; %bb.0: ; %entry
653 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
654 ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
655 ; GFX8-NEXT: v_mov_b32_e32 v4, 0
656 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
657 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
658 ; GFX8-NEXT: s_cbranch_execz .LBB4_2
659 ; GFX8-NEXT: ; %bb.1: ; %if
660 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, 0x1000, v2
661 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, 0, v3, vcc
662 ; GFX8-NEXT: flat_load_sbyte v4, v[2:3]
663 ; GFX8-NEXT: .LBB4_2: ; %endif
664 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
665 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x61a7c, v0
666 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
667 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
668 ; GFX8-NEXT: flat_store_dword v[0:1], v4
669 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
670 ; GFX8-NEXT: s_setpc_b64 s[30:31]
672 ; GFX9-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
673 ; GFX9: ; %bb.0: ; %entry
674 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
675 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v5, -1, 0
676 ; GFX9-NEXT: v_mov_b32_e32 v4, 0
677 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v5
678 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
679 ; GFX9-NEXT: s_cbranch_execz .LBB4_2
680 ; GFX9-NEXT: ; %bb.1: ; %if
681 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, 0x1000, v2
682 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, 0, v3, vcc
683 ; GFX9-NEXT: flat_load_sbyte v4, v[2:3]
684 ; GFX9-NEXT: .LBB4_2: ; %endif
685 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
686 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x61000, v0
687 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
688 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
689 ; GFX9-NEXT: flat_store_dword v[0:1], v4 offset:2684
690 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
691 ; GFX9-NEXT: s_setpc_b64 s[30:31]
693 ; GFX10-LABEL: test_sink_flat_small_max_plus_1_flat_offset:
694 ; GFX10: ; %bb.0: ; %entry
695 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
696 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v4, -1, 0
697 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v4
698 ; GFX10-NEXT: v_mov_b32_e32 v4, 0
699 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
700 ; GFX10-NEXT: s_cbranch_execz .LBB4_2
701 ; GFX10-NEXT: ; %bb.1: ; %if
702 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, 0x1000, v2
703 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, 0, v3, vcc_lo
704 ; GFX10-NEXT: flat_load_sbyte v4, v[2:3]
705 ; GFX10-NEXT: .LBB4_2: ; %endif
706 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
707 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x61800, v0
708 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
709 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
710 ; GFX10-NEXT: flat_store_dword v[0:1], v4 offset:636
711 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
712 ; GFX10-NEXT: s_setpc_b64 s[30:31]
714 %out.gep = getelementptr i32, ptr %out, i64 99999
; 4096 is the byte offset under test: one more than in the "max" test.
715 %in.gep = getelementptr i8, ptr %in, i64 4096
716 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
717 %cmp0 = icmp eq i32 %tid, 0
718 br i1 %cmp0, label %endif, label %if
721 %load = load i8, ptr %in.gep
722 %cast = sext i8 %load to i32
; %x is the sign-extended byte when %if ran, otherwise 0.
726 %x = phi i32 [ %cast, %if ], [ 0, %entry ]
727 store i32 %x, ptr %out.gep
; Register-offset case: %in.gep adds the runtime i64 %reg to %in, so the
; address has no constant part. The opt checks expect the getelementptr to
; stay ahead of the branch with the load using it directly, and every llc
; check block places the 64-bit add of the offset in %bb.1 (%if) right before
; flat_load_sbyte. The store address (%out + 1024 x i32) shows up as an add of
; 0x1000 after the s_or on exec in %endif, with no offset on flat_store_dword.
734 define void @test_sinkable_flat_reg_offset(ptr %out, ptr %in, i64 %reg) #1 {
735 ; OPT-LABEL: @test_sinkable_flat_reg_offset(
737 ; OPT-NEXT: [[OUT_GEP:%.*]] = getelementptr i32, ptr [[OUT:%.*]], i32 1024
738 ; OPT-NEXT: [[IN_GEP:%.*]] = getelementptr i8, ptr [[IN:%.*]], i64 [[REG:%.*]]
739 ; OPT-NEXT: [[TID:%.*]] = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #[[ATTR3]]
740 ; OPT-NEXT: [[CMP0:%.*]] = icmp eq i32 [[TID]], 0
741 ; OPT-NEXT: br i1 [[CMP0]], label [[ENDIF:%.*]], label [[IF:%.*]]
743 ; OPT-NEXT: [[LOAD:%.*]] = load i8, ptr [[IN_GEP]], align 1
744 ; OPT-NEXT: [[CAST:%.*]] = sext i8 [[LOAD]] to i32
745 ; OPT-NEXT: br label [[ENDIF]]
747 ; OPT-NEXT: [[X:%.*]] = phi i32 [ [[CAST]], [[IF]] ], [ 0, [[ENTRY:%.*]] ]
748 ; OPT-NEXT: store i32 [[X]], ptr [[OUT_GEP]], align 4
749 ; OPT-NEXT: br label [[DONE:%.*]]
753 ; GFX7-LABEL: test_sinkable_flat_reg_offset:
754 ; GFX7: ; %bb.0: ; %entry
755 ; GFX7-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
756 ; GFX7-NEXT: v_mbcnt_lo_u32_b32_e64 v7, -1, 0
757 ; GFX7-NEXT: v_mov_b32_e32 v6, 0
758 ; GFX7-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
759 ; GFX7-NEXT: s_and_saveexec_b64 s[4:5], vcc
760 ; GFX7-NEXT: s_cbranch_execz .LBB5_2
761 ; GFX7-NEXT: ; %bb.1: ; %if
762 ; GFX7-NEXT: v_add_i32_e32 v2, vcc, v2, v4
763 ; GFX7-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
764 ; GFX7-NEXT: flat_load_sbyte v6, v[2:3]
765 ; GFX7-NEXT: .LBB5_2: ; %endif
766 ; GFX7-NEXT: s_or_b64 exec, exec, s[4:5]
767 ; GFX7-NEXT: v_add_i32_e32 v0, vcc, 0x1000, v0
768 ; GFX7-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
769 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
770 ; GFX7-NEXT: flat_store_dword v[0:1], v6
771 ; GFX7-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
772 ; GFX7-NEXT: s_setpc_b64 s[30:31]
774 ; GFX8-LABEL: test_sinkable_flat_reg_offset:
775 ; GFX8: ; %bb.0: ; %entry
776 ; GFX8-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
777 ; GFX8-NEXT: v_mbcnt_lo_u32_b32 v7, -1, 0
778 ; GFX8-NEXT: v_mov_b32_e32 v6, 0
779 ; GFX8-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
780 ; GFX8-NEXT: s_and_saveexec_b64 s[4:5], vcc
781 ; GFX8-NEXT: s_cbranch_execz .LBB5_2
782 ; GFX8-NEXT: ; %bb.1: ; %if
783 ; GFX8-NEXT: v_add_u32_e32 v2, vcc, v2, v4
784 ; GFX8-NEXT: v_addc_u32_e32 v3, vcc, v3, v5, vcc
785 ; GFX8-NEXT: flat_load_sbyte v6, v[2:3]
786 ; GFX8-NEXT: .LBB5_2: ; %endif
787 ; GFX8-NEXT: s_or_b64 exec, exec, s[4:5]
788 ; GFX8-NEXT: v_add_u32_e32 v0, vcc, 0x1000, v0
789 ; GFX8-NEXT: v_addc_u32_e32 v1, vcc, 0, v1, vcc
790 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
791 ; GFX8-NEXT: flat_store_dword v[0:1], v6
792 ; GFX8-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
793 ; GFX8-NEXT: s_setpc_b64 s[30:31]
795 ; GFX9-LABEL: test_sinkable_flat_reg_offset:
796 ; GFX9: ; %bb.0: ; %entry
797 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
798 ; GFX9-NEXT: v_mbcnt_lo_u32_b32 v7, -1, 0
799 ; GFX9-NEXT: v_mov_b32_e32 v6, 0
800 ; GFX9-NEXT: v_cmp_ne_u32_e32 vcc, 0, v7
801 ; GFX9-NEXT: s_and_saveexec_b64 s[4:5], vcc
802 ; GFX9-NEXT: s_cbranch_execz .LBB5_2
803 ; GFX9-NEXT: ; %bb.1: ; %if
804 ; GFX9-NEXT: v_add_co_u32_e32 v2, vcc, v2, v4
805 ; GFX9-NEXT: v_addc_co_u32_e32 v3, vcc, v3, v5, vcc
806 ; GFX9-NEXT: flat_load_sbyte v6, v[2:3]
807 ; GFX9-NEXT: .LBB5_2: ; %endif
808 ; GFX9-NEXT: s_or_b64 exec, exec, s[4:5]
809 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
810 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
811 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
812 ; GFX9-NEXT: flat_store_dword v[0:1], v6
813 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
814 ; GFX9-NEXT: s_setpc_b64 s[30:31]
816 ; GFX10-LABEL: test_sinkable_flat_reg_offset:
817 ; GFX10: ; %bb.0: ; %entry
818 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
819 ; GFX10-NEXT: v_mbcnt_lo_u32_b32 v6, -1, 0
820 ; GFX10-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v6
821 ; GFX10-NEXT: v_mov_b32_e32 v6, 0
822 ; GFX10-NEXT: s_and_saveexec_b32 s4, vcc_lo
823 ; GFX10-NEXT: s_cbranch_execz .LBB5_2
824 ; GFX10-NEXT: ; %bb.1: ; %if
825 ; GFX10-NEXT: v_add_co_u32 v2, vcc_lo, v2, v4
826 ; GFX10-NEXT: v_add_co_ci_u32_e32 v3, vcc_lo, v3, v5, vcc_lo
827 ; GFX10-NEXT: flat_load_sbyte v6, v[2:3]
828 ; GFX10-NEXT: .LBB5_2: ; %endif
829 ; GFX10-NEXT: s_or_b32 exec_lo, exec_lo, s4
830 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
831 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
832 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
833 ; GFX10-NEXT: flat_store_dword v[0:1], v6
834 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
835 ; GFX10-NEXT: s_setpc_b64 s[30:31]
837 %out.gep = getelementptr i32, ptr %out, i32 1024
838 %in.gep = getelementptr i8, ptr %in, i64 %reg
; The branch depends on the mbcnt.lo result, so the i8 load only runs on the
; %if path; the phi feeds 0 to the store when coming straight from %entry.
839 %tid = call i32 @llvm.amdgcn.mbcnt.lo(i32 -1, i32 0) #0
840 %cmp0 = icmp eq i32 %tid, 0
841 br i1 %cmp0, label %endif, label %if
844 %load = load i8, ptr %in.gep
845 %cast = sext i8 %load to i32
849 %x = phi i32 [ %cast, %if ], [ 0, %entry ]
850 store i32 %x, ptr %out.gep
; Intrinsic whose result the tests compare against 0 to form the branch
; condition guarding the load.
857 declare i32 @llvm.amdgcn.mbcnt.lo(i32, i32) #0

; #0 is attached to the mbcnt.lo declaration and to its call sites.
859 attributes #0 = { nounwind readnone }
; #1 is attached to test function definitions such as
; @test_sinkable_flat_reg_offset.
860 attributes #1 = { nounwind }
; NOTE(review): no user of #2 is visible in this part of the file; confirm it
; is still referenced before relying on it.
861 attributes #2 = { nounwind argmemonly }