1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
3 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
4 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
5 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
6 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
7 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
9 ; Test splitting flat instruction offsets into the low and high bits
10 ; when the offset doesn't fit in the offset field.
; Byte load at pointer + 1, the smallest non-zero offset. The checks expect
; gfx900 and gfx1100 to place the 1 in the load's immediate offset field, and
; gfx1010 to add it to the 64-bit address and load with no immediate offset.
12 define i8 @flat_inst_valu_offset_1(ptr %p) {
13 ; GFX9-LABEL: flat_inst_valu_offset_1:
15 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1
17 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
18 ; GFX9-NEXT: s_setpc_b64 s[30:31]
20 ; GFX10-LABEL: flat_inst_valu_offset_1:
22 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, v0, 1
24 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
25 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
26 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
27 ; GFX10-NEXT: s_setpc_b64 s[30:31]
29 ; GFX11-LABEL: flat_inst_valu_offset_1:
31 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1
33 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
34 ; GFX11-NEXT: s_setpc_b64 s[30:31]
35 %gep = getelementptr i8, ptr %p, i64 1
36 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 2047, the largest value that fits in 11 unsigned bits.
; The checks expect gfx900 and gfx1100 to use an immediate offset of 2047, and
; gfx1010 to add 0x7ff to the 64-bit address and load with no immediate offset.
40 define i8 @flat_inst_valu_offset_11bit_max(ptr %p) {
41 ; GFX9-LABEL: flat_inst_valu_offset_11bit_max:
43 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
44 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
45 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
46 ; GFX9-NEXT: s_setpc_b64 s[30:31]
48 ; GFX10-LABEL: flat_inst_valu_offset_11bit_max:
50 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
51 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
52 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
53 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
54 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
55 ; GFX10-NEXT: s_setpc_b64 s[30:31]
57 ; GFX11-LABEL: flat_inst_valu_offset_11bit_max:
59 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
60 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047
61 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
62 ; GFX11-NEXT: s_setpc_b64 s[30:31]
63 %gep = getelementptr i8, ptr %p, i64 2047
64 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 4095, the largest value that fits in 12 unsigned bits.
; The checks expect gfx900 and gfx1100 to use an immediate offset of 4095, and
; gfx1010 to add 0xfff to the 64-bit address and load with no immediate offset.
68 define i8 @flat_inst_valu_offset_12bit_max(ptr %p) {
69 ; GFX9-LABEL: flat_inst_valu_offset_12bit_max:
71 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
72 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
73 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
74 ; GFX9-NEXT: s_setpc_b64 s[30:31]
76 ; GFX10-LABEL: flat_inst_valu_offset_12bit_max:
78 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
79 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
80 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
81 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
82 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
83 ; GFX10-NEXT: s_setpc_b64 s[30:31]
85 ; GFX11-LABEL: flat_inst_valu_offset_12bit_max:
87 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
88 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095
89 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
90 ; GFX11-NEXT: s_setpc_b64 s[30:31]
91 %gep = getelementptr i8, ptr %p, i64 4095
92 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 8191, the largest value that fits in 13 unsigned bits.
; SelectionDAG on gfx900 and gfx1100 is expected to split the offset into an
; add of 0x1000 plus an immediate offset of 4095. GlobalISel on those targets,
; and gfx1010 under both selectors, are expected to add the whole 0x1fff to the
; address and load with no immediate offset.
96 define i8 @flat_inst_valu_offset_13bit_max(ptr %p) {
97 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
99 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
100 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
101 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
102 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
103 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
104 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
106 ; GFX10-LABEL: flat_inst_valu_offset_13bit_max:
108 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
110 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
111 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
112 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
113 ; GFX10-NEXT: s_setpc_b64 s[30:31]
115 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_13bit_max:
116 ; GFX11-SDAG: ; %bb.0:
117 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
119 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
120 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
121 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
122 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
124 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
125 ; GFX9-GISEL: ; %bb.0:
126 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
127 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
128 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
129 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
130 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
131 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
133 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_13bit_max:
134 ; GFX11-GISEL: ; %bb.0:
135 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
136 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
137 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
138 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
139 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
140 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
141 %gep = getelementptr i8, ptr %p, i64 8191
142 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 2048. On all three targets the checks expect no
; immediate offset on the load; the constant is added to the 64-bit address
; instead (0xfffff800 into the low half, -1 plus carry into the high half).
146 define i8 @flat_inst_valu_offset_neg_11bit_max(ptr %p) {
147 ; GFX9-LABEL: flat_inst_valu_offset_neg_11bit_max:
149 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
151 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
152 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
153 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
154 ; GFX9-NEXT: s_setpc_b64 s[30:31]
156 ; GFX10-LABEL: flat_inst_valu_offset_neg_11bit_max:
158 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
159 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
160 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
161 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
162 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
163 ; GFX10-NEXT: s_setpc_b64 s[30:31]
165 ; GFX11-LABEL: flat_inst_valu_offset_neg_11bit_max:
167 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
168 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff800, v0
169 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
170 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
171 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
172 ; GFX11-NEXT: s_setpc_b64 s[30:31]
173 %gep = getelementptr i8, ptr %p, i64 -2048
174 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 4096. On all three targets the checks expect no
; immediate offset on the load; the constant is added to the 64-bit address
; instead (0xfffff000 into the low half, -1 plus carry into the high half).
178 define i8 @flat_inst_valu_offset_neg_12bit_max(ptr %p) {
179 ; GFX9-LABEL: flat_inst_valu_offset_neg_12bit_max:
181 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
182 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
183 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
184 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
185 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
186 ; GFX9-NEXT: s_setpc_b64 s[30:31]
188 ; GFX10-LABEL: flat_inst_valu_offset_neg_12bit_max:
190 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
191 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
192 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
193 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
194 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
195 ; GFX10-NEXT: s_setpc_b64 s[30:31]
197 ; GFX11-LABEL: flat_inst_valu_offset_neg_12bit_max:
199 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
200 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
201 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
202 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
203 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
204 ; GFX11-NEXT: s_setpc_b64 s[30:31]
205 %gep = getelementptr i8, ptr %p, i64 -4096
206 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 8192. On all three targets the checks expect no
; immediate offset on the load; the constant is added to the 64-bit address
; instead (0xffffe000 into the low half, -1 plus carry into the high half).
210 define i8 @flat_inst_valu_offset_neg_13bit_max(ptr %p) {
211 ; GFX9-LABEL: flat_inst_valu_offset_neg_13bit_max:
213 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
214 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
215 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
216 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
217 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
218 ; GFX9-NEXT: s_setpc_b64 s[30:31]
220 ; GFX10-LABEL: flat_inst_valu_offset_neg_13bit_max:
222 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
223 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
224 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
225 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
226 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
227 ; GFX10-NEXT: s_setpc_b64 s[30:31]
229 ; GFX11-LABEL: flat_inst_valu_offset_neg_13bit_max:
231 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
232 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
233 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
234 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
235 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
236 ; GFX11-NEXT: s_setpc_b64 s[30:31]
237 %gep = getelementptr i8, ptr %p, i64 -8192
238 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 4095, the same constant the 12bit_max test uses. The
; checks expect gfx900 and gfx1100 to use an immediate offset of 4095, and
; gfx1010 to add 0xfff to the 64-bit address and load with no immediate offset.
242 define i8 @flat_inst_valu_offset_2x_11bit_max(ptr %p) {
243 ; GFX9-LABEL: flat_inst_valu_offset_2x_11bit_max:
245 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
246 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
247 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
248 ; GFX9-NEXT: s_setpc_b64 s[30:31]
250 ; GFX10-LABEL: flat_inst_valu_offset_2x_11bit_max:
252 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
253 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
254 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
255 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
256 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
257 ; GFX10-NEXT: s_setpc_b64 s[30:31]
259 ; GFX11-LABEL: flat_inst_valu_offset_2x_11bit_max:
261 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
262 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095
263 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
264 ; GFX11-NEXT: s_setpc_b64 s[30:31]
265 %gep = getelementptr i8, ptr %p, i64 4095
266 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 8191, the same constant the 13bit_max test uses.
; SelectionDAG on gfx900 and gfx1100 is expected to split the offset into an
; add of 0x1000 plus an immediate offset of 4095. GlobalISel on those targets,
; and gfx1010 under both selectors, are expected to add the whole 0x1fff to the
; address and load with no immediate offset.
270 define i8 @flat_inst_valu_offset_2x_12bit_max(ptr %p) {
271 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
272 ; GFX9-SDAG: ; %bb.0:
273 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
274 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
275 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
276 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
277 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
278 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
280 ; GFX10-LABEL: flat_inst_valu_offset_2x_12bit_max:
282 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
283 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
284 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
285 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
286 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
287 ; GFX10-NEXT: s_setpc_b64 s[30:31]
289 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_12bit_max:
290 ; GFX11-SDAG: ; %bb.0:
291 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
293 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
294 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
295 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
296 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
298 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
299 ; GFX9-GISEL: ; %bb.0:
300 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
302 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
303 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
304 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
305 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
307 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_12bit_max:
308 ; GFX11-GISEL: ; %bb.0:
309 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
310 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
311 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
312 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
313 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
314 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
315 %gep = getelementptr i8, ptr %p, i64 8191
316 %load = load i8, ptr %gep, align 4
; Byte load at pointer + 16383, the largest value that fits in 14 unsigned bits.
; SelectionDAG on gfx900 and gfx1100 is expected to split the offset into an
; add of 0x3000 plus an immediate offset of 4095. GlobalISel on those targets,
; and gfx1010 under both selectors, are expected to add the whole 0x3fff to the
; address and load with no immediate offset.
320 define i8 @flat_inst_valu_offset_2x_13bit_max(ptr %p) {
321 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
322 ; GFX9-SDAG: ; %bb.0:
323 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
324 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
325 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
326 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
327 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
328 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
330 ; GFX10-LABEL: flat_inst_valu_offset_2x_13bit_max:
332 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
333 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
334 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
335 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
336 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
337 ; GFX10-NEXT: s_setpc_b64 s[30:31]
339 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_2x_13bit_max:
340 ; GFX11-SDAG: ; %bb.0:
341 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
342 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0
343 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
344 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
345 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
346 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
348 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
349 ; GFX9-GISEL: ; %bb.0:
350 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
351 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
352 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
353 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
354 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
355 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
357 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_2x_13bit_max:
358 ; GFX11-GISEL: ; %bb.0:
359 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
360 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
361 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
362 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
363 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
364 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
365 %gep = getelementptr i8, ptr %p, i64 16383
366 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 4096, the same constant the neg_12bit_max test uses.
; On all three targets the checks expect no immediate offset on the load; the
; constant is added to the 64-bit address instead (0xfffff000 low, -1 high).
370 define i8 @flat_inst_valu_offset_2x_neg_11bit_max(ptr %p) {
371 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
373 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
374 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
375 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
376 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
377 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
378 ; GFX9-NEXT: s_setpc_b64 s[30:31]
380 ; GFX10-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
382 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
383 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
384 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
385 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
386 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
387 ; GFX10-NEXT: s_setpc_b64 s[30:31]
389 ; GFX11-LABEL: flat_inst_valu_offset_2x_neg_11bit_max:
391 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
392 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
393 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
394 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
395 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
396 ; GFX11-NEXT: s_setpc_b64 s[30:31]
397 %gep = getelementptr i8, ptr %p, i64 -4096
398 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 8192, the same constant the neg_13bit_max test uses.
; On all three targets the checks expect no immediate offset on the load; the
; constant is added to the 64-bit address instead (0xffffe000 low, -1 high).
402 define i8 @flat_inst_valu_offset_2x_neg_12bit_max(ptr %p) {
403 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
405 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
406 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
407 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
408 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
409 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
410 ; GFX9-NEXT: s_setpc_b64 s[30:31]
412 ; GFX10-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
414 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
415 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
416 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
417 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
418 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
421 ; GFX11-LABEL: flat_inst_valu_offset_2x_neg_12bit_max:
423 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
424 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
425 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
426 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
427 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
428 ; GFX11-NEXT: s_setpc_b64 s[30:31]
429 %gep = getelementptr i8, ptr %p, i64 -8192
430 %load = load i8, ptr %gep, align 4
; Byte load at pointer - 16384. On all three targets the checks expect no
; immediate offset on the load; the constant is added to the 64-bit address
; instead (0xffffc000 into the low half, -1 plus carry into the high half).
434 define i8 @flat_inst_valu_offset_2x_neg_13bit_max(ptr %p) {
435 ; GFX9-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
437 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
438 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
439 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
440 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1]
441 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
442 ; GFX9-NEXT: s_setpc_b64 s[30:31]
444 ; GFX10-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
446 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
447 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
448 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
449 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1]
450 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
451 ; GFX10-NEXT: s_setpc_b64 s[30:31]
453 ; GFX11-LABEL: flat_inst_valu_offset_2x_neg_13bit_max:
455 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
456 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
457 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
458 ; GFX11-NEXT: flat_load_u8 v0, v[0:1]
459 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
460 ; GFX11-NEXT: s_setpc_b64 s[30:31]
461 %gep = getelementptr i8, ptr %p, i64 -16384
462 %load = load i8, ptr %gep, align 4
466 ; Offset is (1ull << 33) | 2047: the low half is the 11-bit maximum, the high half is 2.
; SelectionDAG on gfx900 and gfx1100 is expected to put 2047 in the load's
; immediate offset and add only the high half (low add of 0, high add of 2);
; SelectionDAG on gfx1010 is expected to add 0x7ff and 2 and use no immediate
; offset. GlobalISel on every target is expected to build the constant in a
; scalar register pair, copy it to vector registers, do a full 64-bit add and
; load with no immediate offset.
467 define i8 @flat_inst_valu_offset_64bit_11bit_split0(ptr %p) {
468 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
469 ; GFX9-SDAG: ; %bb.0:
470 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
471 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
472 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
473 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2047
474 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
475 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
477 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
478 ; GFX10-SDAG: ; %bb.0:
479 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
480 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
481 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
482 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
483 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
484 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
486 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
487 ; GFX11-SDAG: ; %bb.0:
488 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
489 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
490 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
491 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047
492 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
493 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
495 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
496 ; GFX9-GISEL: ; %bb.0:
497 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
499 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
500 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
501 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
502 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
503 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
504 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
505 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
506 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
508 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
509 ; GFX10-GISEL: ; %bb.0:
510 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
512 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
513 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
514 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
515 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
516 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
517 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
518 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
519 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
521 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split0:
522 ; GFX11-GISEL: ; %bb.0:
523 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
525 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
526 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
527 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
528 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
529 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
530 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
531 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
532 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
533 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
534 %gep = getelementptr i8, ptr %p, i64 8589936639
535 %load = load i8, ptr %gep, align 4
539 ; Offset is (1ull << 33) | 2048: the low half is one past the 11-bit maximum, the high half is 2.
; SelectionDAG on gfx900 and gfx1100 is expected to put 2048 in the load's
; immediate offset and add only the high half (low add of 0, high add of 2);
; SelectionDAG on gfx1010 is expected to add 0x800 and 2 and use no immediate
; offset. GlobalISel on every target is expected to build the constant in a
; scalar register pair, copy it to vector registers, do a full 64-bit add and
; load with no immediate offset.
540 define i8 @flat_inst_valu_offset_64bit_11bit_split1(ptr %p) {
541 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
542 ; GFX9-SDAG: ; %bb.0:
543 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
544 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
545 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
546 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2048
547 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
548 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
550 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
551 ; GFX10-SDAG: ; %bb.0:
552 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
553 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
554 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
555 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
556 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
557 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
559 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
560 ; GFX11-SDAG: ; %bb.0:
561 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
562 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
563 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
564 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048
565 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
566 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
568 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
569 ; GFX9-GISEL: ; %bb.0:
570 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
572 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
573 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
574 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
575 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
576 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
577 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
578 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
579 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
581 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
582 ; GFX10-GISEL: ; %bb.0:
583 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
585 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
586 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
587 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
588 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
589 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
590 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
591 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
592 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
594 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_split1:
595 ; GFX11-GISEL: ; %bb.0:
596 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
598 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
599 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
600 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
601 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
602 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
603 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
604 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
605 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
606 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
607 %gep = getelementptr i8, ptr %p, i64 8589936640
608 %load = load i8, ptr %gep, align 4
612 ; Offset is (1ull << 33) | 4095: the low half is the 12-bit maximum, the high half is 2.
; SelectionDAG on gfx900 and gfx1100 is expected to put 4095 in the load's
; immediate offset and add only the high half (low add of 0, high add of 2);
; SelectionDAG on gfx1010 is expected to add 0xfff and 2 and use no immediate
; offset. GlobalISel on every target is expected to build the constant in a
; scalar register pair, copy it to vector registers, do a full 64-bit add and
; load with no immediate offset.
613 define i8 @flat_inst_valu_offset_64bit_12bit_split0(ptr %p) {
614 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
615 ; GFX9-SDAG: ; %bb.0:
616 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
617 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
618 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
619 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
620 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
621 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
623 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
624 ; GFX10-SDAG: ; %bb.0:
625 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
626 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
627 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
628 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
629 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
630 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
632 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
633 ; GFX11-SDAG: ; %bb.0:
634 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
635 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
636 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
637 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
638 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
639 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
641 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
642 ; GFX9-GISEL: ; %bb.0:
643 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
645 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
646 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
647 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
648 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
649 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
650 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
651 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
652 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
654 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
655 ; GFX10-GISEL: ; %bb.0:
656 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
658 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
659 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
660 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
661 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
662 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
663 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
664 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
665 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
667 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split0:
668 ; GFX11-GISEL: ; %bb.0:
669 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
671 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
672 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
673 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
674 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
675 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
676 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
677 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
678 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
679 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
680 %gep = getelementptr i8, ptr %p, i64 8589938687
681 %load = load i8, ptr %gep, align 4
685 ; Offset is (1ull << 33) | 4096: the low half is one past the 12-bit maximum, the high half is 2.
; SelectionDAG on all three targets is expected to add 0x1000 to the low half
; and 2 to the high half and load with no immediate offset. GlobalISel on every
; target is expected to build the constant in a scalar register pair, copy it
; to vector registers, do a full 64-bit add and load with no immediate offset.
686 define i8 @flat_inst_valu_offset_64bit_12bit_split1(ptr %p) {
687 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
688 ; GFX9-SDAG: ; %bb.0:
689 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
690 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
691 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
692 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
693 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
694 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
696 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
697 ; GFX10-SDAG: ; %bb.0:
698 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
699 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
700 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
701 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
702 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
703 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
705 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
706 ; GFX11-SDAG: ; %bb.0:
707 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
708 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
709 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
710 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
711 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
712 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
714 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
715 ; GFX9-GISEL: ; %bb.0:
716 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
718 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
719 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
720 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
721 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
722 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
723 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
724 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
725 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
727 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
728 ; GFX10-GISEL: ; %bb.0:
729 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
731 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
732 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
733 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
734 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
735 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
736 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
737 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
738 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
740 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_split1:
741 ; GFX11-GISEL: ; %bb.0:
742 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
744 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
745 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
746 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
747 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
748 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
749 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
750 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
751 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
752 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
753 %gep = getelementptr i8, ptr %p, i64 8589938688
754 %load = load i8, ptr %gep, align 4
758 ; Fill 13-bit low-bits (1ull << 33) | 8191
759 define i8 @flat_inst_valu_offset_64bit_13bit_split0(ptr %p) {
760 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
761 ; GFX9-SDAG: ; %bb.0:
762 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
763 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
764 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
765 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095
766 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
767 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
769 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
770 ; GFX10-SDAG: ; %bb.0:
771 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
772 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
773 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
774 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
775 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
776 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
778 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
779 ; GFX11-SDAG: ; %bb.0:
780 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
781 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
782 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
783 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095
784 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
785 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
787 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
788 ; GFX9-GISEL: ; %bb.0:
789 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
791 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
792 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
793 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
794 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
795 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
796 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
797 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
798 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
800 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
801 ; GFX10-GISEL: ; %bb.0:
802 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
804 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
805 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
806 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
807 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
808 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
809 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
810 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
811 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
813 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split0:
814 ; GFX11-GISEL: ; %bb.0:
815 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
817 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
818 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
819 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
820 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
821 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
822 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
823 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
824 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
825 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
826 %gep = getelementptr i8, ptr %p, i64 8589942783
827 %load = load i8, ptr %gep, align 4
831 ; One past the 13-bit low-bits maximum (1ull << 33) | 8192
832 define i8 @flat_inst_valu_offset_64bit_13bit_split1(ptr %p) {
833 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
834 ; GFX9-SDAG: ; %bb.0:
835 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
836 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
837 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
838 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
839 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
840 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
842 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
843 ; GFX10-SDAG: ; %bb.0:
844 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
845 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
846 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
847 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
848 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
849 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
851 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
852 ; GFX11-SDAG: ; %bb.0:
853 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
854 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
855 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
856 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
857 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
858 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
860 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
861 ; GFX9-GISEL: ; %bb.0:
862 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
864 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
865 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
866 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
867 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
868 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
869 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
870 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
871 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
873 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
874 ; GFX10-GISEL: ; %bb.0:
875 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
877 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
878 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
879 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
880 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
881 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
882 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
883 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
884 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
886 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_split1:
887 ; GFX11-GISEL: ; %bb.0:
888 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
890 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
891 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
892 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
893 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
894 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
895 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
896 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
897 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
898 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
899 %gep = getelementptr i8, ptr %p, i64 8589942784
900 %load = load i8, ptr %gep, align 4
904 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
905 define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split0(ptr %p) {
906 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
907 ; GFX9-SDAG: ; %bb.0:
908 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
909 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
910 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
911 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
912 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
913 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
914 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
916 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
917 ; GFX10-SDAG: ; %bb.0:
918 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
919 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
920 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
921 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
922 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
923 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
925 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
926 ; GFX11-SDAG: ; %bb.0:
927 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
928 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, v0
929 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
930 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
931 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
932 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
934 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
935 ; GFX9-GISEL: ; %bb.0:
936 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
937 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
938 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
939 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
940 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
941 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
942 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
943 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
944 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
945 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
947 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
948 ; GFX10-GISEL: ; %bb.0:
949 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
950 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
951 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
952 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
953 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
954 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
955 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
956 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
957 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
958 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
960 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split0:
961 ; GFX11-GISEL: ; %bb.0:
962 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
963 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
964 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
965 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
966 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
967 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
968 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
969 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
970 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
971 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
972 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
973 %gep = getelementptr i8, ptr %p, i64 -9223372036854773761
974 %load = load i8, ptr %gep, align 4
978 ; One past the 11-bit low-bits maximum, negative high bits (1ull << 63) | 2048
979 define i8 @flat_inst_valu_offset_64bit_11bit_neg_high_split1(ptr %p) {
980 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
981 ; GFX9-SDAG: ; %bb.0:
982 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
983 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
984 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
985 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
986 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
987 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
988 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
990 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
991 ; GFX10-SDAG: ; %bb.0:
992 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
993 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
994 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
995 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
996 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
997 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
999 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1000 ; GFX11-SDAG: ; %bb.0:
1001 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1002 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
1003 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1004 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
1005 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1006 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1008 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1009 ; GFX9-GISEL: ; %bb.0:
1010 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1011 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
1012 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1013 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1014 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1015 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1016 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1017 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1018 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1019 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1021 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1022 ; GFX10-GISEL: ; %bb.0:
1023 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1024 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
1025 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1026 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1027 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1028 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1029 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1030 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1031 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1032 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1034 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_11bit_neg_high_split1:
1035 ; GFX11-GISEL: ; %bb.0:
1036 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1037 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
1038 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1039 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1040 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1041 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1042 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1043 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1044 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
1045 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1046 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1047 %gep = getelementptr i8, ptr %p, i64 -9223372036854773760
1048 %load = load i8, ptr %gep, align 4
1052 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
1053 define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split0(ptr %p) {
1054 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1055 ; GFX9-SDAG: ; %bb.0:
1056 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1057 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
1058 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1059 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1060 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1061 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1062 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1064 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1065 ; GFX10-SDAG: ; %bb.0:
1066 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1067 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
1068 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1069 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1070 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1071 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1073 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1074 ; GFX11-SDAG: ; %bb.0:
1075 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1076 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
1077 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1078 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
1079 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1080 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1082 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1083 ; GFX9-GISEL: ; %bb.0:
1084 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1085 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
1086 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1087 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1088 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1089 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1090 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1091 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1092 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1093 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1095 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1096 ; GFX10-GISEL: ; %bb.0:
1097 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1098 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
1099 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1100 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1101 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1102 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1103 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1104 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1105 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1106 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1108 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split0:
1109 ; GFX11-GISEL: ; %bb.0:
1110 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1111 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
1112 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1113 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1114 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1115 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1116 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1117 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1118 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
1119 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1120 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1121 %gep = getelementptr i8, ptr %p, i64 -9223372036854771713
1122 %load = load i8, ptr %gep, align 4
1126 ; One past the 12-bit low-bits maximum, negative high bits (1ull << 63) | 4096
1127 define i8 @flat_inst_valu_offset_64bit_12bit_neg_high_split1(ptr %p) {
1128 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1129 ; GFX9-SDAG: ; %bb.0:
1130 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1131 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1132 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1133 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1134 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1135 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1136 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1138 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1139 ; GFX10-SDAG: ; %bb.0:
1140 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1141 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1142 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1143 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1144 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1145 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1147 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1148 ; GFX11-SDAG: ; %bb.0:
1149 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1150 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1151 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1152 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
1153 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1154 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1156 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1157 ; GFX9-GISEL: ; %bb.0:
1158 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1159 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
1160 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1161 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1162 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1163 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1164 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1165 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1166 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1167 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1169 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1170 ; GFX10-GISEL: ; %bb.0:
1171 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1172 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
1173 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1174 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1175 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1176 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1177 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1178 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1179 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1180 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1182 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_12bit_neg_high_split1:
1183 ; GFX11-GISEL: ; %bb.0:
1184 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1185 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
1186 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1187 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1188 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1189 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1190 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1191 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1192 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
1193 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1194 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1195 %gep = getelementptr i8, ptr %p, i64 -9223372036854771712
1196 %load = load i8, ptr %gep, align 4
1200 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
1201 define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split0(ptr %p) {
1202 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1203 ; GFX9-SDAG: ; %bb.0:
1204 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1205 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
1206 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1207 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1208 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1209 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1210 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1212 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1213 ; GFX10-SDAG: ; %bb.0:
1214 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1215 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1216 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1217 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1218 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1219 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1221 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1222 ; GFX11-SDAG: ; %bb.0:
1223 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1224 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
1225 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1226 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
1227 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1228 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1230 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1231 ; GFX9-GISEL: ; %bb.0:
1232 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1233 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
1234 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1235 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1236 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1237 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1238 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1239 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1240 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1241 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1243 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1244 ; GFX10-GISEL: ; %bb.0:
1245 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1246 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
1247 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1248 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1249 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1250 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1251 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1252 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1253 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1254 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1256 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split0:
1257 ; GFX11-GISEL: ; %bb.0:
1258 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1259 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
1260 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1261 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1262 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1263 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1264 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1265 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1266 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
1267 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1268 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1269 %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
1270 %load = load i8, ptr %gep, align 4
1274 ; One past the 13-bit low-bits maximum, negative high bits (1ull << 63) | 8192
1275 define i8 @flat_inst_valu_offset_64bit_13bit_neg_high_split1(ptr %p) {
1276 ; GFX9-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1277 ; GFX9-SDAG: ; %bb.0:
1278 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1279 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1280 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1281 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1282 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1283 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1284 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1286 ; GFX10-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1287 ; GFX10-SDAG: ; %bb.0:
1288 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1289 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1290 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1291 ; GFX10-SDAG-NEXT: flat_load_ubyte v0, v[0:1]
1292 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1293 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1295 ; GFX11-SDAG-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1296 ; GFX11-SDAG: ; %bb.0:
1297 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1298 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1299 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1300 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1]
1301 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1302 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1304 ; GFX9-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1305 ; GFX9-GISEL: ; %bb.0:
1306 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1307 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
1308 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1309 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1310 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1311 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1312 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1313 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1314 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1315 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1317 ; GFX10-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1318 ; GFX10-GISEL: ; %bb.0:
1319 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1320 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
1321 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1322 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1323 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1324 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1325 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1326 ; GFX10-GISEL-NEXT: flat_load_ubyte v0, v[0:1]
1327 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1328 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1330 ; GFX11-GISEL-LABEL: flat_inst_valu_offset_64bit_13bit_neg_high_split1:
1331 ; GFX11-GISEL: ; %bb.0:
1332 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1333 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
1334 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1335 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1336 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1337 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1338 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1339 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1340 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1]
1341 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1342 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1343 %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
1344 %load = load i8, ptr %gep, align 4
1348 define amdgpu_kernel void @flat_inst_salu_offset_1(ptr %p) {
1349 ; GFX9-LABEL: flat_inst_salu_offset_1:
1351 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1352 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1353 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1354 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1355 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:1 glc
1356 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1357 ; GFX9-NEXT: flat_store_byte v[0:1], v0
1358 ; GFX9-NEXT: s_endpgm
1360 ; GFX10-LABEL: flat_inst_salu_offset_1:
1362 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1363 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1364 ; GFX10-NEXT: s_add_u32 s0, s0, 1
1365 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1366 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1367 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1368 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1369 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1370 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1371 ; GFX10-NEXT: s_endpgm
1373 ; GFX11-LABEL: flat_inst_salu_offset_1:
1375 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1376 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1377 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1378 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:1 glc dlc
1379 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1380 ; GFX11-NEXT: flat_store_b8 v[0:1], v0
1381 ; GFX11-NEXT: s_endpgm
1382 %gep = getelementptr i8, ptr %p, i64 1
1383 %load = load volatile i8, ptr %gep, align 1
1384 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 2047 (2^11 - 1);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: GFX9 and GFX11 put the whole constant in
; the flat load's immediate (offset:2047); GFX10 adds 0x7ff to the pointer with
; s_add_u32/s_addc_u32 and issues the load without an immediate offset.
1388 define amdgpu_kernel void @flat_inst_salu_offset_11bit_max(ptr %p) {
1389 ; GFX9-LABEL: flat_inst_salu_offset_11bit_max:
1391 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1392 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1393 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1394 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1395 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc
1396 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1397 ; GFX9-NEXT: flat_store_byte v[0:1], v0
1398 ; GFX9-NEXT: s_endpgm
1400 ; GFX10-LABEL: flat_inst_salu_offset_11bit_max:
1402 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1403 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1404 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
1405 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1406 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1407 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1408 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1409 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1410 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1411 ; GFX10-NEXT: s_endpgm
1413 ; GFX11-LABEL: flat_inst_salu_offset_11bit_max:
1415 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1416 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1417 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1418 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc
1419 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1420 ; GFX11-NEXT: flat_store_b8 v[0:1], v0
1421 ; GFX11-NEXT: s_endpgm
1422 %gep = getelementptr i8, ptr %p, i64 2047
1423 %load = load volatile i8, ptr %gep, align 1
1424 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 4095 (2^12 - 1);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: GFX9 and GFX11 put the whole constant in
; the flat load's immediate (offset:4095); GFX10 adds 0xfff to the pointer with
; s_add_u32/s_addc_u32 and issues the load without an immediate offset.
1428 define amdgpu_kernel void @flat_inst_salu_offset_12bit_max(ptr %p) {
1429 ; GFX9-LABEL: flat_inst_salu_offset_12bit_max:
1431 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1432 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1433 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1434 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1435 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
1436 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1437 ; GFX9-NEXT: flat_store_byte v[0:1], v0
1438 ; GFX9-NEXT: s_endpgm
1440 ; GFX10-LABEL: flat_inst_salu_offset_12bit_max:
1442 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1443 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1444 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
1445 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1446 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1447 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1448 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1449 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1450 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1451 ; GFX10-NEXT: s_endpgm
1453 ; GFX11-LABEL: flat_inst_salu_offset_12bit_max:
1455 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1456 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1457 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1458 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1459 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1460 ; GFX11-NEXT: flat_store_b8 v[0:1], v0
1461 ; GFX11-NEXT: s_endpgm
1462 %gep = getelementptr i8, ptr %p, i64 4095
1463 %load = load volatile i8, ptr %gep, align 1
1464 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 8191 (2^13 - 1);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below:
;  * GFX9 and GFX11 with -global-isel=0 split the constant: 0x1000 is added to
;    the pointer with a 64-bit VALU add/add-with-carry pair and the remaining
;    4095 goes in the load's immediate (offset:4095).
;  * GFX10 (both selectors) and GFX9/GFX11 with -global-isel=1 add the whole
;    0x1fff with s_add_u32/s_addc_u32 and use no immediate offset.
1468 define amdgpu_kernel void @flat_inst_salu_offset_13bit_max(ptr %p) {
1469 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_13bit_max:
1470 ; GFX9-SDAG: ; %bb.0:
1471 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1472 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1473 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1474 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1475 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1476 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1477 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
1478 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1479 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1480 ; GFX9-SDAG-NEXT: s_endpgm
1482 ; GFX10-LABEL: flat_inst_salu_offset_13bit_max:
1484 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1485 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1486 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
1487 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1488 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1489 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1490 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1491 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1492 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1493 ; GFX10-NEXT: s_endpgm
1495 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_13bit_max:
1496 ; GFX11-SDAG: ; %bb.0:
1497 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1498 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1499 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
1500 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1501 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1502 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1503 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1504 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1505 ; GFX11-SDAG-NEXT: s_endpgm
1507 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_13bit_max:
1508 ; GFX9-GISEL: ; %bb.0:
1509 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1510 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1511 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
1512 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
1513 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1514 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1515 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1516 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1517 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1518 ; GFX9-GISEL-NEXT: s_endpgm
1520 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_13bit_max:
1521 ; GFX11-GISEL: ; %bb.0:
1522 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1523 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1524 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
1525 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0
1526 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1527 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1528 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1529 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1530 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1531 ; GFX11-GISEL-NEXT: s_endpgm
1532 %gep = getelementptr i8, ptr %p, i64 8191
1533 %load = load volatile i8, ptr %gep, align 1
1534 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 2048 (-2^11);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xfffff800, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
1538 define amdgpu_kernel void @flat_inst_salu_offset_neg_11bit_max(ptr %p) {
1539 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max:
1540 ; GFX9-SDAG: ; %bb.0:
1541 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1542 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1543 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1544 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1545 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff800, v0
1546 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1547 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
1548 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1549 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1550 ; GFX9-SDAG-NEXT: s_endpgm
1552 ; GFX10-LABEL: flat_inst_salu_offset_neg_11bit_max:
1554 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1555 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1556 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff800
1557 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
1558 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1559 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1560 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1561 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1562 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1563 ; GFX10-NEXT: s_endpgm
1565 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_11bit_max:
1566 ; GFX11-SDAG: ; %bb.0:
1567 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1568 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1569 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff800, s0
1570 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1571 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1572 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1573 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1574 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1575 ; GFX11-SDAG-NEXT: s_endpgm
1577 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max:
1578 ; GFX9-GISEL: ; %bb.0:
1579 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1580 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1581 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff800
1582 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
1583 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1584 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1585 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1586 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1587 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1588 ; GFX9-GISEL-NEXT: s_endpgm
1590 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_11bit_max:
1591 ; GFX11-GISEL: ; %bb.0:
1592 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1593 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1594 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff800
1595 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1596 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1597 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1598 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1599 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1600 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1601 ; GFX11-GISEL-NEXT: s_endpgm
1602 %gep = getelementptr i8, ptr %p, i64 -2048
1603 %load = load volatile i8, ptr %gep, align 1
1604 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 4096 (-2^12);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xfffff000, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
1608 define amdgpu_kernel void @flat_inst_salu_offset_neg_12bit_max(ptr %p) {
1609 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max:
1610 ; GFX9-SDAG: ; %bb.0:
1611 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1612 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1613 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1614 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1615 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
1616 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1617 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
1618 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1619 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1620 ; GFX9-SDAG-NEXT: s_endpgm
1622 ; GFX10-LABEL: flat_inst_salu_offset_neg_12bit_max:
1624 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1625 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1626 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000
1627 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
1628 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1629 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1630 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1631 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1632 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1633 ; GFX10-NEXT: s_endpgm
1635 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_12bit_max:
1636 ; GFX11-SDAG: ; %bb.0:
1637 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1638 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1639 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
1640 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1641 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1642 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1643 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1644 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1645 ; GFX11-SDAG-NEXT: s_endpgm
1647 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max:
1648 ; GFX9-GISEL: ; %bb.0:
1649 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1650 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1651 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1652 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
1653 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1654 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1655 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1656 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1657 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1658 ; GFX9-GISEL-NEXT: s_endpgm
1660 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_12bit_max:
1661 ; GFX11-GISEL: ; %bb.0:
1662 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1663 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1664 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1665 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1666 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1667 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1668 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1669 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1670 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1671 ; GFX11-GISEL-NEXT: s_endpgm
1672 %gep = getelementptr i8, ptr %p, i64 -4096
1673 %load = load volatile i8, ptr %gep, align 1
1674 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 8192 (-2^13);
; the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xffffe000, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
1678 define amdgpu_kernel void @flat_inst_salu_offset_neg_13bit_max(ptr %p) {
1679 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max:
1680 ; GFX9-SDAG: ; %bb.0:
1681 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1682 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1683 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1684 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1685 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
1686 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1687 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
1688 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1689 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1690 ; GFX9-SDAG-NEXT: s_endpgm
1692 ; GFX10-LABEL: flat_inst_salu_offset_neg_13bit_max:
1694 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1695 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1696 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000
1697 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
1698 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1699 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1700 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1701 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1702 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1703 ; GFX10-NEXT: s_endpgm
1705 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_neg_13bit_max:
1706 ; GFX11-SDAG: ; %bb.0:
1707 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1708 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1709 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
1710 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1711 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1712 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1713 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1714 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1715 ; GFX11-SDAG-NEXT: s_endpgm
1717 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max:
1718 ; GFX9-GISEL: ; %bb.0:
1719 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1720 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1721 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1722 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
1723 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1724 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1725 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1726 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1727 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1728 ; GFX9-GISEL-NEXT: s_endpgm
1730 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_neg_13bit_max:
1731 ; GFX11-GISEL: ; %bb.0:
1732 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1733 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1734 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1735 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1736 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1737 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1738 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1739 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1740 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1741 ; GFX11-GISEL-NEXT: s_endpgm
1742 %gep = getelementptr i8, ptr %p, i64 -8192
1743 %load = load volatile i8, ptr %gep, align 1
1744 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 4095
; (2 * 2047 + 1, i.e. 2^12 - 1); the loaded byte is then stored through an
; undef pointer.
; Expected code per the checks below: GFX9 and GFX11 put the whole constant in
; the flat load's immediate (offset:4095); GFX10 adds 0xfff to the pointer with
; s_add_u32/s_addc_u32 and issues the load without an immediate offset.
1748 define amdgpu_kernel void @flat_inst_salu_offset_2x_11bit_max(ptr %p) {
1749 ; GFX9-LABEL: flat_inst_salu_offset_2x_11bit_max:
1751 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1752 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1753 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1754 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1755 ; GFX9-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
1756 ; GFX9-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1757 ; GFX9-NEXT: flat_store_byte v[0:1], v0
1758 ; GFX9-NEXT: s_endpgm
1760 ; GFX10-LABEL: flat_inst_salu_offset_2x_11bit_max:
1762 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1763 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1764 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
1765 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1766 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1767 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1768 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1769 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1770 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1771 ; GFX10-NEXT: s_endpgm
1773 ; GFX11-LABEL: flat_inst_salu_offset_2x_11bit_max:
1775 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1776 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1777 ; GFX11-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1778 ; GFX11-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1779 ; GFX11-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1780 ; GFX11-NEXT: flat_store_b8 v[0:1], v0
1781 ; GFX11-NEXT: s_endpgm
1782 %gep = getelementptr i8, ptr %p, i64 4095
1783 %load = load volatile i8, ptr %gep, align 1
1784 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 8191
; (2 * 4095 + 1, i.e. 2^13 - 1); the loaded byte is then stored through an
; undef pointer.
; Expected code per the checks below:
;  * GFX9 and GFX11 with -global-isel=0 split the constant: 0x1000 is added to
;    the pointer with a 64-bit VALU add/add-with-carry pair and the remaining
;    4095 goes in the load's immediate (offset:4095).
;  * GFX10 (both selectors) and GFX9/GFX11 with -global-isel=1 add the whole
;    0x1fff with s_add_u32/s_addc_u32 and use no immediate offset.
1788 define amdgpu_kernel void @flat_inst_salu_offset_2x_12bit_max(ptr %p) {
1789 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max:
1790 ; GFX9-SDAG: ; %bb.0:
1791 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1792 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1793 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1794 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1795 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1796 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1797 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
1798 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1799 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1800 ; GFX9-SDAG-NEXT: s_endpgm
1802 ; GFX10-LABEL: flat_inst_salu_offset_2x_12bit_max:
1804 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1805 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1806 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
1807 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1808 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1809 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1810 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1811 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1812 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1813 ; GFX10-NEXT: s_endpgm
1815 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_12bit_max:
1816 ; GFX11-SDAG: ; %bb.0:
1817 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1818 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1819 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
1820 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1821 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1822 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1823 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1824 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1825 ; GFX11-SDAG-NEXT: s_endpgm
1827 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max:
1828 ; GFX9-GISEL: ; %bb.0:
1829 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1830 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1831 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
1832 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
1833 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1834 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1835 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1836 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1837 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1838 ; GFX9-GISEL-NEXT: s_endpgm
1840 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_12bit_max:
1841 ; GFX11-GISEL: ; %bb.0:
1842 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1843 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1844 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
1845 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0
1846 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1847 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1848 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1849 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1850 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1851 ; GFX11-GISEL-NEXT: s_endpgm
1852 %gep = getelementptr i8, ptr %p, i64 8191
1853 %load = load volatile i8, ptr %gep, align 1
1854 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p plus 16383
; (2 * 8191 + 1, i.e. 2^14 - 1); the loaded byte is then stored through an
; undef pointer.
; Expected code per the checks below:
;  * GFX9 and GFX11 with -global-isel=0 split the constant: 0x3000 is added to
;    the pointer with a 64-bit VALU add/add-with-carry pair and the remaining
;    4095 goes in the load's immediate (offset:4095).
;  * GFX10 (both selectors) and GFX9/GFX11 with -global-isel=1 add the whole
;    0x3fff with s_add_u32/s_addc_u32 and use no immediate offset.
1858 define amdgpu_kernel void @flat_inst_salu_offset_2x_13bit_max(ptr %p) {
1859 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max:
1860 ; GFX9-SDAG: ; %bb.0:
1861 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1862 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1863 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1864 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1865 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
1866 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
1867 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
1868 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1869 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1870 ; GFX9-SDAG-NEXT: s_endpgm
1872 ; GFX10-LABEL: flat_inst_salu_offset_2x_13bit_max:
1874 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1875 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1876 ; GFX10-NEXT: s_add_u32 s0, s0, 0x3fff
1877 ; GFX10-NEXT: s_addc_u32 s1, s1, 0
1878 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1879 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1880 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1881 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1882 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1883 ; GFX10-NEXT: s_endpgm
1885 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_13bit_max:
1886 ; GFX11-SDAG: ; %bb.0:
1887 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1888 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1889 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x3000, s0
1890 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1891 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 0, s1, s0
1892 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
1893 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1894 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1895 ; GFX11-SDAG-NEXT: s_endpgm
1897 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max:
1898 ; GFX9-GISEL: ; %bb.0:
1899 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1900 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1901 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x3fff
1902 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0
1903 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1904 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1905 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1906 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1907 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1908 ; GFX9-GISEL-NEXT: s_endpgm
1910 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_13bit_max:
1911 ; GFX11-GISEL: ; %bb.0:
1912 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1913 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1914 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x3fff
1915 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0
1916 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1917 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1918 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1919 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1920 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1921 ; GFX11-GISEL-NEXT: s_endpgm
1922 %gep = getelementptr i8, ptr %p, i64 16383
1923 %load = load volatile i8, ptr %gep, align 1
1924 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 4096
; (2 * -2048); the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xfffff000, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
1928 define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_11bit_max(ptr %p) {
1929 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1930 ; GFX9-SDAG: ; %bb.0:
1931 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1932 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1933 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
1934 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
1935 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfffff000, v0
1936 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
1937 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
1938 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1939 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
1940 ; GFX9-SDAG-NEXT: s_endpgm
1942 ; GFX10-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1944 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1945 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1946 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfffff000
1947 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
1948 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
1949 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1950 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
1951 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1952 ; GFX10-NEXT: flat_store_byte v[0:1], v0
1953 ; GFX10-NEXT: s_endpgm
1955 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1956 ; GFX11-SDAG: ; %bb.0:
1957 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1958 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1959 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
1960 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1961 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1962 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1963 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1964 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
1965 ; GFX11-SDAG-NEXT: s_endpgm
1967 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1968 ; GFX9-GISEL: ; %bb.0:
1969 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1970 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1971 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1972 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
1973 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
1974 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
1975 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
1976 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1977 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
1978 ; GFX9-GISEL-NEXT: s_endpgm
1980 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_11bit_max:
1981 ; GFX11-GISEL: ; %bb.0:
1982 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1983 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1984 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1985 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1986 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1987 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1988 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
1989 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
1990 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
1991 ; GFX11-GISEL-NEXT: s_endpgm
1992 %gep = getelementptr i8, ptr %p, i64 -4096
1993 %load = load volatile i8, ptr %gep, align 1
1994 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 8192
; (2 * -4096); the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xffffe000, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
1998 define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_12bit_max(ptr %p) {
1999 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2000 ; GFX9-SDAG: ; %bb.0:
2001 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2002 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2003 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2004 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2005 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
2006 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2007 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2008 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2009 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2010 ; GFX9-SDAG-NEXT: s_endpgm
2012 ; GFX10-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2014 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2015 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2016 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffe000
2017 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
2018 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2019 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2020 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2021 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2022 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2023 ; GFX10-NEXT: s_endpgm
2025 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2026 ; GFX11-SDAG: ; %bb.0:
2027 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2028 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2029 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
2030 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2031 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2032 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2033 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2034 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2035 ; GFX11-SDAG-NEXT: s_endpgm
2037 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2038 ; GFX9-GISEL: ; %bb.0:
2039 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2040 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2041 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
2042 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
2043 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2044 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2045 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2046 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2047 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2048 ; GFX9-GISEL-NEXT: s_endpgm
2050 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_12bit_max:
2051 ; GFX11-GISEL: ; %bb.0:
2052 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2053 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2054 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
2055 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
2056 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2057 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2058 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2059 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2060 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2061 ; GFX11-GISEL-NEXT: s_endpgm
2062 %gep = getelementptr i8, ptr %p, i64 -8192
2063 %load = load volatile i8, ptr %gep, align 1
2064 store i8 %load, ptr undef
; Volatile i8 load from the kernel pointer argument %p minus 16384
; (2 * -8192); the loaded byte is then stored through an undef pointer.
; Expected code per the checks below: no variant uses an immediate offset on
; the flat load. The 64-bit constant (low dword 0xffffc000, high dword -1) is
; added to the pointer first: with VALU adds for GFX9/GFX11 under
; -global-isel=0, and with s_add_u32/s_addc_u32 for GFX10 and for GFX9/GFX11
; under -global-isel=1.
2068 define amdgpu_kernel void @flat_inst_salu_offset_2x_neg_13bit_max(ptr %p) {
2069 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2070 ; GFX9-SDAG: ; %bb.0:
2071 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2072 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2073 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2074 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2075 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
2076 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
2077 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2078 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2079 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2080 ; GFX9-SDAG-NEXT: s_endpgm
2082 ; GFX10-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2084 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2085 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2086 ; GFX10-NEXT: s_add_u32 s0, s0, 0xffffc000
2087 ; GFX10-NEXT: s_addc_u32 s1, s1, -1
2088 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2089 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2090 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2091 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2092 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2093 ; GFX10-NEXT: s_endpgm
2095 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2096 ; GFX11-SDAG: ; %bb.0:
2097 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2098 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2099 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
2100 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2101 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
2102 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2103 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2104 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2105 ; GFX11-SDAG-NEXT: s_endpgm
2107 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2108 ; GFX9-GISEL: ; %bb.0:
2109 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2110 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2111 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000
2112 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, -1
2113 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2114 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2115 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2116 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2117 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2118 ; GFX9-GISEL-NEXT: s_endpgm
2120 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_2x_neg_13bit_max:
2121 ; GFX11-GISEL: ; %bb.0:
2122 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2123 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2124 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000
2125 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
2126 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2127 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2128 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2129 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2130 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2131 ; GFX11-GISEL-NEXT: s_endpgm
2132 %gep = getelementptr i8, ptr %p, i64 -16384
2133 %load = load volatile i8, ptr %gep, align 1
2134 store i8 %load, ptr undef
2138 ; Fill 11-bit low-bits (1ull << 33) | 2047
; Volatile i8 load from the kernel pointer argument %p plus 8589936639, which
; equals (1 << 33) + 2047; the loaded byte is then stored through an undef
; pointer.
; Expected code per the checks below:
;  * GFX9 and GFX11 with -global-isel=0 split the constant: the VALU add pair
;    adds 0 to the low dword and 2 to the high dword (i.e. 1 << 33), and the
;    remaining 2047 goes in the load's immediate (offset:2047).
;  * GFX10 (both selectors) and GFX9/GFX11 with -global-isel=1 add the whole
;    constant with s_add_u32 0x7ff / s_addc_u32 2 and use no immediate offset.
2139 define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split0(ptr %p) {
2140 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2141 ; GFX9-SDAG: ; %bb.0:
2142 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2143 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2144 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2145 ; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
2146 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2147 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2047 glc
2148 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2149 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2150 ; GFX9-SDAG-NEXT: s_endpgm
2152 ; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2154 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2155 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2156 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
2157 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2158 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2159 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2160 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2161 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2162 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2163 ; GFX10-NEXT: s_endpgm
2165 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2166 ; GFX11-SDAG: ; %bb.0:
2167 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2168 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2169 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2170 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2171 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2172 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2047 glc dlc
2173 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2174 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2175 ; GFX11-SDAG-NEXT: s_endpgm
2177 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2178 ; GFX9-GISEL: ; %bb.0:
2179 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2180 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2181 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2182 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2183 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2184 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2185 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2186 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2187 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2188 ; GFX9-GISEL-NEXT: s_endpgm
2190 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split0:
2191 ; GFX11-GISEL: ; %bb.0:
2192 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2193 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2194 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2195 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2196 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2197 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2198 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2199 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2200 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2201 ; GFX11-GISEL-NEXT: s_endpgm
2202 %gep = getelementptr i8, ptr %p, i64 8589936639
2203 %load = load volatile i8, ptr %gep, align 1
2204 store i8 %load, ptr undef
2208 ; Offset (1ull << 33) | 2048: the low bits (0x800) are one past the 11-bit maximum; the high dword of the offset is 2.
2209 define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_split1(ptr %p) {
2210 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2211 ; GFX9-SDAG: ; %bb.0:
2212 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2213 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2214 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2215 ; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
2216 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2217 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:2048 glc
2218 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2219 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2220 ; GFX9-SDAG-NEXT: s_endpgm
2222 ; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2224 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2225 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2226 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
2227 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2228 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2229 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2230 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2231 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2232 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2233 ; GFX10-NEXT: s_endpgm
2235 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2236 ; GFX11-SDAG: ; %bb.0:
2237 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2238 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2239 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2240 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2241 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2242 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:2048 glc dlc
2243 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2244 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2245 ; GFX11-SDAG-NEXT: s_endpgm
2247 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2248 ; GFX9-GISEL: ; %bb.0:
2249 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2250 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2251 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2252 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2253 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2254 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2255 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2256 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2257 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2258 ; GFX9-GISEL-NEXT: s_endpgm
2260 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_split1:
2261 ; GFX11-GISEL: ; %bb.0:
2262 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2263 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2264 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2265 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2266 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2267 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2268 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2269 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2270 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2271 ; GFX11-GISEL-NEXT: s_endpgm
2272 %gep = getelementptr i8, ptr %p, i64 8589936640 ; byte offset 0x200000800
2273 %load = load volatile i8, ptr %gep, align 1
2274 store i8 %load, ptr undef
2278 ; Offset (1ull << 33) | 4095: the low bits are the 12-bit maximum (0xfff); the high dword of the offset is 2.
2279 define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split0(ptr %p) {
2280 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2281 ; GFX9-SDAG: ; %bb.0:
2282 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2283 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2284 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2285 ; GFX9-SDAG-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
2286 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2287 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
2288 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2289 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2290 ; GFX9-SDAG-NEXT: s_endpgm
2292 ; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2294 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2295 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2296 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
2297 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2298 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2299 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2300 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2301 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2302 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2303 ; GFX10-NEXT: s_endpgm
2305 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2306 ; GFX11-SDAG: ; %bb.0:
2307 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2308 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2309 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2310 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2311 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2312 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2313 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2314 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2315 ; GFX11-SDAG-NEXT: s_endpgm
2317 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2318 ; GFX9-GISEL: ; %bb.0:
2319 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2320 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2321 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2322 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2323 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2324 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2325 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2326 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2327 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2328 ; GFX9-GISEL-NEXT: s_endpgm
2330 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split0:
2331 ; GFX11-GISEL: ; %bb.0:
2332 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2333 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2334 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2335 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2336 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2337 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2338 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2339 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2340 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2341 ; GFX11-GISEL-NEXT: s_endpgm
2342 %gep = getelementptr i8, ptr %p, i64 8589938687 ; byte offset 0x200000fff
2343 %load = load volatile i8, ptr %gep, align 1
2344 store i8 %load, ptr undef
2348 ; Offset (1ull << 33) | 4096: the low bits (0x1000) are one past the 12-bit maximum; the high dword of the offset is 2.
2349 define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_split1(ptr %p) {
2350 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2351 ; GFX9-SDAG: ; %bb.0:
2352 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2353 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2354 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2355 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2356 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
2357 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2358 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2359 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2360 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2361 ; GFX9-SDAG-NEXT: s_endpgm
2363 ; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2365 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2366 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2367 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
2368 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2369 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2370 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2371 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2372 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2373 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2374 ; GFX10-NEXT: s_endpgm
2376 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2377 ; GFX11-SDAG: ; %bb.0:
2378 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2379 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2380 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
2381 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2382 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2383 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2384 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2385 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2386 ; GFX11-SDAG-NEXT: s_endpgm
2388 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2389 ; GFX9-GISEL: ; %bb.0:
2390 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2391 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2392 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2393 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2394 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2395 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2396 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2397 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2398 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2399 ; GFX9-GISEL-NEXT: s_endpgm
2401 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_split1:
2402 ; GFX11-GISEL: ; %bb.0:
2403 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2404 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2405 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2406 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2407 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2408 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2409 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2410 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2411 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2412 ; GFX11-GISEL-NEXT: s_endpgm
2413 %gep = getelementptr i8, ptr %p, i64 8589938688 ; byte offset 0x200001000
2414 %load = load volatile i8, ptr %gep, align 1
2415 store i8 %load, ptr undef
2419 ; Offset (1ull << 33) | 8191: the low bits are the 13-bit maximum (0x1fff); the high dword of the offset is 2.
2420 define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split0(ptr %p) {
2421 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
2422 ; GFX9-SDAG: ; %bb.0:
2423 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2424 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2425 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2426 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2427 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
2428 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2429 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] offset:4095 glc
2430 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2431 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2432 ; GFX9-SDAG-NEXT: s_endpgm
2434 ; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
2436 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2437 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2438 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
2439 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2440 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2441 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2442 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2443 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2444 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2445 ; GFX10-NEXT: s_endpgm
2447 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
2448 ; GFX11-SDAG: ; %bb.0:
2449 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2450 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2451 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
2452 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2453 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2454 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] offset:4095 glc dlc
2455 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2456 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2457 ; GFX11-SDAG-NEXT: s_endpgm
2459 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
2460 ; GFX9-GISEL: ; %bb.0:
2461 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2462 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2463 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2464 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2465 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2466 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2467 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2468 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2469 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2470 ; GFX9-GISEL-NEXT: s_endpgm
2472 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split0:
2473 ; GFX11-GISEL: ; %bb.0:
2474 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2475 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2476 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2477 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2478 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2479 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2480 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2481 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2482 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2483 ; GFX11-GISEL-NEXT: s_endpgm
2484 %gep = getelementptr i8, ptr %p, i64 8589942783 ; byte offset 0x200001fff
2485 %load = load volatile i8, ptr %gep, align 1
2486 store i8 %load, ptr undef
2490 ; Offset (1ull << 33) | 8192: the low bits (0x2000) are one past the 13-bit maximum; the high dword of the offset is 2.
2491 define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_split1(ptr %p) {
2492 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
2493 ; GFX9-SDAG: ; %bb.0:
2494 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2495 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2496 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2497 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v1, s1
2498 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
2499 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
2500 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2501 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2502 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2503 ; GFX9-SDAG-NEXT: s_endpgm
2505 ; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
2507 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2508 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2509 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
2510 ; GFX10-NEXT: s_addc_u32 s1, s1, 2
2511 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2512 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2513 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2514 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2515 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2516 ; GFX10-NEXT: s_endpgm
2518 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
2519 ; GFX11-SDAG: ; %bb.0:
2520 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2521 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2522 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
2523 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2524 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2525 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2526 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2527 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2528 ; GFX11-SDAG-NEXT: s_endpgm
2530 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
2531 ; GFX9-GISEL: ; %bb.0:
2532 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2533 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2534 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2535 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 2
2536 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2537 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2538 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2539 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2540 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2541 ; GFX9-GISEL-NEXT: s_endpgm
2543 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_split1:
2544 ; GFX11-GISEL: ; %bb.0:
2545 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2546 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2547 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2548 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2549 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2550 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2551 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2552 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2553 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2554 ; GFX11-GISEL-NEXT: s_endpgm
2555 %gep = getelementptr i8, ptr %p, i64 8589942784 ; byte offset 0x200002000
2556 %load = load volatile i8, ptr %gep, align 1
2557 store i8 %load, ptr undef
2561 ; Offset (1ull << 63) | 2047: the low bits are the 11-bit maximum (0x7ff); the high dword of the offset is 0x80000000, so the i64 offset is negative.
2562 define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split0(ptr %p) {
2563 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
2564 ; GFX9-SDAG: ; %bb.0:
2565 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2566 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2567 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2568 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2569 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2570 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x7ff, v0
2571 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2572 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2573 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2574 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2575 ; GFX9-SDAG-NEXT: s_endpgm
2577 ; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
2579 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2580 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2581 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
2582 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2583 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2584 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2585 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2586 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2587 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2588 ; GFX10-NEXT: s_endpgm
2590 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
2591 ; GFX11-SDAG: ; %bb.0:
2592 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2593 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2594 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2595 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x7ff, s0
2596 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2597 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2598 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2599 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2600 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2601 ; GFX11-SDAG-NEXT: s_endpgm
2603 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
2604 ; GFX9-GISEL: ; %bb.0:
2605 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2606 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2607 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2608 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2609 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2610 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2611 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2612 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2613 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2614 ; GFX9-GISEL-NEXT: s_endpgm
2616 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split0:
2617 ; GFX11-GISEL: ; %bb.0:
2618 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2619 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2620 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2621 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2622 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2623 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2624 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2625 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2626 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2627 ; GFX11-GISEL-NEXT: s_endpgm
2628 %gep = getelementptr i8, ptr %p, i64 -9223372036854773761 ; i64 bit pattern 0x80000000000007ff
2629 %load = load volatile i8, ptr %gep, align 1
2630 store i8 %load, ptr undef
2634 ; Offset (1ull << 63) | 2048: the low bits (0x800) are one past the 11-bit maximum; the high dword of the offset is 0x80000000, so the i64 offset is negative.
2635 define amdgpu_kernel void @flat_inst_salu_offset_64bit_11bit_neg_high_split1(ptr %p) {
2636 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
2637 ; GFX9-SDAG: ; %bb.0:
2638 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2639 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2640 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2641 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2642 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2643 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x800, v0
2644 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2645 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2646 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2647 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2648 ; GFX9-SDAG-NEXT: s_endpgm
2650 ; GFX10-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
2652 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2653 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2654 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
2655 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2656 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2657 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2658 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2659 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2660 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2661 ; GFX10-NEXT: s_endpgm
2663 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
2664 ; GFX11-SDAG: ; %bb.0:
2665 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2666 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2667 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2668 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, s0
2669 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2670 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2671 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2672 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2673 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2674 ; GFX11-SDAG-NEXT: s_endpgm
2676 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
2677 ; GFX9-GISEL: ; %bb.0:
2678 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2679 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2680 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2681 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2682 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2683 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2684 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2685 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2686 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2687 ; GFX9-GISEL-NEXT: s_endpgm
2689 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_11bit_neg_high_split1:
2690 ; GFX11-GISEL: ; %bb.0:
2691 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2692 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2693 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2694 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2695 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2696 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2697 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2698 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2699 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2700 ; GFX11-GISEL-NEXT: s_endpgm
2701 %gep = getelementptr i8, ptr %p, i64 -9223372036854773760 ; i64 bit pattern 0x8000000000000800
2702 %load = load volatile i8, ptr %gep, align 1
2703 store i8 %load, ptr undef
2707 ; Offset (1ull << 63) | 4095: the low bits are the 12-bit maximum (0xfff); the high dword of the offset is 0x80000000, so the i64 offset is negative.
2708 define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split0(ptr %p) {
2709 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
2710 ; GFX9-SDAG: ; %bb.0:
2711 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2712 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2713 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2714 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2715 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2716 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0xfff, v0
2717 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2718 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2719 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2720 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2721 ; GFX9-SDAG-NEXT: s_endpgm
2723 ; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
2725 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2726 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2727 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
2728 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2729 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2730 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2731 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2732 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2733 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2734 ; GFX10-NEXT: s_endpgm
2736 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
2737 ; GFX11-SDAG: ; %bb.0:
2738 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2739 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2740 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2741 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, s0
2742 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2743 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2744 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2745 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2746 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2747 ; GFX11-SDAG-NEXT: s_endpgm
2749 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
2750 ; GFX9-GISEL: ; %bb.0:
2751 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2752 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2753 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2754 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2755 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2756 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2757 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2758 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2759 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2760 ; GFX9-GISEL-NEXT: s_endpgm
2762 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split0:
2763 ; GFX11-GISEL: ; %bb.0:
2764 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2765 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2766 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2767 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2768 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2769 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2770 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2771 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2772 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2773 ; GFX11-GISEL-NEXT: s_endpgm
2774 %gep = getelementptr i8, ptr %p, i64 -9223372036854771713 ; i64 bit pattern 0x8000000000000fff
2775 %load = load volatile i8, ptr %gep, align 1
2776 store i8 %load, ptr undef
2780 ; Offset (1ull << 63) | 4096: the low bits (0x1000) are one past the 12-bit maximum; the high dword of the offset is 0x80000000, so the i64 offset is negative.
2781 define amdgpu_kernel void @flat_inst_salu_offset_64bit_12bit_neg_high_split1(ptr %p) {
2782 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
2783 ; GFX9-SDAG: ; %bb.0:
2784 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2785 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2786 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2787 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2788 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2789 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
2790 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2791 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2792 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2793 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2794 ; GFX9-SDAG-NEXT: s_endpgm
2796 ; GFX10-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
2798 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2799 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2800 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
2801 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2802 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2803 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2804 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2805 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2806 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2807 ; GFX10-NEXT: s_endpgm
2809 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
2810 ; GFX11-SDAG: ; %bb.0:
2811 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2812 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2813 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2814 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, s0
2815 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2816 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2817 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2818 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2819 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2820 ; GFX11-SDAG-NEXT: s_endpgm
2822 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
2823 ; GFX9-GISEL: ; %bb.0:
2824 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2825 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2826 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2827 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2828 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2829 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2830 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2831 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2832 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2833 ; GFX9-GISEL-NEXT: s_endpgm
2835 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_12bit_neg_high_split1:
2836 ; GFX11-GISEL: ; %bb.0:
2837 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2838 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2839 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2840 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2841 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2842 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2843 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2844 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2845 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2846 ; GFX11-GISEL-NEXT: s_endpgm
2847 %gep = getelementptr i8, ptr %p, i64 -9223372036854771712 ; i64 bit pattern 0x8000000000001000
2848 %load = load volatile i8, ptr %gep, align 1
2849 store i8 %load, ptr undef
2853 ; Offset (1ull << 63) | 8191: the low bits are the 13-bit maximum (0x1fff); the high dword of the offset is 0x80000000, so the i64 offset is negative.
; Kernel test: volatile i8 flat load from %p + 0x8000000000001fff
; (INT64_MIN + 8191), followed by a store of the loaded byte.
; The pointer argument is fetched into s[0:1], so the address arithmetic
; starts from SGPRs. In every expected sequence below the flat load has no
; offset modifier; the whole constant is applied by a 64-bit add split into
; a low half (0x1fff) and a high half (0x80000000):
;   - gfx900 and gfx1100 under SelectionDAG expect a VALU add plus
;     add-with-carry (gfx900 builds 0x80000000 with v_bfrev_b32 of 1),
;   - gfx1010 (one shared check block for both selectors) and the GlobalISel
;     gfx900 / gfx1100 runs expect s_add_u32 + s_addc_u32.
2854 define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split0(ptr %p) {
2855 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
2856 ; GFX9-SDAG: ; %bb.0:
2857 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2858 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2859 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2860 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2861 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2862 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
2863 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2864 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2865 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2866 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2867 ; GFX9-SDAG-NEXT: s_endpgm
2869 ; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
2871 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2872 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2873 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
2874 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2875 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2876 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2877 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2878 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2879 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2880 ; GFX10-NEXT: s_endpgm
2882 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
2883 ; GFX11-SDAG: ; %bb.0:
2884 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2885 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2886 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2887 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, s0
2888 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2889 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2890 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2891 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2892 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2893 ; GFX11-SDAG-NEXT: s_endpgm
2895 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
2896 ; GFX9-GISEL: ; %bb.0:
2897 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2898 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2899 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2900 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2901 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2902 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2903 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2904 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2905 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2906 ; GFX9-GISEL-NEXT: s_endpgm
2908 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split0:
2909 ; GFX11-GISEL: ; %bb.0:
2910 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2911 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2912 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2913 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2914 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2915 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2916 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2917 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2918 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2919 ; GFX11-GISEL-NEXT: s_endpgm
; -9223372036854767617 == INT64_MIN + 8191, i.e. (1ull << 63) | 0x1fff.
2920 %gep = getelementptr i8, ptr %p, i64 -9223372036854767617
; Volatile byte load; presumably the source of the glc / glc dlc bits on the
; expected load instructions -- TODO confirm.
2921 %load = load volatile i8, ptr %gep, align 1
; The destination pointer is undef; presumably the store exists only to keep
; the loaded value in use -- TODO confirm.
2922 store i8 %load, ptr undef
2926 ; One past the 13-bit low-bits maximum (8192 = 0x2000), negative high bits (1ull << 63) | 8192
; Kernel test: volatile i8 flat load from %p + 0x8000000000002000
; (INT64_MIN + 8192), followed by a store of the loaded byte.
; The low part 0x2000 is one past the largest 13-bit value (0x1fff).
; The pointer argument is fetched into s[0:1], so the address arithmetic
; starts from SGPRs. In every expected sequence below the flat load has no
; offset modifier; the whole constant is applied by a 64-bit add split into
; a low half (0x2000) and a high half (0x80000000):
;   - gfx900 and gfx1100 under SelectionDAG expect a VALU add plus
;     add-with-carry (gfx900 builds 0x80000000 with v_bfrev_b32 of 1),
;   - gfx1010 (one shared check block for both selectors) and the GlobalISel
;     gfx900 / gfx1100 runs expect s_add_u32 + s_addc_u32.
2927 define amdgpu_kernel void @flat_inst_salu_offset_64bit_13bit_neg_high_split1(ptr %p) {
2928 ; GFX9-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
2929 ; GFX9-SDAG: ; %bb.0:
2930 ; GFX9-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2931 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v1, 1
2932 ; GFX9-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2933 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v0, s0
2934 ; GFX9-SDAG-NEXT: v_mov_b32_e32 v2, s1
2935 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
2936 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
2937 ; GFX9-SDAG-NEXT: flat_load_ubyte v0, v[0:1] glc
2938 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2939 ; GFX9-SDAG-NEXT: flat_store_byte v[0:1], v0
2940 ; GFX9-SDAG-NEXT: s_endpgm
2942 ; GFX10-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
2944 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2945 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2946 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
2947 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2948 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
2949 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
2950 ; GFX10-NEXT: flat_load_ubyte v0, v[0:1] glc dlc
2951 ; GFX10-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2952 ; GFX10-NEXT: flat_store_byte v[0:1], v0
2953 ; GFX10-NEXT: s_endpgm
2955 ; GFX11-SDAG-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
2956 ; GFX11-SDAG: ; %bb.0:
2957 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2958 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2959 ; GFX11-SDAG-NEXT: v_mov_b32_e32 v1, s1
2960 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, s0
2961 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_2)
2962 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
2963 ; GFX11-SDAG-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2964 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2965 ; GFX11-SDAG-NEXT: flat_store_b8 v[0:1], v0
2966 ; GFX11-SDAG-NEXT: s_endpgm
2968 ; GFX9-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
2969 ; GFX9-GISEL: ; %bb.0:
2970 ; GFX9-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2971 ; GFX9-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2972 ; GFX9-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2973 ; GFX9-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2974 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v0, s0
2975 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v1, s1
2976 ; GFX9-GISEL-NEXT: flat_load_ubyte v0, v[0:1] glc
2977 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2978 ; GFX9-GISEL-NEXT: flat_store_byte v[0:1], v0
2979 ; GFX9-GISEL-NEXT: s_endpgm
2981 ; GFX11-GISEL-LABEL: flat_inst_salu_offset_64bit_13bit_neg_high_split1:
2982 ; GFX11-GISEL: ; %bb.0:
2983 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2984 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2985 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2986 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 0x80000000
2987 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2988 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2989 ; GFX11-GISEL-NEXT: flat_load_u8 v0, v[0:1] glc dlc
2990 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) lgkmcnt(0)
2991 ; GFX11-GISEL-NEXT: flat_store_b8 v[0:1], v0
2992 ; GFX11-GISEL-NEXT: s_endpgm
; -9223372036854767616 == INT64_MIN + 8192, i.e. (1ull << 63) | 0x2000.
2993 %gep = getelementptr i8, ptr %p, i64 -9223372036854767616
; Volatile byte load; presumably the source of the glc / glc dlc bits on the
; expected load instructions -- TODO confirm.
2994 %load = load volatile i8, ptr %gep, align 1
; The destination pointer is undef; presumably the store exists only to keep
; the loaded value in use -- TODO confirm.
2995 store i8 %load, ptr undef