1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5 ; Test splitting global instruction offsets into the low and high bits
6 ; when the offset doesn't fit in the offset field.
; Byte load at %p + 1 with the pointer held in v[0:1]. The checks for both
; gfx900 and gfx1010 expect the constant to be folded into the load as
; offset:1, with no separate address arithmetic.
8 define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
9 ; GFX9-LABEL: global_inst_valu_offset_1:
11 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
13 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16 ; GFX10-LABEL: global_inst_valu_offset_1:
18 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
21 ; GFX10-NEXT: s_waitcnt vmcnt(0)
22 ; GFX10-NEXT: s_setpc_b64 s[30:31]
23 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
24 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 2047. The checks for both gfx900 and gfx1010 expect the
; constant to be encoded directly on the load as offset:2047.
28 define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
29 ; GFX9-LABEL: global_inst_valu_offset_11bit_max:
31 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
32 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
33 ; GFX9-NEXT: s_waitcnt vmcnt(0)
34 ; GFX9-NEXT: s_setpc_b64 s[30:31]
36 ; GFX10-LABEL: global_inst_valu_offset_11bit_max:
38 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
39 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
40 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
41 ; GFX10-NEXT: s_waitcnt vmcnt(0)
42 ; GFX10-NEXT: s_setpc_b64 s[30:31]
43 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
44 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 4095. gfx900 is expected to encode the constant directly
; (offset:4095). gfx1010 is expected to split it: a 64-bit add of 0x800 to the
; base in v[0:1], with 2047 left in the offset field (0x800 + 2047 = 4095).
48 define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
49 ; GFX9-LABEL: global_inst_valu_offset_12bit_max:
51 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
52 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
53 ; GFX9-NEXT: s_waitcnt vmcnt(0)
54 ; GFX9-NEXT: s_setpc_b64 s[30:31]
56 ; GFX10-LABEL: global_inst_valu_offset_12bit_max:
58 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
59 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
60 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
61 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
62 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
63 ; GFX10-NEXT: s_waitcnt vmcnt(0)
64 ; GFX10-NEXT: s_setpc_b64 s[30:31]
65 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
66 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 8191. Both targets are expected to split the constant
; between a 64-bit add to the base and the load's offset field:
;   gfx900:  base + 0x1000, offset:4095  (0x1000 + 4095 = 8191)
;   gfx1010: base + 0x1800, offset:2047  (0x1800 + 2047 = 8191)
70 define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
71 ; GFX9-LABEL: global_inst_valu_offset_13bit_max:
73 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
74 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
75 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
76 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
77 ; GFX9-NEXT: s_waitcnt vmcnt(0)
78 ; GFX9-NEXT: s_setpc_b64 s[30:31]
80 ; GFX10-LABEL: global_inst_valu_offset_13bit_max:
82 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
83 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
84 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
85 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
86 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
87 ; GFX10-NEXT: s_waitcnt vmcnt(0)
88 ; GFX10-NEXT: s_setpc_b64 s[30:31]
89 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
90 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 2048. The checks for both gfx900 and gfx1010 expect the
; negative constant to be encoded directly on the load as offset:-2048.
94 define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
95 ; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
97 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
98 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
99 ; GFX9-NEXT: s_waitcnt vmcnt(0)
100 ; GFX9-NEXT: s_setpc_b64 s[30:31]
102 ; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
104 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
106 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
107 ; GFX10-NEXT: s_waitcnt vmcnt(0)
108 ; GFX10-NEXT: s_setpc_b64 s[30:31]
109 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
110 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 4096. gfx900 is expected to encode the constant directly
; (offset:-4096). gfx1010 is expected to add the whole constant to the base as
; a 64-bit add (0xfffff000 to the low dword, -1 to the high dword) and issue
; the load with no offset.
114 define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
115 ; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
117 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
118 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
119 ; GFX9-NEXT: s_waitcnt vmcnt(0)
120 ; GFX9-NEXT: s_setpc_b64 s[30:31]
122 ; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
124 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
125 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
126 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
127 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
128 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
129 ; GFX10-NEXT: s_waitcnt vmcnt(0)
130 ; GFX10-NEXT: s_setpc_b64 s[30:31]
131 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
132 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 8192. On both gfx900 and gfx1010 the checks expect the
; whole constant to be added to the base as a 64-bit add (0xffffe000 to the
; low dword, -1 to the high dword), and the load to carry no offset.
136 define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
137 ; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
139 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
140 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
141 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
142 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
143 ; GFX9-NEXT: s_waitcnt vmcnt(0)
144 ; GFX9-NEXT: s_setpc_b64 s[30:31]
146 ; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
148 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
149 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
150 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
151 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
152 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
153 ; GFX10-NEXT: s_waitcnt vmcnt(0)
154 ; GFX10-NEXT: s_setpc_b64 s[30:31]
155 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
156 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 4095 (the same constant that
; global_inst_valu_offset_12bit_max uses). gfx900 is expected to encode it
; directly (offset:4095); gfx1010 is expected to add 0x800 to the 64-bit base
; and keep 2047 in the offset field.
160 define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
161 ; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
163 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
165 ; GFX9-NEXT: s_waitcnt vmcnt(0)
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
168 ; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
170 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
172 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
173 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
174 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
175 ; GFX10-NEXT: s_waitcnt vmcnt(0)
176 ; GFX10-NEXT: s_setpc_b64 s[30:31]
177 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
178 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 8191 (the same constant that
; global_inst_valu_offset_13bit_max uses). Expected split per target:
;   gfx900:  base + 0x1000, offset:4095
;   gfx1010: base + 0x1800, offset:2047
182 define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
183 ; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
185 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
186 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
187 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
188 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
189 ; GFX9-NEXT: s_waitcnt vmcnt(0)
190 ; GFX9-NEXT: s_setpc_b64 s[30:31]
192 ; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
194 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
195 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
196 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
197 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
198 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
199 ; GFX10-NEXT: s_waitcnt vmcnt(0)
200 ; GFX10-NEXT: s_setpc_b64 s[30:31]
201 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
202 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p + 16383. Expected split per target:
;   gfx900:  base + 0x3000, offset:4095  (0x3000 + 4095 = 16383)
;   gfx1010: base + 0x3800, offset:2047  (0x3800 + 2047 = 16383)
206 define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
207 ; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
209 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
210 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
211 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
212 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
213 ; GFX9-NEXT: s_waitcnt vmcnt(0)
214 ; GFX9-NEXT: s_setpc_b64 s[30:31]
216 ; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
218 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
220 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0
221 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
222 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
223 ; GFX10-NEXT: s_waitcnt vmcnt(0)
224 ; GFX10-NEXT: s_setpc_b64 s[30:31]
225 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
226 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 4096 (the same constant that
; global_inst_valu_offset_neg_12bit_max uses). gfx900 is expected to encode it
; directly (offset:-4096); gfx1010 is expected to add the whole constant to
; the 64-bit base (0xfffff000 / -1) and load with no offset.
230 define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
231 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
233 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
234 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
235 ; GFX9-NEXT: s_waitcnt vmcnt(0)
236 ; GFX9-NEXT: s_setpc_b64 s[30:31]
238 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
240 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
241 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
242 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
243 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
244 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
245 ; GFX10-NEXT: s_waitcnt vmcnt(0)
246 ; GFX10-NEXT: s_setpc_b64 s[30:31]
247 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
248 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 8192 (the same constant that
; global_inst_valu_offset_neg_13bit_max uses). Both targets are expected to
; add the whole constant to the 64-bit base (0xffffe000 to the low dword, -1
; to the high dword) and load with no offset.
252 define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
253 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
255 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
256 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
257 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
258 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
259 ; GFX9-NEXT: s_waitcnt vmcnt(0)
260 ; GFX9-NEXT: s_setpc_b64 s[30:31]
262 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
264 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
265 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
266 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
267 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
268 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
269 ; GFX10-NEXT: s_waitcnt vmcnt(0)
270 ; GFX10-NEXT: s_setpc_b64 s[30:31]
271 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
272 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte load at %p - 16384. Both targets are expected to add the whole constant
; to the 64-bit base (0xffffc000 to the low dword, -1 to the high dword) and
; load with no offset.
276 define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
277 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
279 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
280 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
281 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
282 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
283 ; GFX9-NEXT: s_waitcnt vmcnt(0)
284 ; GFX9-NEXT: s_setpc_b64 s[30:31]
286 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
288 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
289 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
290 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
291 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
292 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
293 ; GFX10-NEXT: s_waitcnt vmcnt(0)
294 ; GFX10-NEXT: s_setpc_b64 s[30:31]
295 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
296 %load = load i8, i8 addrspace(1)* %gep, align 4
300 ; Fill 11-bit low-bits (1ull << 33) | 2047
; The gep constant 8589936639 equals (1 << 33) + 2047. Both targets are
; expected to add the high part to the base as a 64-bit add (0 to the low
; dword, 2 to the high dword) and to keep 2047 in the load's offset field.
301 define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
302 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
304 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
306 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
307 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
308 ; GFX9-NEXT: s_waitcnt vmcnt(0)
309 ; GFX9-NEXT: s_setpc_b64 s[30:31]
311 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
313 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
315 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
316 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
317 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
318 ; GFX10-NEXT: s_waitcnt vmcnt(0)
319 ; GFX10-NEXT: s_setpc_b64 s[30:31]
320 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
321 %load = load i8, i8 addrspace(1)* %gep, align 4
325 ; Fill 11-bit low-bits (1ull << 33) | 2048
; The gep constant 8589936640 equals (1 << 33) + 2048. Expected per target:
;   gfx900:  64-bit add of (low 0, high 2) to the base, then offset:2048
;   gfx1010: 64-bit add of (low 0x800, high 2) to the base, load with no offset
326 define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
327 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
329 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
330 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
331 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
332 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
333 ; GFX9-NEXT: s_waitcnt vmcnt(0)
334 ; GFX9-NEXT: s_setpc_b64 s[30:31]
336 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
338 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
339 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
340 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
341 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
342 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
343 ; GFX10-NEXT: s_waitcnt vmcnt(0)
344 ; GFX10-NEXT: s_setpc_b64 s[30:31]
345 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
346 %load = load i8, i8 addrspace(1)* %gep, align 4
350 ; Fill 12-bit low-bits (1ull << 33) | 4095
; The gep constant 8589938687 equals (1 << 33) + 4095. Expected per target:
;   gfx900:  64-bit add of (low 0, high 2) to the base, then offset:4095
;   gfx1010: 64-bit add of (low 0x800, high 2) to the base, then offset:2047
351 define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
352 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
354 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
356 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
357 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
358 ; GFX9-NEXT: s_waitcnt vmcnt(0)
359 ; GFX9-NEXT: s_setpc_b64 s[30:31]
361 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
363 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
365 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
366 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
367 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
368 ; GFX10-NEXT: s_waitcnt vmcnt(0)
369 ; GFX10-NEXT: s_setpc_b64 s[30:31]
370 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
371 %load = load i8, i8 addrspace(1)* %gep, align 4
375 ; Fill 12-bit low-bits (1ull << 33) | 4096
; The gep constant 8589938688 equals (1 << 33) + 4096. Both targets are
; expected to add the whole constant to the base as a 64-bit add (0x1000 to
; the low dword, 2 to the high dword) and to load with no offset.
376 define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
377 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
379 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
380 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
381 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
382 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
383 ; GFX9-NEXT: s_waitcnt vmcnt(0)
384 ; GFX9-NEXT: s_setpc_b64 s[30:31]
386 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
388 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
389 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
390 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
391 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
392 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
393 ; GFX10-NEXT: s_waitcnt vmcnt(0)
394 ; GFX10-NEXT: s_setpc_b64 s[30:31]
395 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
396 %load = load i8, i8 addrspace(1)* %gep, align 4
400 ; Fill 13-bit low-bits (1ull << 33) | 8191
; The gep constant 8589942783 equals (1 << 33) + 8191. Expected per target:
;   gfx900:  64-bit add of (low 0x1000, high 2) to the base, then offset:4095
;   gfx1010: 64-bit add of (low 0x1800, high 2) to the base, then offset:2047
401 define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
402 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
404 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
406 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
407 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
408 ; GFX9-NEXT: s_waitcnt vmcnt(0)
409 ; GFX9-NEXT: s_setpc_b64 s[30:31]
411 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
413 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
414 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
415 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
416 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
417 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
418 ; GFX10-NEXT: s_waitcnt vmcnt(0)
419 ; GFX10-NEXT: s_setpc_b64 s[30:31]
420 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
421 %load = load i8, i8 addrspace(1)* %gep, align 4
425 ; Fill 13-bit low-bits (1ull << 33) | 8192
; The gep constant 8589942784 equals (1 << 33) + 8192. Both targets are
; expected to add the whole constant to the base as a 64-bit add (0x2000 to
; the low dword, 2 to the high dword) and to load with no offset.
426 define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
427 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
429 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
430 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
431 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
432 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
433 ; GFX9-NEXT: s_waitcnt vmcnt(0)
434 ; GFX9-NEXT: s_setpc_b64 s[30:31]
436 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
438 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
439 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
440 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
441 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
442 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
443 ; GFX10-NEXT: s_waitcnt vmcnt(0)
444 ; GFX10-NEXT: s_setpc_b64 s[30:31]
445 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
446 %load = load i8, i8 addrspace(1)* %gep, align 4
450 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The gep constant -9223372036854773761 is the signed i64 spelling of
; (1 << 63) | 2047. Expected per target:
;   gfx900:  adds 0x1000 to the low dword and v2 to the high dword, where v2
;            comes from v_bfrev_b32 v2, 1 (bit-reverse of 1, i.e. 0x80000000);
;            the load then uses offset:-2049 (0x1000 - 2049 = 2047).
;   gfx1010: adds 0x800 to the low dword and 0x80000000 to the high dword;
;            the load then uses offset:-1 (0x800 - 1 = 2047).
451 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
452 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
454 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
455 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
456 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
457 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
458 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049
459 ; GFX9-NEXT: s_waitcnt vmcnt(0)
460 ; GFX9-NEXT: s_setpc_b64 s[30:31]
462 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
464 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
466 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
467 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
468 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
469 ; GFX10-NEXT: s_waitcnt vmcnt(0)
470 ; GFX10-NEXT: s_setpc_b64 s[30:31]
471 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
472 %load = load i8, i8 addrspace(1)* %gep, align 4
476 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The gep constant -9223372036854773760 is the signed i64 spelling of
; (1 << 63) | 2048. Expected per target:
;   gfx900:  adds 0x1000 to the low dword and 0x80000000 (built in v2 with
;            v_bfrev_b32 v2, 1) to the high dword; the load then uses
;            offset:-2048 (0x1000 - 2048 = 2048).
;   gfx1010: adds 0x800 to the low dword and 0x80000000 to the high dword;
;            the load carries no offset.
477 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
478 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
480 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
481 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
482 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
483 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
484 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
485 ; GFX9-NEXT: s_waitcnt vmcnt(0)
486 ; GFX9-NEXT: s_setpc_b64 s[30:31]
488 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
490 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
491 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
492 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
493 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
494 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
495 ; GFX10-NEXT: s_waitcnt vmcnt(0)
496 ; GFX10-NEXT: s_setpc_b64 s[30:31]
497 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
498 %load = load i8, i8 addrspace(1)* %gep, align 4
502 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The gep constant -9223372036854771713 is the signed i64 spelling of
; (1 << 63) | 4095. Both targets are expected to add 0x1000 to the low dword
; and 0x80000000 to the high dword, then load with offset:-1
; (0x1000 - 1 = 4095). gfx900 builds the high-dword addend in v2 with
; v_bfrev_b32 v2, 1; gfx1010 uses the literal 0x80000000 in the add itself.
503 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
504 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
506 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
507 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
508 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
509 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
510 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
511 ; GFX9-NEXT: s_waitcnt vmcnt(0)
512 ; GFX9-NEXT: s_setpc_b64 s[30:31]
514 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
516 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
517 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
518 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
519 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
520 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
521 ; GFX10-NEXT: s_waitcnt vmcnt(0)
522 ; GFX10-NEXT: s_setpc_b64 s[30:31]
523 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
524 %load = load i8, i8 addrspace(1)* %gep, align 4
528 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; The gep constant -9223372036854771712 is the signed i64 spelling of
; (1 << 63) | 4096. Both targets are expected to add the whole constant to
; the base (0x1000 to the low dword, 0x80000000 to the high dword) and load
; with no offset. gfx900 builds the high-dword addend in v2 with
; v_bfrev_b32 v2, 1; gfx1010 uses the literal 0x80000000 in the add itself.
529 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
530 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
532 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
533 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
534 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
535 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
536 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
537 ; GFX9-NEXT: s_waitcnt vmcnt(0)
538 ; GFX9-NEXT: s_setpc_b64 s[30:31]
540 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
542 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
543 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
544 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
545 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
546 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
547 ; GFX10-NEXT: s_waitcnt vmcnt(0)
548 ; GFX10-NEXT: s_setpc_b64 s[30:31]
549 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
550 %load = load i8, i8 addrspace(1)* %gep, align 4
554 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The gep constant -9223372036854767617 is the signed i64 spelling of
; (1 << 63) | 8191. Both targets are expected to add 0x2000 to the low dword
; and 0x80000000 to the high dword, then load with offset:-1
; (0x2000 - 1 = 8191). gfx900 builds the high-dword addend in v2 with
; v_bfrev_b32 v2, 1; gfx1010 uses the literal 0x80000000 in the add itself.
555 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
556 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
558 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
559 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
560 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
561 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
562 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
563 ; GFX9-NEXT: s_waitcnt vmcnt(0)
564 ; GFX9-NEXT: s_setpc_b64 s[30:31]
566 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
568 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
569 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
570 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
571 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
572 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
573 ; GFX10-NEXT: s_waitcnt vmcnt(0)
574 ; GFX10-NEXT: s_setpc_b64 s[30:31]
575 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
576 %load = load i8, i8 addrspace(1)* %gep, align 4
580 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The gep constant -9223372036854767616 is the signed i64 spelling of
; (1 << 63) | 8192. Both targets are expected to add the whole constant to
; the base (0x2000 to the low dword, 0x80000000 to the high dword) and load
; with no offset. gfx900 builds the high-dword addend in v2 with
; v_bfrev_b32 v2, 1; gfx1010 uses the literal 0x80000000 in the add itself.
581 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
582 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
584 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
585 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
586 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
587 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
588 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
589 ; GFX9-NEXT: s_waitcnt vmcnt(0)
590 ; GFX9-NEXT: s_setpc_b64 s[30:31]
592 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
594 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
595 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
596 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
597 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
598 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
599 ; GFX10-NEXT: s_waitcnt vmcnt(0)
600 ; GFX10-NEXT: s_setpc_b64 s[30:31]
601 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
602 %load = load i8, i8 addrspace(1)* %gep, align 4
; Kernel variant of the offset-1 test: a volatile byte load at %p + 1 whose
; result is stored to an undef pointer. The checks expect the pointer to be
; fetched into s[0:1] with s_load_dwordx2 and used as the scalar base of the
; load, with v0 = 0 as the VGPR operand. On both gfx900 and gfx1010 the
; constant is expected to be folded into the load as offset:1.
606 define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
607 ; GFX9-LABEL: global_inst_salu_offset_1:
609 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
610 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
611 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
612 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc
613 ; GFX9-NEXT: s_waitcnt vmcnt(0)
614 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
615 ; GFX9-NEXT: s_endpgm
617 ; GFX10-LABEL: global_inst_salu_offset_1:
619 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
620 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
621 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
622 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc
623 ; GFX10-NEXT: s_waitcnt vmcnt(0)
624 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
625 ; GFX10-NEXT: s_endpgm
626 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
627 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
628 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p + 2047, result stored to an undef
; pointer. The checks expect the pointer in s[0:1] as the scalar base, v0 = 0
; as the VGPR operand, and the constant folded into the load as offset:2047
; on both gfx900 and gfx1010.
632 define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
633 ; GFX9-LABEL: global_inst_salu_offset_11bit_max:
635 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
636 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
637 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
638 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc
639 ; GFX9-NEXT: s_waitcnt vmcnt(0)
640 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
641 ; GFX9-NEXT: s_endpgm
643 ; GFX10-LABEL: global_inst_salu_offset_11bit_max:
645 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
646 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
647 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
648 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
649 ; GFX10-NEXT: s_waitcnt vmcnt(0)
650 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
651 ; GFX10-NEXT: s_endpgm
652 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
653 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
654 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p + 4095, result stored to an undef
; pointer. The pointer is expected in s[0:1] as the scalar base. Expected
; handling of the constant:
;   gfx900:  v0 = 0, offset:4095
;   gfx1010: v0 = 0x800, offset:2047  (0x800 + 2047 = 4095)
658 define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
659 ; GFX9-LABEL: global_inst_salu_offset_12bit_max:
661 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
662 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
663 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
664 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
665 ; GFX9-NEXT: s_waitcnt vmcnt(0)
666 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
667 ; GFX9-NEXT: s_endpgm
669 ; GFX10-LABEL: global_inst_salu_offset_12bit_max:
671 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
672 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x800
673 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
674 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
675 ; GFX10-NEXT: s_waitcnt vmcnt(0)
676 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
677 ; GFX10-NEXT: s_endpgm
678 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
679 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
680 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p + 8191, result stored to an undef
; pointer. The pointer is expected in s[0:1] as the scalar base; the part of
; the constant that is not in the offset field is placed in v0:
;   gfx900:  v0 = 0x1000, offset:4095  (0x1000 + 4095 = 8191)
;   gfx1010: v0 = 0x1800, offset:2047  (0x1800 + 2047 = 8191)
684 define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
685 ; GFX9-LABEL: global_inst_salu_offset_13bit_max:
687 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
688 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000
689 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
690 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
691 ; GFX9-NEXT: s_waitcnt vmcnt(0)
692 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
693 ; GFX9-NEXT: s_endpgm
695 ; GFX10-LABEL: global_inst_salu_offset_13bit_max:
697 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
698 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800
699 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
700 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
701 ; GFX10-NEXT: s_waitcnt vmcnt(0)
702 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
703 ; GFX10-NEXT: s_endpgm
704 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
705 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
706 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p - 2048, result stored to an undef
; pointer. The checks expect the pointer in s[0:1] as the scalar base, v0 = 0
; as the VGPR operand, and the constant folded into the load as offset:-2048
; on both gfx900 and gfx1010.
710 define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
711 ; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
713 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
714 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
715 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
716 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc
717 ; GFX9-NEXT: s_waitcnt vmcnt(0)
718 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
719 ; GFX9-NEXT: s_endpgm
721 ; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
723 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
724 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
725 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
726 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc
727 ; GFX10-NEXT: s_waitcnt vmcnt(0)
728 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
729 ; GFX10-NEXT: s_endpgm
730 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
731 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
732 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p - 4096, result stored to an undef
; pointer. Expected per target:
;   gfx900:  pointer in s[0:1] as the scalar base, v0 = 0, offset:-4096.
;   gfx1010: the constant is added to s[0:1] with a 64-bit vector add
;            (0xfffff000 to the low dword, -1 to the high dword) producing
;            v[0:1], and the load addresses through v[0:1] with no scalar
;            base ("off") and no offset.
736 define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
737 ; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
739 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
740 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
741 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
742 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
743 ; GFX9-NEXT: s_waitcnt vmcnt(0)
744 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
745 ; GFX9-NEXT: s_endpgm
747 ; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
749 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
750 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
751 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
752 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
753 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
754 ; GFX10-NEXT: s_waitcnt vmcnt(0)
755 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
756 ; GFX10-NEXT: s_endpgm
757 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
758 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
759 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p - 8192, result stored to an undef
; pointer. Neither target is expected to use the load's offset field:
;   gfx900:  the constant is added to s[0:1] with scalar adds
;            (s_add_u32 0xffffe000 / s_addc_u32 -1); the load uses s[0:1] as
;            the scalar base with v0 = 0.
;   gfx1010: the constant is added to s[0:1] with a 64-bit vector add
;            producing v[0:1]; the load addresses through v[0:1] with no
;            scalar base ("off").
763 define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
764 ; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
766 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
767 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
768 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
769 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
770 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
771 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
772 ; GFX9-NEXT: s_waitcnt vmcnt(0)
773 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
774 ; GFX9-NEXT: s_endpgm
776 ; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
778 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
779 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
780 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
781 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
782 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
783 ; GFX10-NEXT: s_waitcnt vmcnt(0)
784 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
785 ; GFX10-NEXT: s_endpgm
786 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
787 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
788 store i8 %load, i8 addrspace(1)* undef
; Kernel variant: volatile byte load at %p + 4095 (the same constant that
; global_inst_salu_offset_12bit_max uses), result stored to an undef pointer.
; The pointer is expected in s[0:1] as the scalar base. Expected handling of
; the constant:
;   gfx900:  v0 = 0, offset:4095
;   gfx1010: v0 = 0x800, offset:2047
792 define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
793 ; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
795 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
796 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
797 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
798 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
799 ; GFX9-NEXT: s_waitcnt vmcnt(0)
800 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
801 ; GFX9-NEXT: s_endpgm
803 ; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
805 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
806 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x800
807 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
808 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
809 ; GFX10-NEXT: s_waitcnt vmcnt(0)
810 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
811 ; GFX10-NEXT: s_endpgm
812 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
813 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
814 store i8 %load, i8 addrspace(1)* undef
; Kernel case: volatile i8 load from %p + 8191; the loaded byte is then stored
; to an undef address. The gfx900 checks expect the constant split into
; v0 = 0x1000 plus offset:4095; the gfx1010 checks expect v0 = 0x1800 plus
; offset:2047.
818 define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
819 ; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
821 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
822 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000
823 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
824 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
825 ; GFX9-NEXT: s_waitcnt vmcnt(0)
826 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
827 ; GFX9-NEXT: s_endpgm
829 ; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
831 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
832 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800
833 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
834 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
835 ; GFX10-NEXT: s_waitcnt vmcnt(0)
836 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
837 ; GFX10-NEXT: s_endpgm
838 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
839 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
840 store i8 %load, i8 addrspace(1)* undef
; Kernel case: volatile i8 load from %p + 16383; the loaded byte is then stored
; to an undef address. The gfx900 checks expect the constant split into
; v0 = 0x3000 plus offset:4095; the gfx1010 checks expect v0 = 0x3800 plus
; offset:2047.
844 define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
845 ; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
847 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
848 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3000
849 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
850 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
851 ; GFX9-NEXT: s_waitcnt vmcnt(0)
852 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
853 ; GFX9-NEXT: s_endpgm
855 ; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
857 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
858 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800
859 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
860 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
861 ; GFX10-NEXT: s_waitcnt vmcnt(0)
862 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
863 ; GFX10-NEXT: s_endpgm
864 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
865 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
866 store i8 %load, i8 addrspace(1)* undef
; Kernel case: volatile i8 load from %p - 4096; the loaded byte is then stored
; to an undef address. The gfx900 checks expect the whole constant as the
; immediate (v0 = 0, offset:-4096); the gfx1010 checks expect it added to the
; base with v_add_co_u32 0xfffff000 / v_add_co_ci_u32_e64 -1 and no immediate
; offset on the load.
870 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
871 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
873 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
874 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
875 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
876 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
877 ; GFX9-NEXT: s_waitcnt vmcnt(0)
878 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
879 ; GFX9-NEXT: s_endpgm
881 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
883 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
884 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
885 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
886 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
887 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
888 ; GFX10-NEXT: s_waitcnt vmcnt(0)
889 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
890 ; GFX10-NEXT: s_endpgm
891 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
892 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
893 store i8 %load, i8 addrspace(1)* undef
; Kernel case: volatile i8 load from %p - 8192; the loaded byte is then stored
; to an undef address. Neither check set expects an immediate offset on the
; load: gfx900 adds the constant to the scalar base (s_add_u32 0xffffe000,
; s_addc_u32 -1), gfx1010 adds it with v_add_co_u32 / v_add_co_ci_u32_e64.
897 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
898 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
900 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
901 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
902 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
903 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
904 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
905 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
906 ; GFX9-NEXT: s_waitcnt vmcnt(0)
907 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
908 ; GFX9-NEXT: s_endpgm
910 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
912 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
913 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
914 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
915 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
916 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
917 ; GFX10-NEXT: s_waitcnt vmcnt(0)
918 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
919 ; GFX10-NEXT: s_endpgm
920 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
921 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
922 store i8 %load, i8 addrspace(1)* undef
; Kernel case: volatile i8 load from %p - 16384; the loaded byte is then stored
; to an undef address. Neither check set expects an immediate offset on the
; load: gfx900 adds the constant to the scalar base (s_add_u32 0xffffc000,
; s_addc_u32 -1), gfx1010 adds it with v_add_co_u32 / v_add_co_ci_u32_e64.
926 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
927 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
929 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
930 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
931 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
932 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000
933 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
934 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
935 ; GFX9-NEXT: s_waitcnt vmcnt(0)
936 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
937 ; GFX9-NEXT: s_endpgm
939 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
941 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
942 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
943 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
944 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
945 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
946 ; GFX10-NEXT: s_waitcnt vmcnt(0)
947 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
948 ; GFX10-NEXT: s_endpgm
949 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
950 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
951 store i8 %load, i8 addrspace(1)* undef
955 ; Fill 11-bit low-bits (1ull << 33) | 2047
; Kernel case: volatile i8 load from %p + 8589936639 (= 2^33 + 2047); the
; loaded byte is then stored to an undef address. The gfx900 checks expect the
; whole constant added to the scalar base (s_add_u32 0x7ff, s_addc_u32 2); the
; gfx1010 checks expect only the high part added (v_add_co_u32 0,
; v_add_co_ci_u32_e64 2) with the low 2047 kept as offset:2047.
956 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
957 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
959 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
960 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
961 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
962 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
963 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
964 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
965 ; GFX9-NEXT: s_waitcnt vmcnt(0)
966 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
967 ; GFX9-NEXT: s_endpgm
969 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
971 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
972 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
973 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0, s0
974 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
975 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
976 ; GFX10-NEXT: s_waitcnt vmcnt(0)
977 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
978 ; GFX10-NEXT: s_endpgm
979 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
980 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
981 store i8 %load, i8 addrspace(1)* undef
985 ; Low bits one past the 11-bit maximum: (1ull << 33) | 2048
; Kernel case: volatile i8 load from %p + 8589936640 (= 2^33 + 2048); the
; loaded byte is then stored to an undef address. Neither check set expects an
; immediate offset on the load: gfx900 adds 0x800 / 2 to the scalar base with
; s_add_u32 + s_addc_u32, gfx1010 adds the same halves with v_add_co_u32 /
; v_add_co_ci_u32_e64.
986 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
987 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
989 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
990 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
991 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
992 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
993 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
994 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
995 ; GFX9-NEXT: s_waitcnt vmcnt(0)
996 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
997 ; GFX9-NEXT: s_endpgm
999 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
1001 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1002 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1003 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
1004 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1005 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1006 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1007 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1008 ; GFX10-NEXT: s_endpgm
1009 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
1010 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1011 store i8 %load, i8 addrspace(1)* undef
1015 ; Fill 12-bit low-bits (1ull << 33) | 4095
; Kernel case: volatile i8 load from %p + 8589938687 (= 2^33 + 4095); the
; loaded byte is then stored to an undef address. The gfx900 checks expect the
; whole constant added to the scalar base (s_add_u32 0xfff, s_addc_u32 2); the
; gfx1010 checks expect 0x800 / 2 added with v_add_co_u32 /
; v_add_co_ci_u32_e64 and the remaining 2047 kept as offset:2047.
1016 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
1017 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1019 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1020 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1021 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1022 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1023 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1024 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1025 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1026 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1027 ; GFX9-NEXT: s_endpgm
1029 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1031 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1032 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1033 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x800, s0
1034 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1035 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
1036 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1037 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1038 ; GFX10-NEXT: s_endpgm
1039 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
1040 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1041 store i8 %load, i8 addrspace(1)* undef
1045 ; Low bits one past the 12-bit maximum: (1ull << 33) | 4096
; Kernel case: volatile i8 load from %p + 8589938688 (= 2^33 + 4096); the
; loaded byte is then stored to an undef address. Neither check set expects an
; immediate offset on the load: gfx900 adds 0x1000 / 2 to the scalar base with
; s_add_u32 + s_addc_u32, gfx1010 adds the same halves with v_add_co_u32 /
; v_add_co_ci_u32_e64.
1046 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
1047 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1049 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1050 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1051 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1052 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1053 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1054 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1055 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1056 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1057 ; GFX9-NEXT: s_endpgm
1059 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1061 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1062 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1063 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
1064 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1065 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1066 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1067 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1068 ; GFX10-NEXT: s_endpgm
1069 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
1070 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1071 store i8 %load, i8 addrspace(1)* undef
1075 ; Fill 13-bit low-bits (1ull << 33) | 8191
; Kernel case: volatile i8 load from %p + 8589942783 (= 2^33 + 8191); the
; loaded byte is then stored to an undef address. The gfx900 checks expect the
; whole constant added to the scalar base (s_add_u32 0x1fff, s_addc_u32 2); the
; gfx1010 checks expect 0x1800 / 2 added with v_add_co_u32 /
; v_add_co_ci_u32_e64 and the remaining 2047 kept as offset:2047.
1076 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
1077 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1079 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1080 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1081 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1082 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1083 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1084 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1085 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1086 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1087 ; GFX9-NEXT: s_endpgm
1089 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1091 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1092 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1093 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x1800, s0
1094 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1095 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
1096 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1097 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1098 ; GFX10-NEXT: s_endpgm
1099 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
1100 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1101 store i8 %load, i8 addrspace(1)* undef
1105 ; Low bits one past the 13-bit maximum: (1ull << 33) | 8192
; Kernel case: volatile i8 load from %p + 8589942784 (= 2^33 + 8192); the
; loaded byte is then stored to an undef address. Neither check set expects an
; immediate offset on the load: gfx900 adds 0x2000 / 2 to the scalar base with
; s_add_u32 + s_addc_u32, gfx1010 adds the same halves with v_add_co_u32 /
; v_add_co_ci_u32_e64.
1106 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
1107 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1109 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1110 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1111 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1112 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1113 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
1114 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1115 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1116 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1117 ; GFX9-NEXT: s_endpgm
1119 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1121 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1122 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1123 ; GFX10-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
1124 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1125 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1126 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1127 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1128 ; GFX10-NEXT: s_endpgm
1129 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
1130 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1131 store i8 %load, i8 addrspace(1)* undef
1135 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; Kernel case: volatile i8 load from %p + (-9223372036854773761), i.e.
; -2^63 + 2047; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0x7ff,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1136 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
1137 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1139 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1140 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1141 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1142 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
1143 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1144 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1145 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1146 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1147 ; GFX9-NEXT: s_endpgm
1149 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1151 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1152 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1153 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1154 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
1155 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1156 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1157 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1158 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1159 ; GFX10-NEXT: s_endpgm
1160 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
1161 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1162 store i8 %load, i8 addrspace(1)* undef
1166 ; Low bits one past the 11-bit maximum, negative high bits (1ull << 63) | 2048
; Kernel case: volatile i8 load from %p + (-9223372036854773760), i.e.
; -2^63 + 2048; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0x800,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1167 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
1168 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1170 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1171 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1172 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1173 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
1174 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1175 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1176 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1177 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1178 ; GFX9-NEXT: s_endpgm
1180 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1182 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1183 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1184 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1185 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
1186 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1187 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1188 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1189 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1190 ; GFX10-NEXT: s_endpgm
1191 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
1192 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1193 store i8 %load, i8 addrspace(1)* undef
1197 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; Kernel case: volatile i8 load from %p + (-9223372036854771713), i.e.
; -2^63 + 4095; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0xfff,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1198 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
1199 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1201 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1202 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1203 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1204 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
1205 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1206 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1207 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1208 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1209 ; GFX9-NEXT: s_endpgm
1211 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1213 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1214 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1215 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1216 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
1217 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1218 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1219 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1220 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1221 ; GFX10-NEXT: s_endpgm
1222 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
1223 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1224 store i8 %load, i8 addrspace(1)* undef
1228 ; Low bits one past the 12-bit maximum, negative high bits (1ull << 63) | 4096
; Kernel case: volatile i8 load from %p + (-9223372036854771712), i.e.
; -2^63 + 4096; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0x1000,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1229 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
1230 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1232 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1233 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1234 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1235 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
1236 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1237 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1238 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1239 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1240 ; GFX9-NEXT: s_endpgm
1242 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1244 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1245 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1246 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1247 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
1248 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1249 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1250 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1251 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1252 ; GFX10-NEXT: s_endpgm
1253 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
1254 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1255 store i8 %load, i8 addrspace(1)* undef
1259 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; Kernel case: volatile i8 load from %p + (-9223372036854767617), i.e.
; -2^63 + 8191; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0x1fff,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1260 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
1261 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1263 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1264 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1265 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1266 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
1267 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1268 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1269 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1270 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1271 ; GFX9-NEXT: s_endpgm
1273 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1275 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1276 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1277 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1278 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
1279 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1280 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1281 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1282 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1283 ; GFX10-NEXT: s_endpgm
1284 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
1285 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1286 store i8 %load, i8 addrspace(1)* undef
1290 ; Low bits one past the 13-bit maximum, negative high bits (1ull << 63) | 8192
; Kernel case: volatile i8 load from %p + (-9223372036854767616), i.e.
; -2^63 + 8192; the loaded byte is then stored to an undef address. Both check
; sets expect the whole constant added to the scalar base (s_add_u32 0x2000,
; s_addc_u32 0x80000000) and a load through s[0:1] with v0 = 0 and no
; immediate offset.
1291 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
1292 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1294 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1295 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1296 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1297 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
1298 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
1299 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1300 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1301 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1302 ; GFX9-NEXT: s_endpgm
1304 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1306 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1307 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1308 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1309 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
1310 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
1311 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
1312 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1313 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1314 ; GFX10-NEXT: s_endpgm
1315 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
1316 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1317 store i8 %load, i8 addrspace(1)* undef