1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX9 %s
3 ; RUN: llc -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX10 %s
5 ; Test splitting flat instruction offsets into the low and high bits
6 ; when the offset doesn't fit in the offset field.
; Byte offset 1 from a pointer passed as a function argument. The checks for
; both gfx900 and gfx1010 expect the offset folded into the load's immediate
; field (offset:1) with no separate address arithmetic.
8 define i8 @global_inst_valu_offset_1(i8 addrspace(1)* %p) {
9 ; GFX9-LABEL: global_inst_valu_offset_1:
11 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
12 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
13 ; GFX9-NEXT: s_waitcnt vmcnt(0)
14 ; GFX9-NEXT: s_setpc_b64 s[30:31]
16 ; GFX10-LABEL: global_inst_valu_offset_1:
18 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
19 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
20 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
21 ; GFX10-NEXT: ; implicit-def: $vcc_hi
22 ; GFX10-NEXT: s_waitcnt vmcnt(0)
23 ; GFX10-NEXT: s_setpc_b64 s[30:31]
24 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
25 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 2047 (largest value representable in 11 bits). The checks for
; both gfx900 and gfx1010 expect it folded directly into the load as
; offset:2047.
29 define i8 @global_inst_valu_offset_11bit_max(i8 addrspace(1)* %p) {
30 ; GFX9-LABEL: global_inst_valu_offset_11bit_max:
32 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
33 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
34 ; GFX9-NEXT: s_waitcnt vmcnt(0)
35 ; GFX9-NEXT: s_setpc_b64 s[30:31]
37 ; GFX10-LABEL: global_inst_valu_offset_11bit_max:
39 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
40 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
41 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
42 ; GFX10-NEXT: ; implicit-def: $vcc_hi
43 ; GFX10-NEXT: s_waitcnt vmcnt(0)
44 ; GFX10-NEXT: s_setpc_b64 s[30:31]
45 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
46 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 4095 (largest value representable in 12 bits). The gfx900 checks
; expect it folded as offset:4095; the gfx1010 checks expect a split into a
; 64-bit add of 0x800 to the pointer plus an immediate offset:2047.
50 define i8 @global_inst_valu_offset_12bit_max(i8 addrspace(1)* %p) {
51 ; GFX9-LABEL: global_inst_valu_offset_12bit_max:
53 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
54 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
55 ; GFX9-NEXT: s_waitcnt vmcnt(0)
56 ; GFX9-NEXT: s_setpc_b64 s[30:31]
58 ; GFX10-LABEL: global_inst_valu_offset_12bit_max:
60 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
61 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
62 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
63 ; GFX10-NEXT: ; implicit-def: $vcc_hi
64 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
65 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
66 ; GFX10-NEXT: s_waitcnt vmcnt(0)
67 ; GFX10-NEXT: s_setpc_b64 s[30:31]
68 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
69 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 8191 (largest value representable in 13 bits). Both targets are
; expected to split it: gfx900 as a 0x1000 pointer add plus offset:4095,
; gfx1010 as a 0x1800 pointer add plus offset:2047.
73 define i8 @global_inst_valu_offset_13bit_max(i8 addrspace(1)* %p) {
74 ; GFX9-LABEL: global_inst_valu_offset_13bit_max:
76 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
77 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
78 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
79 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
80 ; GFX9-NEXT: s_waitcnt vmcnt(0)
81 ; GFX9-NEXT: s_setpc_b64 s[30:31]
83 ; GFX10-LABEL: global_inst_valu_offset_13bit_max:
85 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
86 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
87 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
88 ; GFX10-NEXT: ; implicit-def: $vcc_hi
89 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
90 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
91 ; GFX10-NEXT: s_waitcnt vmcnt(0)
92 ; GFX10-NEXT: s_setpc_b64 s[30:31]
93 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
94 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -2048. The checks for both gfx900 and gfx1010 expect the negative
; offset folded directly into the load as offset:-2048.
98 define i8 @global_inst_valu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
99 ; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
101 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
102 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
103 ; GFX9-NEXT: s_waitcnt vmcnt(0)
104 ; GFX9-NEXT: s_setpc_b64 s[30:31]
106 ; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
108 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
109 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
110 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
111 ; GFX10-NEXT: ; implicit-def: $vcc_hi
112 ; GFX10-NEXT: s_waitcnt vmcnt(0)
113 ; GFX10-NEXT: s_setpc_b64 s[30:31]
114 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
115 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -4096. The gfx900 checks expect it folded as offset:-4096; the
; gfx1010 checks expect the whole constant added to the pointer
; (0xfffff000 low word, -1 high word) and a load with no immediate offset.
119 define i8 @global_inst_valu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
120 ; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
122 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
124 ; GFX9-NEXT: s_waitcnt vmcnt(0)
125 ; GFX9-NEXT: s_setpc_b64 s[30:31]
127 ; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
129 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
130 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
131 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
132 ; GFX10-NEXT: ; implicit-def: $vcc_hi
133 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
134 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
135 ; GFX10-NEXT: s_waitcnt vmcnt(0)
136 ; GFX10-NEXT: s_setpc_b64 s[30:31]
137 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
138 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -8192. The checks for both targets expect the whole constant
; added to the pointer (0xffffe000 low word, -1 high word) and a load with no
; immediate offset.
142 define i8 @global_inst_valu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
143 ; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
145 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
146 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
147 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
148 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
149 ; GFX9-NEXT: s_waitcnt vmcnt(0)
150 ; GFX9-NEXT: s_setpc_b64 s[30:31]
152 ; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
154 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
155 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
156 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
157 ; GFX10-NEXT: ; implicit-def: $vcc_hi
158 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
159 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
160 ; GFX10-NEXT: s_waitcnt vmcnt(0)
161 ; GFX10-NEXT: s_setpc_b64 s[30:31]
162 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
163 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 4095 (the same constant as the 12bit_max case). The gfx900 checks
; expect it folded as offset:4095; the gfx1010 checks expect a 0x800 pointer
; add plus offset:2047.
167 define i8 @global_inst_valu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
168 ; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
170 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
172 ; GFX9-NEXT: s_waitcnt vmcnt(0)
173 ; GFX9-NEXT: s_setpc_b64 s[30:31]
175 ; GFX10-LABEL: global_inst_valu_offset_2x_11bit_max:
177 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
179 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
180 ; GFX10-NEXT: ; implicit-def: $vcc_hi
181 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
182 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
183 ; GFX10-NEXT: s_waitcnt vmcnt(0)
184 ; GFX10-NEXT: s_setpc_b64 s[30:31]
185 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
186 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 8191 (the same constant as the 13bit_max case). The gfx900 checks
; expect a 0x1000 pointer add plus offset:4095; the gfx1010 checks expect a
; 0x1800 pointer add plus offset:2047.
190 define i8 @global_inst_valu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
191 ; GFX9-LABEL: global_inst_valu_offset_2x_12bit_max:
193 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
194 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
195 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
196 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
197 ; GFX9-NEXT: s_waitcnt vmcnt(0)
198 ; GFX9-NEXT: s_setpc_b64 s[30:31]
200 ; GFX10-LABEL: global_inst_valu_offset_2x_12bit_max:
202 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
203 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
204 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
205 ; GFX10-NEXT: ; implicit-def: $vcc_hi
206 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
207 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
208 ; GFX10-NEXT: s_waitcnt vmcnt(0)
209 ; GFX10-NEXT: s_setpc_b64 s[30:31]
210 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
211 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset 16383. The gfx900 checks expect a 0x3000 pointer add plus
; offset:4095; the gfx1010 checks expect a 0x3800 pointer add plus
; offset:2047.
215 define i8 @global_inst_valu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
216 ; GFX9-LABEL: global_inst_valu_offset_2x_13bit_max:
218 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
219 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
220 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
221 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
222 ; GFX9-NEXT: s_waitcnt vmcnt(0)
223 ; GFX9-NEXT: s_setpc_b64 s[30:31]
225 ; GFX10-LABEL: global_inst_valu_offset_2x_13bit_max:
227 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
228 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
229 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x3800, v0
230 ; GFX10-NEXT: ; implicit-def: $vcc_hi
231 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
232 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
233 ; GFX10-NEXT: s_waitcnt vmcnt(0)
234 ; GFX10-NEXT: s_setpc_b64 s[30:31]
235 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
236 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -4096 (the same constant as the neg_12bit_max case). The gfx900
; checks expect it folded as offset:-4096; the gfx1010 checks expect the whole
; constant added to the pointer and a load with no immediate offset.
240 define i8 @global_inst_valu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
241 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
243 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
244 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
245 ; GFX9-NEXT: s_waitcnt vmcnt(0)
246 ; GFX9-NEXT: s_setpc_b64 s[30:31]
248 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
250 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
251 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
252 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xfffff000, v0
253 ; GFX10-NEXT: ; implicit-def: $vcc_hi
254 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
255 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
256 ; GFX10-NEXT: s_waitcnt vmcnt(0)
257 ; GFX10-NEXT: s_setpc_b64 s[30:31]
258 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
259 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -8192 (the same constant as the neg_13bit_max case). The checks
; for both targets expect the whole constant added to the pointer
; (0xffffe000 low word, -1 high word) and a load with no immediate offset.
263 define i8 @global_inst_valu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
264 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
266 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
267 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
268 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
269 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
270 ; GFX9-NEXT: s_waitcnt vmcnt(0)
271 ; GFX9-NEXT: s_setpc_b64 s[30:31]
273 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
275 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
276 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
277 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffe000, v0
278 ; GFX10-NEXT: ; implicit-def: $vcc_hi
279 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
280 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
281 ; GFX10-NEXT: s_waitcnt vmcnt(0)
282 ; GFX10-NEXT: s_setpc_b64 s[30:31]
283 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
284 %load = load i8, i8 addrspace(1)* %gep, align 4
; Byte offset -16384. The checks for both targets expect the whole constant
; added to the pointer (0xffffc000 low word, -1 high word) and a load with no
; immediate offset.
288 define i8 @global_inst_valu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
289 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
291 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
292 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
293 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
294 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
295 ; GFX9-NEXT: s_waitcnt vmcnt(0)
296 ; GFX9-NEXT: s_setpc_b64 s[30:31]
298 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
300 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
301 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
302 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0xffffc000, v0
303 ; GFX10-NEXT: ; implicit-def: $vcc_hi
304 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
305 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
306 ; GFX10-NEXT: s_waitcnt vmcnt(0)
307 ; GFX10-NEXT: s_setpc_b64 s[30:31]
308 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
309 %load = load i8, i8 addrspace(1)* %gep, align 4
313 ; Fill 11-bit low-bits (1ull << 33) | 2047
; The GEP constant 8589936639 equals 2^33 + 2047. The checks for both targets
; expect an add of 0 to the low pointer word, an add-with-carry of 2 to the
; high word, and the remaining 2047 folded into the load as offset:2047.
314 define i8 @global_inst_valu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
315 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split0:
317 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
318 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
319 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
320 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
321 ; GFX9-NEXT: s_waitcnt vmcnt(0)
322 ; GFX9-NEXT: s_setpc_b64 s[30:31]
324 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split0:
326 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
327 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
328 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0
329 ; GFX10-NEXT: ; implicit-def: $vcc_hi
330 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
331 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
332 ; GFX10-NEXT: s_waitcnt vmcnt(0)
333 ; GFX10-NEXT: s_setpc_b64 s[30:31]
334 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
335 %load = load i8, i8 addrspace(1)* %gep, align 4
339 ; Fill 11-bit low-bits (1ull << 33) | 2048
; The GEP constant 8589936640 equals 2^33 + 2048. The gfx900 checks expect the
; low 2048 folded as offset:2048 with only the high word (2) added to the
; pointer; the gfx1010 checks expect 0x800 added to the low word, 2 to the
; high word, and a load with no immediate offset.
340 define i8 @global_inst_valu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
341 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_split1:
343 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
344 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
345 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
346 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
347 ; GFX9-NEXT: s_waitcnt vmcnt(0)
348 ; GFX9-NEXT: s_setpc_b64 s[30:31]
350 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_split1:
352 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
353 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
354 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
355 ; GFX10-NEXT: ; implicit-def: $vcc_hi
356 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
357 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
358 ; GFX10-NEXT: s_waitcnt vmcnt(0)
359 ; GFX10-NEXT: s_setpc_b64 s[30:31]
360 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
361 %load = load i8, i8 addrspace(1)* %gep, align 4
365 ; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP constant 8589938687 equals 2^33 + 4095. The gfx900 checks expect the
; low 4095 folded as offset:4095 with only the high word (2) added to the
; pointer; the gfx1010 checks expect 0x800 added to the low word, 2 to the
; high word, and offset:2047 on the load.
366 define i8 @global_inst_valu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
367 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split0:
369 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
370 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
371 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
372 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
373 ; GFX9-NEXT: s_waitcnt vmcnt(0)
374 ; GFX9-NEXT: s_setpc_b64 s[30:31]
376 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split0:
378 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
379 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
380 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x800, v0
381 ; GFX10-NEXT: ; implicit-def: $vcc_hi
382 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
383 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
384 ; GFX10-NEXT: s_waitcnt vmcnt(0)
385 ; GFX10-NEXT: s_setpc_b64 s[30:31]
386 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
387 %load = load i8, i8 addrspace(1)* %gep, align 4
391 ; Fill 12-bit low-bits (1ull << 33) | 4096
; The GEP constant 8589938688 equals 2^33 + 4096. The checks for both targets
; expect the whole constant added to the pointer (0x1000 low word, 2 high word)
; and a load with no immediate offset.
392 define i8 @global_inst_valu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
393 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_split1:
395 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
396 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
397 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
398 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
399 ; GFX9-NEXT: s_waitcnt vmcnt(0)
400 ; GFX9-NEXT: s_setpc_b64 s[30:31]
402 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_split1:
404 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
406 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
407 ; GFX10-NEXT: ; implicit-def: $vcc_hi
408 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
409 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
410 ; GFX10-NEXT: s_waitcnt vmcnt(0)
411 ; GFX10-NEXT: s_setpc_b64 s[30:31]
412 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
413 %load = load i8, i8 addrspace(1)* %gep, align 4
417 ; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP constant 8589942783 equals 2^33 + 8191. The gfx900 checks expect
; 0x1000 added to the low word, 2 to the high word, and offset:4095 on the
; load; the gfx1010 checks expect 0x1800 low, 2 high, and offset:2047.
418 define i8 @global_inst_valu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
419 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split0:
421 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
422 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
423 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
424 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
425 ; GFX9-NEXT: s_waitcnt vmcnt(0)
426 ; GFX9-NEXT: s_setpc_b64 s[30:31]
428 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split0:
430 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
431 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
432 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1800, v0
433 ; GFX10-NEXT: ; implicit-def: $vcc_hi
434 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
435 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
436 ; GFX10-NEXT: s_waitcnt vmcnt(0)
437 ; GFX10-NEXT: s_setpc_b64 s[30:31]
438 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
439 %load = load i8, i8 addrspace(1)* %gep, align 4
443 ; Fill 13-bit low-bits (1ull << 33) | 8192
; The GEP constant 8589942784 equals 2^33 + 8192. The checks for both targets
; expect the whole constant added to the pointer (0x2000 low word, 2 high word)
; and a load with no immediate offset.
444 define i8 @global_inst_valu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
445 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_split1:
447 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
448 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
449 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
450 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
451 ; GFX9-NEXT: s_waitcnt vmcnt(0)
452 ; GFX9-NEXT: s_setpc_b64 s[30:31]
454 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_split1:
456 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
457 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
458 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
459 ; GFX10-NEXT: ; implicit-def: $vcc_hi
460 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
461 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
462 ; GFX10-NEXT: s_waitcnt vmcnt(0)
463 ; GFX10-NEXT: s_setpc_b64 s[30:31]
464 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
465 %load = load i8, i8 addrspace(1)* %gep, align 4
469 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant -9223372036854773761 equals -2^63 + 2047. Both targets are
; expected to add 0 to the low pointer word and fold 2047 into the load as
; offset:2047. The high-word addend is v2 (set by v_bfrev_b32 v2, 1) in the
; gfx900 checks and the literal 0x80000000 in the gfx1010 checks.
470 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
471 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
473 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
475 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
476 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
477 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
478 ; GFX9-NEXT: s_waitcnt vmcnt(0)
479 ; GFX9-NEXT: s_setpc_b64 s[30:31]
481 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
483 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
484 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
485 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, v0
486 ; GFX10-NEXT: ; implicit-def: $vcc_hi
487 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
488 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
489 ; GFX10-NEXT: s_waitcnt vmcnt(0)
490 ; GFX10-NEXT: s_setpc_b64 s[30:31]
491 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
492 %load = load i8, i8 addrspace(1)* %gep, align 4
496 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The GEP constant -9223372036854773760 equals -2^63 + 2048. The gfx900 checks
; expect 0 added to the low word and 2048 folded as offset:2048; the gfx1010
; checks expect 0x1000 added to the low word with a negative immediate
; offset:-2048 on the load. Both add the high word separately with carry.
497 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
498 ; GFX9-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
500 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
501 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
502 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
503 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
504 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
505 ; GFX9-NEXT: s_waitcnt vmcnt(0)
506 ; GFX9-NEXT: s_setpc_b64 s[30:31]
508 ; GFX10-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
510 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
512 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
513 ; GFX10-NEXT: ; implicit-def: $vcc_hi
514 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
515 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
516 ; GFX10-NEXT: s_waitcnt vmcnt(0)
517 ; GFX10-NEXT: s_setpc_b64 s[30:31]
518 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
519 %load = load i8, i8 addrspace(1)* %gep, align 4
523 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP constant -9223372036854771713 equals -2^63 + 4095. The gfx900 checks
; expect 0 added to the low word and 4095 folded as offset:4095; the gfx1010
; checks expect 0x1000 added to the low word with offset:-1 on the load. Both
; add the high word separately with carry.
524 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
525 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
527 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
528 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
529 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
530 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
531 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
532 ; GFX9-NEXT: s_waitcnt vmcnt(0)
533 ; GFX9-NEXT: s_setpc_b64 s[30:31]
535 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
537 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
539 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
540 ; GFX10-NEXT: ; implicit-def: $vcc_hi
541 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
542 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
543 ; GFX10-NEXT: s_waitcnt vmcnt(0)
544 ; GFX10-NEXT: s_setpc_b64 s[30:31]
545 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
546 %load = load i8, i8 addrspace(1)* %gep, align 4
550 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; The GEP constant -9223372036854771712 equals -2^63 + 4096. The gfx900 checks
; expect 0x2000 added to the low word with offset:-4096 on the load; the
; gfx1010 checks expect 0x1000 added to the low word and no immediate offset.
; Both add the high word separately with carry.
551 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
552 ; GFX9-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
554 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
555 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
556 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
557 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
558 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
559 ; GFX9-NEXT: s_waitcnt vmcnt(0)
560 ; GFX9-NEXT: s_setpc_b64 s[30:31]
562 ; GFX10-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
564 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
565 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
566 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, v0
567 ; GFX10-NEXT: ; implicit-def: $vcc_hi
568 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
569 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
570 ; GFX10-NEXT: s_waitcnt vmcnt(0)
571 ; GFX10-NEXT: s_setpc_b64 s[30:31]
572 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
573 %load = load i8, i8 addrspace(1)* %gep, align 4
577 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The GEP constant -9223372036854767617 equals -2^63 + 8191. The checks for
; both targets expect 0x2000 added to the low pointer word with offset:-1 on
; the load, and the high word added separately with carry.
578 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
579 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
581 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
582 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
583 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
584 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
585 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
586 ; GFX9-NEXT: s_waitcnt vmcnt(0)
587 ; GFX9-NEXT: s_setpc_b64 s[30:31]
589 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
591 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
592 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
593 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
594 ; GFX10-NEXT: ; implicit-def: $vcc_hi
595 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
596 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
597 ; GFX10-NEXT: s_waitcnt vmcnt(0)
598 ; GFX10-NEXT: s_setpc_b64 s[30:31]
599 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
600 %load = load i8, i8 addrspace(1)* %gep, align 4
604 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The GEP constant -9223372036854767616 equals -2^63 + 8192. The checks for
; both targets expect 0x2000 added to the low pointer word, the high word
; added separately with carry, and a load with no immediate offset.
605 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
606 ; GFX9-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
608 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
609 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
610 ; GFX9-NEXT: v_bfrev_b32_e32 v2, 1
611 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
612 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
613 ; GFX9-NEXT: s_waitcnt vmcnt(0)
614 ; GFX9-NEXT: s_setpc_b64 s[30:31]
616 ; GFX10-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
618 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
619 ; GFX10-NEXT: s_waitcnt_vscnt null, 0x0
620 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, v0
621 ; GFX10-NEXT: ; implicit-def: $vcc_hi
622 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
623 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
624 ; GFX10-NEXT: s_waitcnt vmcnt(0)
625 ; GFX10-NEXT: s_setpc_b64 s[30:31]
626 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
627 %load = load i8, i8 addrspace(1)* %gep, align 4
; Kernel variant: the pointer is a kernel argument, which the checks show being
; fetched into s[0:1] with s_load_dwordx2. Byte offset 1; both targets are
; expected to copy the pointer to v[0:1] and fold the offset as offset:1.
; The volatile load's result is stored through an undef pointer.
631 define amdgpu_kernel void @global_inst_salu_offset_1(i8 addrspace(1)* %p) {
632 ; GFX9-LABEL: global_inst_salu_offset_1:
634 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
635 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
636 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
637 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
638 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
639 ; GFX9-NEXT: s_waitcnt vmcnt(0)
640 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
641 ; GFX9-NEXT: s_endpgm
643 ; GFX10-LABEL: global_inst_salu_offset_1:
645 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
646 ; GFX10-NEXT: ; implicit-def: $vcc_hi
647 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
648 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
649 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
650 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
651 ; GFX10-NEXT: s_waitcnt vmcnt(0)
652 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
653 ; GFX10-NEXT: s_endpgm
654 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 1
655 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
656 store i8 %load, i8 addrspace(1)* undef
; Kernel variant with byte offset 2047 applied to a kernel-argument pointer.
; Both targets are expected to copy the pointer from s[0:1] to v[0:1] and fold
; the offset into the load as offset:2047.
660 define amdgpu_kernel void @global_inst_salu_offset_11bit_max(i8 addrspace(1)* %p) {
661 ; GFX9-LABEL: global_inst_salu_offset_11bit_max:
663 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
664 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
665 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
666 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
667 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
668 ; GFX9-NEXT: s_waitcnt vmcnt(0)
669 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
670 ; GFX9-NEXT: s_endpgm
672 ; GFX10-LABEL: global_inst_salu_offset_11bit_max:
674 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
675 ; GFX10-NEXT: ; implicit-def: $vcc_hi
676 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
677 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
678 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
679 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
680 ; GFX10-NEXT: s_waitcnt vmcnt(0)
681 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
682 ; GFX10-NEXT: s_endpgm
683 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 2047
684 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
685 store i8 %load, i8 addrspace(1)* undef
; Kernel variant with byte offset 4095 applied to a kernel-argument pointer.
; The gfx900 checks expect it folded as offset:4095; the gfx1010 checks expect
; 0x800 added to the scalar pointer (result in v[0:1], carry in s0) and
; offset:2047 on the load.
689 define amdgpu_kernel void @global_inst_salu_offset_12bit_max(i8 addrspace(1)* %p) {
690 ; GFX9-LABEL: global_inst_salu_offset_12bit_max:
692 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
693 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
694 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
695 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
696 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
697 ; GFX9-NEXT: s_waitcnt vmcnt(0)
698 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
699 ; GFX9-NEXT: s_endpgm
701 ; GFX10-LABEL: global_inst_salu_offset_12bit_max:
703 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
704 ; GFX10-NEXT: ; implicit-def: $vcc_hi
705 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
706 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
707 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
708 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
709 ; GFX10-NEXT: s_waitcnt vmcnt(0)
710 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
711 ; GFX10-NEXT: s_endpgm
712 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
713 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
714 store i8 %load, i8 addrspace(1)* undef
; Kernel variant with byte offset 8191 applied to a kernel-argument pointer.
; The gfx900 checks expect the pointer copied to v[0:1], 0x1000 added, and
; offset:4095 on the load; the gfx1010 checks expect 0x1800 added directly to
; the scalar pointer and offset:2047 on the load.
718 define amdgpu_kernel void @global_inst_salu_offset_13bit_max(i8 addrspace(1)* %p) {
719 ; GFX9-LABEL: global_inst_salu_offset_13bit_max:
721 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
722 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
723 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
724 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
725 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
726 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
727 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
728 ; GFX9-NEXT: s_waitcnt vmcnt(0)
729 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
730 ; GFX9-NEXT: s_endpgm
732 ; GFX10-LABEL: global_inst_salu_offset_13bit_max:
734 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
735 ; GFX10-NEXT: ; implicit-def: $vcc_hi
736 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
737 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
738 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
739 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
740 ; GFX10-NEXT: s_waitcnt vmcnt(0)
741 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
742 ; GFX10-NEXT: s_endpgm
743 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
744 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
745 store i8 %load, i8 addrspace(1)* undef
; Kernel variant with byte offset -2048 applied to a kernel-argument pointer.
; Both targets are expected to copy the pointer from s[0:1] to v[0:1] and fold
; the offset into the load as offset:-2048.
749 define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(i8 addrspace(1)* %p) {
750 ; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
752 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
753 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
754 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
755 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
756 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
757 ; GFX9-NEXT: s_waitcnt vmcnt(0)
758 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
759 ; GFX9-NEXT: s_endpgm
761 ; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
763 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
764 ; GFX10-NEXT: ; implicit-def: $vcc_hi
765 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
766 ; GFX10-NEXT: v_mov_b32_e32 v0, s0
767 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
768 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
769 ; GFX10-NEXT: s_waitcnt vmcnt(0)
770 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
771 ; GFX10-NEXT: s_endpgm
772 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -2048
773 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
774 store i8 %load, i8 addrspace(1)* undef
; Kernel variant with byte offset -4096 applied to a kernel-argument pointer.
; The gfx900 checks expect it folded as offset:-4096; the gfx1010 checks expect
; the whole constant added to the scalar pointer (0xfffff000 low word, -1 high
; word) and a load with no immediate offset.
778 define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(i8 addrspace(1)* %p) {
779 ; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
781 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
782 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
783 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
784 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
785 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
786 ; GFX9-NEXT: s_waitcnt vmcnt(0)
787 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
788 ; GFX9-NEXT: s_endpgm
790 ; GFX10-LABEL: global_inst_salu_offset_neg_12bit_max:
792 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
793 ; GFX10-NEXT: ; implicit-def: $vcc_hi
794 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
795 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0
796 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
797 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
798 ; GFX10-NEXT: s_waitcnt vmcnt(0)
799 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
800 ; GFX10-NEXT: s_endpgm
801 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
802 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
803 store i8 %load, i8 addrspace(1)* undef
; Offset -8192 from an amdgpu_kernel pointer argument. The gfx900 and gfx1010
; checks both expect a 64-bit add of 0xffffe000 / -1 (i.e. -8192) to the base
; and a load with no immediate offset.
807 define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(i8 addrspace(1)* %p) {
808 ; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
810 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
811 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
812 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
813 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
814 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
815 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
816 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
817 ; GFX9-NEXT: s_waitcnt vmcnt(0)
818 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
819 ; GFX9-NEXT: s_endpgm
821 ; GFX10-LABEL: global_inst_salu_offset_neg_13bit_max:
823 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
824 ; GFX10-NEXT: ; implicit-def: $vcc_hi
825 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
826 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0
827 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
828 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
829 ; GFX10-NEXT: s_waitcnt vmcnt(0)
830 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
831 ; GFX10-NEXT: s_endpgm
832 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
833 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
834 store i8 %load, i8 addrspace(1)* undef
; Offset 4095 from an amdgpu_kernel pointer argument. The gfx900 checks expect
; it folded entirely into the load's immediate (offset:4095). The gfx1010
; checks expect it split as an add of 0x800 to the base plus offset:2047
; (0x800 + 2047 = 4095).
838 define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(i8 addrspace(1)* %p) {
839 ; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
841 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
842 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
843 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
844 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
845 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
846 ; GFX9-NEXT: s_waitcnt vmcnt(0)
847 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
848 ; GFX9-NEXT: s_endpgm
850 ; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
852 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
853 ; GFX10-NEXT: ; implicit-def: $vcc_hi
854 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
855 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
856 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
857 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
858 ; GFX10-NEXT: s_waitcnt vmcnt(0)
859 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
860 ; GFX10-NEXT: s_endpgm
861 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 4095
862 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
863 store i8 %load, i8 addrspace(1)* undef
; Offset 8191 from an amdgpu_kernel pointer argument. Both targets are
; expected to split it: the gfx900 checks add 0x1000 to the base and use
; offset:4095; the gfx1010 checks add 0x1800 and use offset:2047.
867 define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(i8 addrspace(1)* %p) {
868 ; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
870 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
871 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
872 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
873 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
874 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
875 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
876 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
877 ; GFX9-NEXT: s_waitcnt vmcnt(0)
878 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
879 ; GFX9-NEXT: s_endpgm
881 ; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
883 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
884 ; GFX10-NEXT: ; implicit-def: $vcc_hi
885 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
886 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
887 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
888 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
889 ; GFX10-NEXT: s_waitcnt vmcnt(0)
890 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
891 ; GFX10-NEXT: s_endpgm
892 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8191
893 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
894 store i8 %load, i8 addrspace(1)* undef
; Offset 16383 from an amdgpu_kernel pointer argument. Both targets are
; expected to split it: the gfx900 checks add 0x3000 to the base and use
; offset:4095; the gfx1010 checks add 0x3800 and use offset:2047.
898 define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(i8 addrspace(1)* %p) {
899 ; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
901 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
902 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
903 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
904 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
905 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
906 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
907 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
908 ; GFX9-NEXT: s_waitcnt vmcnt(0)
909 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
910 ; GFX9-NEXT: s_endpgm
912 ; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
914 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
915 ; GFX10-NEXT: ; implicit-def: $vcc_hi
916 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
917 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x3800, s0
918 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 0, s1, s0
919 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
920 ; GFX10-NEXT: s_waitcnt vmcnt(0)
921 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
922 ; GFX10-NEXT: s_endpgm
923 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 16383
924 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
925 store i8 %load, i8 addrspace(1)* undef
; Offset -4096 (twice -2048) from an amdgpu_kernel pointer argument. The gfx900
; checks expect it folded into the load's immediate (offset:-4096). The gfx1010
; checks expect a 64-bit add of 0xfffff000 / -1 to the base and a load with no
; immediate offset.
929 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(i8 addrspace(1)* %p) {
930 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
932 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
933 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
934 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
935 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
936 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
937 ; GFX9-NEXT: s_waitcnt vmcnt(0)
938 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
939 ; GFX9-NEXT: s_endpgm
941 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
943 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
944 ; GFX10-NEXT: ; implicit-def: $vcc_hi
945 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
946 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xfffff000, s0
947 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
948 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
949 ; GFX10-NEXT: s_waitcnt vmcnt(0)
950 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
951 ; GFX10-NEXT: s_endpgm
952 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -4096
953 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
954 store i8 %load, i8 addrspace(1)* undef
; Offset -8192 (twice -4096) from an amdgpu_kernel pointer argument. The gfx900
; and gfx1010 checks both expect a 64-bit add of 0xffffe000 / -1 to the base
; and a load with no immediate offset.
958 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(i8 addrspace(1)* %p) {
959 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
961 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
962 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
963 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
964 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
965 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
966 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
967 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
968 ; GFX9-NEXT: s_waitcnt vmcnt(0)
969 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
970 ; GFX9-NEXT: s_endpgm
972 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
974 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
975 ; GFX10-NEXT: ; implicit-def: $vcc_hi
976 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
977 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffe000, s0
978 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
979 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
980 ; GFX10-NEXT: s_waitcnt vmcnt(0)
981 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
982 ; GFX10-NEXT: s_endpgm
983 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -8192
984 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
985 store i8 %load, i8 addrspace(1)* undef
; Offset -16384 (twice -8192) from an amdgpu_kernel pointer argument. The
; gfx900 and gfx1010 checks both expect a 64-bit add of 0xffffc000 / -1 to the
; base and a load with no immediate offset.
989 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(i8 addrspace(1)* %p) {
990 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
992 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
993 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
994 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
995 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
996 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
997 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
998 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
999 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1000 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1001 ; GFX9-NEXT: s_endpgm
1003 ; GFX10-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1005 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1006 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1007 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1008 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0xffffc000, s0
1009 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1010 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1011 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1012 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1013 ; GFX10-NEXT: s_endpgm
1014 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -16384
1015 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1016 store i8 %load, i8 addrspace(1)* undef
1020 ; Fill 11-bit low-bits (1ull << 33) | 2047
; The GEP offset is 8589936639 = (1 << 33) | 2047. The gfx900 and gfx1010
; checks both expect 2 to be added to the high dword of the base (with a
; zero add on the low dword) and the low 2047 to go into the load's immediate
; (offset:2047).
1021 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(i8 addrspace(1)* %p) {
1022 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
1024 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1025 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1026 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1027 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1028 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1029 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1030 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1031 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1032 ; GFX9-NEXT: s_endpgm
1034 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split0:
1036 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1037 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1038 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1039 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0, s0
1040 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1041 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1042 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1043 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1044 ; GFX10-NEXT: s_endpgm
1045 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936639
1046 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1047 store i8 %load, i8 addrspace(1)* undef
1051 ; Fill 11-bit low-bits (1ull << 33) | 2048
; The GEP offset is 8589936640 = (1 << 33) | 2048. Both targets are expected
; to add 2 to the high dword of the base. The gfx900 checks keep the low 2048
; in the load's immediate (offset:2048); the gfx1010 checks add 0x800 to the
; low dword instead and use no immediate offset.
1052 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(i8 addrspace(1)* %p) {
1053 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
1055 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1056 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1057 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1058 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1059 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1060 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
1061 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1062 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1063 ; GFX9-NEXT: s_endpgm
1065 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_split1:
1067 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1068 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1069 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1070 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
1071 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1072 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1073 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1074 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1075 ; GFX10-NEXT: s_endpgm
1076 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589936640
1077 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1078 store i8 %load, i8 addrspace(1)* undef
1082 ; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP offset is 8589938687 = (1 << 33) | 4095. Both targets are expected
; to add 2 to the high dword of the base. The gfx900 checks keep the low 4095
; in the load's immediate (offset:4095); the gfx1010 checks split it into an
; add of 0x800 to the low dword plus offset:2047.
1083 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(i8 addrspace(1)* %p) {
1084 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1086 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1087 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1088 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1089 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1090 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1091 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
1092 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1093 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1094 ; GFX9-NEXT: s_endpgm
1096 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split0:
1098 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1099 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1100 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1101 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x800, s0
1102 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1103 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1104 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1105 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1106 ; GFX10-NEXT: s_endpgm
1107 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938687
1108 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1109 store i8 %load, i8 addrspace(1)* undef
1113 ; Fill 12-bit low-bits (1ull << 33) | 4096
; The GEP offset is 8589938688 = (1 << 33) | 4096. The gfx900 and gfx1010
; checks both expect the full offset to be applied by the 64-bit add (0x1000
; to the low dword, 2 to the high dword) and a load with no immediate offset.
1114 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(i8 addrspace(1)* %p) {
1115 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1117 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1118 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1119 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1120 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1121 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1122 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1123 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1124 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1125 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1126 ; GFX9-NEXT: s_endpgm
1128 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_split1:
1130 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1131 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1132 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1133 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1000, s0
1134 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1135 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1136 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1137 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1138 ; GFX10-NEXT: s_endpgm
1139 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589938688
1140 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1141 store i8 %load, i8 addrspace(1)* undef
1145 ; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP offset is 8589942783 = (1 << 33) | 8191. Both targets are expected
; to add 2 to the high dword of the base and to split the low 8191: the gfx900
; checks add 0x1000 and use offset:4095; the gfx1010 checks add 0x1800 and use
; offset:2047.
1146 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(i8 addrspace(1)* %p) {
1147 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1149 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1150 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1151 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1152 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1153 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1154 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1155 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
1156 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1157 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1158 ; GFX9-NEXT: s_endpgm
1160 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split0:
1162 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1163 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1164 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1165 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x1800, s0
1166 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1167 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1168 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1169 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1170 ; GFX10-NEXT: s_endpgm
1171 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942783
1172 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1173 store i8 %load, i8 addrspace(1)* undef
1177 ; Fill 13-bit low-bits (1ull << 33) | 8192
; The GEP offset is 8589942784 = (1 << 33) | 8192. The gfx900 and gfx1010
; checks both expect the full offset to be applied by the 64-bit add (0x2000
; to the low dword, 2 to the high dword) and a load with no immediate offset.
1178 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(i8 addrspace(1)* %p) {
1179 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1181 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1182 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1183 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1184 ; GFX9-NEXT: v_mov_b32_e32 v1, s1
1185 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1186 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
1187 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1188 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1189 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1190 ; GFX9-NEXT: s_endpgm
1192 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_split1:
1194 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1195 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1196 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1197 ; GFX10-NEXT: v_add_co_u32_e64 v0, s0, 0x2000, s0
1198 ; GFX10-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
1199 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1200 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1201 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1202 ; GFX10-NEXT: s_endpgm
1203 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 8589942784
1204 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1205 store i8 %load, i8 addrspace(1)* undef
1209 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP offset is -9223372036854773761, i.e. the bit pattern
; (1 << 63) | 2047. Both targets are expected to add 0x80000000 to the high
; dword of the base and keep the low 2047 in the load's immediate
; (offset:2047). The gfx900 checks build the high addend in a register with
; v_bfrev_b32 of 1; the gfx1010 checks use the literal 0x80000000 directly.
1210 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(i8 addrspace(1)* %p) {
1211 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1213 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1214 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1215 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1216 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1217 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1218 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1219 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1220 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1221 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1222 ; GFX9-NEXT: s_endpgm
1224 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
1226 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1227 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1228 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1229 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1230 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0, s0
1231 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1232 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
1233 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1234 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1235 ; GFX10-NEXT: s_endpgm
1236 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773761
1237 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1238 store i8 %load, i8 addrspace(1)* undef
1242 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; The GEP offset is -9223372036854773760, i.e. the bit pattern
; (1 << 63) | 2048. Both targets are expected to add 0x80000000 to the high
; dword of the base. The gfx900 checks keep the low 2048 in the load's
; immediate (offset:2048); the gfx1010 checks add 0x1000 to the low dword and
; use a negative immediate (offset:-2048), which also sums to 2048.
1243 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(i8 addrspace(1)* %p) {
1244 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1246 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1247 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1248 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1249 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1250 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1251 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1252 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
1253 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1254 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1255 ; GFX9-NEXT: s_endpgm
1257 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
1259 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1260 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1261 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1262 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1263 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1264 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1265 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
1266 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1267 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1268 ; GFX10-NEXT: s_endpgm
1269 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854773760
1270 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1271 store i8 %load, i8 addrspace(1)* undef
1275 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP offset is -9223372036854771713, i.e. the bit pattern
; (1 << 63) | 4095. Both targets are expected to add 0x80000000 to the high
; dword of the base. The gfx900 checks keep the low 4095 in the load's
; immediate (offset:4095); the gfx1010 checks add 0x1000 to the low dword and
; use offset:-1, which also sums to 4095.
1276 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(i8 addrspace(1)* %p) {
1277 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1279 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1280 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1281 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1282 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1283 ; GFX9-NEXT: v_add_co_u32_e64 v0, vcc, 0, s0
1284 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1285 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
1286 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1287 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1288 ; GFX9-NEXT: s_endpgm
1290 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
1292 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1293 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1294 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1295 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1296 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1297 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1298 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1299 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1300 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1301 ; GFX10-NEXT: s_endpgm
1302 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771713
1303 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1304 store i8 %load, i8 addrspace(1)* undef
1308 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; The GEP offset is -9223372036854771712, i.e. the bit pattern
; (1 << 63) | 4096. Both targets are expected to add 0x80000000 to the high
; dword of the base. The gfx900 checks add 0x2000 to the low dword and use a
; negative immediate (offset:-4096); the gfx1010 checks add 0x1000 and use no
; immediate offset. Both sum to 4096.
1309 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(i8 addrspace(1)* %p) {
1310 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1312 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1313 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1314 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1315 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1316 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1317 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1318 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1319 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
1320 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1321 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1322 ; GFX9-NEXT: s_endpgm
1324 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
1326 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1327 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1328 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1329 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1330 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x1000, s0
1331 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1332 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1333 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1334 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1335 ; GFX10-NEXT: s_endpgm
1336 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854771712
1337 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1338 store i8 %load, i8 addrspace(1)* undef
1342 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The GEP offset is -9223372036854767617, i.e. the bit pattern
; (1 << 63) | 8191. The gfx900 and gfx1010 checks both expect 0x80000000 added
; to the high dword of the base, 0x2000 added to the low dword, and the load
; to use offset:-1 (0x2000 - 1 = 8191).
1343 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(i8 addrspace(1)* %p) {
1344 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1346 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1347 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1348 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1349 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1350 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1351 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1352 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1353 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1354 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1355 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1356 ; GFX9-NEXT: s_endpgm
1358 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
1360 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1361 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1362 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1363 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1364 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1365 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1366 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1367 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1368 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1369 ; GFX10-NEXT: s_endpgm
1370 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767617
1371 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1372 store i8 %load, i8 addrspace(1)* undef
1376 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; The GEP offset is -9223372036854767616, i.e. the bit pattern
; (1 << 63) | 8192. The gfx900 and gfx1010 checks both expect the full offset
; to be applied by the 64-bit add (0x2000 to the low dword, 0x80000000 to the
; high dword) and a load with no immediate offset.
1377 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(i8 addrspace(1)* %p) {
1378 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1380 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1381 ; GFX9-NEXT: v_bfrev_b32_e32 v1, 1
1382 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1383 ; GFX9-NEXT: v_mov_b32_e32 v0, s0
1384 ; GFX9-NEXT: v_mov_b32_e32 v2, s1
1385 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1386 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v2, vcc
1387 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
1388 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1389 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1390 ; GFX9-NEXT: s_endpgm
1392 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
1394 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1395 ; GFX10-NEXT: ; implicit-def: $vcc_hi
1396 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1397 ; GFX10-NEXT: v_mov_b32_e32 v1, s1
1398 ; GFX10-NEXT: v_add_co_u32_e64 v0, vcc_lo, 0x2000, s0
1399 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1400 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
1401 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1402 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1403 ; GFX10-NEXT: s_endpgm
1404 %gep = getelementptr i8, i8 addrspace(1)* %p, i64 -9223372036854767616
1405 %load = load volatile i8, i8 addrspace(1)* %gep, align 1
1406 store i8 %load, i8 addrspace(1)* undef