1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-GISEL %s
3 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-GISEL %s
4 ; RUN: llc -global-isel=1 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-GISEL %s
5 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx900 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX9,GFX9-SDAG %s
6 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX10,GFX10-SDAG %s
7 ; RUN: llc -global-isel=0 -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefixes=GFX11,GFX11-SDAG %s
9 ; Test splitting global instruction offsets into the low and high bits
10 ; when the offset doesn't fit in the instruction's immediate offset field.
12 define i8 @global_inst_valu_offset_1(ptr addrspace(1) %p) {
13 ; GFX9-LABEL: global_inst_valu_offset_1:
15 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
16 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:1
17 ; GFX9-NEXT: s_waitcnt vmcnt(0)
18 ; GFX9-NEXT: s_setpc_b64 s[30:31]
20 ; GFX10-LABEL: global_inst_valu_offset_1:
22 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
23 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:1
24 ; GFX10-NEXT: s_waitcnt vmcnt(0)
25 ; GFX10-NEXT: s_setpc_b64 s[30:31]
27 ; GFX11-LABEL: global_inst_valu_offset_1:
29 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
30 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:1
31 ; GFX11-NEXT: s_waitcnt vmcnt(0)
32 ; GFX11-NEXT: s_setpc_b64 s[30:31]
33 %gep = getelementptr i8, ptr addrspace(1) %p, i64 1 ; +1 byte: every checked target folds it into the load as offset:1
34 %load = load i8, ptr addrspace(1) %gep, align 4
38 define i8 @global_inst_valu_offset_11bit_max(ptr addrspace(1) %p) {
39 ; GFX9-LABEL: global_inst_valu_offset_11bit_max:
41 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
42 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
43 ; GFX9-NEXT: s_waitcnt vmcnt(0)
44 ; GFX9-NEXT: s_setpc_b64 s[30:31]
46 ; GFX10-LABEL: global_inst_valu_offset_11bit_max:
48 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
49 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
50 ; GFX10-NEXT: s_waitcnt vmcnt(0)
51 ; GFX10-NEXT: s_setpc_b64 s[30:31]
53 ; GFX11-LABEL: global_inst_valu_offset_11bit_max:
55 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
56 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:2047
57 ; GFX11-NEXT: s_waitcnt vmcnt(0)
58 ; GFX11-NEXT: s_setpc_b64 s[30:31]
59 %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047 ; 2047 = 2^11 - 1: folded as offset:2047 on every checked target
60 %load = load i8, ptr addrspace(1) %gep, align 4
64 define i8 @global_inst_valu_offset_12bit_max(ptr addrspace(1) %p) {
65 ; GFX9-LABEL: global_inst_valu_offset_12bit_max:
67 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
68 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
69 ; GFX9-NEXT: s_waitcnt vmcnt(0)
70 ; GFX9-NEXT: s_setpc_b64 s[30:31]
72 ; GFX10-GISEL-LABEL: global_inst_valu_offset_12bit_max:
73 ; GFX10-GISEL: ; %bb.0:
74 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
75 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
76 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
77 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
78 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
79 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
81 ; GFX11-LABEL: global_inst_valu_offset_12bit_max:
83 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
84 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
85 ; GFX11-NEXT: s_waitcnt vmcnt(0)
86 ; GFX11-NEXT: s_setpc_b64 s[30:31]
88 ; GFX10-SDAG-LABEL: global_inst_valu_offset_12bit_max:
89 ; GFX10-SDAG: ; %bb.0:
90 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
91 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
92 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
93 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
94 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
95 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
96 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095 ; 4095 = 2^12 - 1: folded on gfx900/gfx1100; gfx1010 adds first (GlobalISel: 0xfff, no offset; SelectionDAG: 0x800 + offset:2047)
97 %load = load i8, ptr addrspace(1) %gep, align 4
101 define i8 @global_inst_valu_offset_13bit_max(ptr addrspace(1) %p) {
102 ; GFX9-GISEL-LABEL: global_inst_valu_offset_13bit_max:
103 ; GFX9-GISEL: ; %bb.0:
104 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
105 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
106 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
107 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
108 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
109 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
111 ; GFX10-GISEL-LABEL: global_inst_valu_offset_13bit_max:
112 ; GFX10-GISEL: ; %bb.0:
113 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
114 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
115 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
116 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
117 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
118 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
120 ; GFX11-GISEL-LABEL: global_inst_valu_offset_13bit_max:
121 ; GFX11-GISEL: ; %bb.0:
122 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
123 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
124 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
125 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
126 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
127 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
129 ; GFX9-SDAG-LABEL: global_inst_valu_offset_13bit_max:
130 ; GFX9-SDAG: ; %bb.0:
131 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
132 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
133 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
134 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
135 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
136 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
138 ; GFX10-SDAG-LABEL: global_inst_valu_offset_13bit_max:
139 ; GFX10-SDAG: ; %bb.0:
140 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
141 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
142 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
143 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
144 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
145 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
147 ; GFX11-SDAG-LABEL: global_inst_valu_offset_13bit_max:
148 ; GFX11-SDAG: ; %bb.0:
149 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
150 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
151 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
152 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
153 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
154 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
155 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191 ; 8191 = 2^13 - 1: GlobalISel adds 0x1fff with no offset; SelectionDAG splits into 0x1000 + offset:4095 (gfx900/gfx1100) or 0x1800 + offset:2047 (gfx1010)
156 %load = load i8, ptr addrspace(1) %gep, align 4
160 define i8 @global_inst_valu_offset_neg_11bit_max(ptr addrspace(1) %p) {
161 ; GFX9-LABEL: global_inst_valu_offset_neg_11bit_max:
163 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
164 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
165 ; GFX9-NEXT: s_waitcnt vmcnt(0)
166 ; GFX9-NEXT: s_setpc_b64 s[30:31]
168 ; GFX10-LABEL: global_inst_valu_offset_neg_11bit_max:
170 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
171 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
172 ; GFX10-NEXT: s_waitcnt vmcnt(0)
173 ; GFX10-NEXT: s_setpc_b64 s[30:31]
175 ; GFX11-LABEL: global_inst_valu_offset_neg_11bit_max:
177 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
178 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
179 ; GFX11-NEXT: s_waitcnt vmcnt(0)
180 ; GFX11-NEXT: s_setpc_b64 s[30:31]
181 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048 ; -2048 = -(2^11): folded as offset:-2048 on every checked target
182 %load = load i8, ptr addrspace(1) %gep, align 4
186 define i8 @global_inst_valu_offset_neg_12bit_max(ptr addrspace(1) %p) {
187 ; GFX9-LABEL: global_inst_valu_offset_neg_12bit_max:
189 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
190 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
191 ; GFX9-NEXT: s_waitcnt vmcnt(0)
192 ; GFX9-NEXT: s_setpc_b64 s[30:31]
194 ; GFX10-LABEL: global_inst_valu_offset_neg_12bit_max:
196 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
197 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
198 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
199 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
200 ; GFX10-NEXT: s_waitcnt vmcnt(0)
201 ; GFX10-NEXT: s_setpc_b64 s[30:31]
203 ; GFX11-LABEL: global_inst_valu_offset_neg_12bit_max:
205 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
206 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
207 ; GFX11-NEXT: s_waitcnt vmcnt(0)
208 ; GFX11-NEXT: s_setpc_b64 s[30:31]
209 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096 ; -4096 = -(2^12): folded on gfx900/gfx1100; gfx1010 adds 0xfffff000 (high half -1) and loads with no offset
210 %load = load i8, ptr addrspace(1) %gep, align 4
214 define i8 @global_inst_valu_offset_neg_13bit_max(ptr addrspace(1) %p) {
215 ; GFX9-LABEL: global_inst_valu_offset_neg_13bit_max:
217 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
218 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
219 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
220 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
221 ; GFX9-NEXT: s_waitcnt vmcnt(0)
222 ; GFX9-NEXT: s_setpc_b64 s[30:31]
224 ; GFX10-LABEL: global_inst_valu_offset_neg_13bit_max:
226 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
227 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
228 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
229 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
230 ; GFX10-NEXT: s_waitcnt vmcnt(0)
231 ; GFX10-NEXT: s_setpc_b64 s[30:31]
233 ; GFX11-LABEL: global_inst_valu_offset_neg_13bit_max:
235 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
236 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
237 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
238 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
239 ; GFX11-NEXT: s_waitcnt vmcnt(0)
240 ; GFX11-NEXT: s_setpc_b64 s[30:31]
241 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192 ; -8192 = -(2^13): no checked target folds it; each adds 0xffffe000 (high half -1) and loads with no offset
242 %load = load i8, ptr addrspace(1) %gep, align 4
246 define i8 @global_inst_valu_offset_2x_11bit_max(ptr addrspace(1) %p) {
247 ; GFX9-LABEL: global_inst_valu_offset_2x_11bit_max:
249 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
250 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
251 ; GFX9-NEXT: s_waitcnt vmcnt(0)
252 ; GFX9-NEXT: s_setpc_b64 s[30:31]
254 ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_11bit_max:
255 ; GFX10-GISEL: ; %bb.0:
256 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
257 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0xfff, v0
258 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
259 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
260 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
261 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
263 ; GFX11-LABEL: global_inst_valu_offset_2x_11bit_max:
265 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
266 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:4095
267 ; GFX11-NEXT: s_waitcnt vmcnt(0)
268 ; GFX11-NEXT: s_setpc_b64 s[30:31]
270 ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_11bit_max:
271 ; GFX10-SDAG: ; %bb.0:
272 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
273 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
274 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
275 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
276 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
277 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
278 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095 ; 4095 = 2 * 2048 - 1: folded on gfx900/gfx1100; gfx1010 adds first (GlobalISel: 0xfff, no offset; SelectionDAG: 0x800 + offset:2047)
279 %load = load i8, ptr addrspace(1) %gep, align 4
283 define i8 @global_inst_valu_offset_2x_12bit_max(ptr addrspace(1) %p) {
284 ; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
285 ; GFX9-GISEL: ; %bb.0:
286 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
287 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x1fff, v0
288 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
289 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
290 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
291 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
293 ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
294 ; GFX10-GISEL: ; %bb.0:
295 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
296 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
297 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
298 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
299 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
300 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
302 ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_12bit_max:
303 ; GFX11-GISEL: ; %bb.0:
304 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
305 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x1fff, v0
306 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
307 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
308 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
309 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
311 ; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
312 ; GFX9-SDAG: ; %bb.0:
313 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
314 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
315 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
316 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
317 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
318 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
320 ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
321 ; GFX10-SDAG: ; %bb.0:
322 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
323 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
324 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
325 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
326 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
327 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
329 ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_12bit_max:
330 ; GFX11-SDAG: ; %bb.0:
331 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
332 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
333 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
334 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
335 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
336 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
337 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191 ; 8191 = 2 * 4096 - 1: GlobalISel adds 0x1fff with no offset; SelectionDAG splits into 0x1000 + offset:4095 (gfx900/gfx1100) or 0x1800 + offset:2047 (gfx1010)
338 %load = load i8, ptr addrspace(1) %gep, align 4
342 define i8 @global_inst_valu_offset_2x_13bit_max(ptr addrspace(1) %p) {
343 ; GFX9-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
344 ; GFX9-GISEL: ; %bb.0:
345 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
346 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, 0x3fff, v0
347 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
348 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
349 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
350 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
352 ; GFX10-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
353 ; GFX10-GISEL: ; %bb.0:
354 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
355 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
356 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
357 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
358 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
359 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
361 ; GFX11-GISEL-LABEL: global_inst_valu_offset_2x_13bit_max:
362 ; GFX11-GISEL: ; %bb.0:
363 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
364 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, 0x3fff, v0
365 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
366 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
367 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
368 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
370 ; GFX9-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
371 ; GFX9-SDAG: ; %bb.0:
372 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
373 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x3000, v0
374 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 0, v1, vcc
375 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
376 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
377 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
379 ; GFX10-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
380 ; GFX10-SDAG: ; %bb.0:
381 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
382 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3800, v0
383 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
384 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
385 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
386 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
388 ; GFX11-SDAG-LABEL: global_inst_valu_offset_2x_13bit_max:
389 ; GFX11-SDAG: ; %bb.0:
390 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
391 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x3000, v0
392 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0, v1, vcc_lo
393 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
394 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
395 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
396 %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383 ; 16383 = 2 * 8192 - 1: GlobalISel adds 0x3fff with no offset; SelectionDAG splits into 0x3000 + offset:4095 (gfx900/gfx1100) or 0x3800 + offset:2047 (gfx1010)
397 %load = load i8, ptr addrspace(1) %gep, align 4
401 define i8 @global_inst_valu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
402 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
404 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
405 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off offset:-4096
406 ; GFX9-NEXT: s_waitcnt vmcnt(0)
407 ; GFX9-NEXT: s_setpc_b64 s[30:31]
409 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
411 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
412 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xfffff000, v0
413 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
414 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
415 ; GFX10-NEXT: s_waitcnt vmcnt(0)
416 ; GFX10-NEXT: s_setpc_b64 s[30:31]
418 ; GFX11-LABEL: global_inst_valu_offset_2x_neg_11bit_max:
420 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
421 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off offset:-4096
422 ; GFX11-NEXT: s_waitcnt vmcnt(0)
423 ; GFX11-NEXT: s_setpc_b64 s[30:31]
424 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096 ; -4096 = 2 * -2048: folded on gfx900/gfx1100; gfx1010 adds 0xfffff000 (high half -1) and loads with no offset
425 %load = load i8, ptr addrspace(1) %gep, align 4
429 define i8 @global_inst_valu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
430 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
432 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
433 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffe000, v0
434 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
435 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
436 ; GFX9-NEXT: s_waitcnt vmcnt(0)
437 ; GFX9-NEXT: s_setpc_b64 s[30:31]
439 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
441 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
442 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
443 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
444 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
445 ; GFX10-NEXT: s_waitcnt vmcnt(0)
446 ; GFX10-NEXT: s_setpc_b64 s[30:31]
448 ; GFX11-LABEL: global_inst_valu_offset_2x_neg_12bit_max:
450 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
451 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffe000, v0
452 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
453 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
454 ; GFX11-NEXT: s_waitcnt vmcnt(0)
455 ; GFX11-NEXT: s_setpc_b64 s[30:31]
456 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192 ; -8192 = 2 * -4096: no checked target folds it; each adds 0xffffe000 (high half -1) and loads with no offset
457 %load = load i8, ptr addrspace(1) %gep, align 4
461 define i8 @global_inst_valu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
462 ; GFX9-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
464 ; GFX9-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
465 ; GFX9-NEXT: v_add_co_u32_e32 v0, vcc, 0xffffc000, v0
466 ; GFX9-NEXT: v_addc_co_u32_e32 v1, vcc, -1, v1, vcc
467 ; GFX9-NEXT: global_load_ubyte v0, v[0:1], off
468 ; GFX9-NEXT: s_waitcnt vmcnt(0)
469 ; GFX9-NEXT: s_setpc_b64 s[30:31]
471 ; GFX10-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
473 ; GFX10-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
474 ; GFX10-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
475 ; GFX10-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
476 ; GFX10-NEXT: global_load_ubyte v0, v[0:1], off
477 ; GFX10-NEXT: s_waitcnt vmcnt(0)
478 ; GFX10-NEXT: s_setpc_b64 s[30:31]
480 ; GFX11-LABEL: global_inst_valu_offset_2x_neg_13bit_max:
482 ; GFX11-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
483 ; GFX11-NEXT: v_add_co_u32 v0, vcc_lo, 0xffffc000, v0
484 ; GFX11-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, -1, v1, vcc_lo
485 ; GFX11-NEXT: global_load_u8 v0, v[0:1], off
486 ; GFX11-NEXT: s_waitcnt vmcnt(0)
487 ; GFX11-NEXT: s_setpc_b64 s[30:31]
488 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384 ; -16384 = 2 * -8192: no checked target folds it; each adds 0xffffc000 (high half -1) and loads with no offset
489 %load = load i8, ptr addrspace(1) %gep, align 4
493 ; Offset is (1ull << 33) | 2047: bit 33 plus all 11 low bits set.
494 define i8 @global_inst_valu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
495 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
496 ; GFX9-GISEL: ; %bb.0:
497 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
498 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
499 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
500 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
501 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
502 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
503 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
504 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
505 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
506 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
508 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
509 ; GFX10-GISEL: ; %bb.0:
510 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
511 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
512 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
513 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
514 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
515 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
516 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
517 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
518 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
519 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
521 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split0:
522 ; GFX11-GISEL: ; %bb.0:
523 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
524 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
525 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
526 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
527 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
528 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
529 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
530 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
531 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
532 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
533 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
535 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
536 ; GFX9-SDAG: ; %bb.0:
537 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
538 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
539 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
540 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
541 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
542 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
544 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
545 ; GFX10-SDAG: ; %bb.0:
546 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
547 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
548 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
549 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
550 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
551 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
553 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split0:
554 ; GFX11-SDAG: ; %bb.0:
555 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
556 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
557 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
558 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047
559 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
560 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
561 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639 ; 0x2000007ff: GlobalISel materializes both halves (0x7ff, 2) and adds them; SelectionDAG adds 0 to the low half and 2 to the high half, keeping offset:2047
562 %load = load i8, ptr addrspace(1) %gep, align 4
566 ; Offset is (1ull << 33) | 2048: bit 33 plus the first value past the 11-bit maximum.
567 define i8 @global_inst_valu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
568 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
569 ; GFX9-GISEL: ; %bb.0:
570 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
571 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
572 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
573 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
574 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
575 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
576 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
577 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
578 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
579 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
581 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
582 ; GFX10-GISEL: ; %bb.0:
583 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
584 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
585 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
586 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
587 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
588 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
589 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
590 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
591 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
592 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
594 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_split1:
595 ; GFX11-GISEL: ; %bb.0:
596 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
597 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
598 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
599 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
600 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
601 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
602 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
603 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
604 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
605 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
606 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
608 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
609 ; GFX9-SDAG: ; %bb.0:
610 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
611 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
612 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
613 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2048
614 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
615 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
617 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
618 ; GFX10-SDAG: ; %bb.0:
619 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
620 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
621 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
622 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
623 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
624 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
626 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_split1:
627 ; GFX11-SDAG: ; %bb.0:
628 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
629 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
630 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
631 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048
632 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
633 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
634 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640 ; 0x200000800: GlobalISel materializes both halves (0x800, 2) and adds them; SelectionDAG adds 2 to the high half and uses offset:2048 (gfx900/gfx1100) or adds 0x800 to the low half with no offset (gfx1010)
635 %load = load i8, ptr addrspace(1) %gep, align 4
639 ; Offset is (1ull << 33) | 4095: bit 33 plus all 12 low bits set.
640 define i8 @global_inst_valu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
641 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
642 ; GFX9-GISEL: ; %bb.0:
643 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
644 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
645 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
646 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
647 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
648 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
649 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
650 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
651 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
652 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
654 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
655 ; GFX10-GISEL: ; %bb.0:
656 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
657 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
658 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
659 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
660 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
661 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
662 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
663 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
664 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
665 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
667 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split0:
668 ; GFX11-GISEL: ; %bb.0:
669 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
670 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
671 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
672 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
673 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
674 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
675 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
676 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
677 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
678 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
679 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
681 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
682 ; GFX9-SDAG: ; %bb.0:
683 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
684 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0, v0
685 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
686 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
687 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
688 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
690 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
691 ; GFX10-SDAG: ; %bb.0:
692 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
693 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
694 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
695 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
696 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
697 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
699 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split0:
700 ; GFX11-SDAG: ; %bb.0:
701 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
702 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0, v0
703 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
704 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
705 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
706 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
707 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687 ; 0x200000fff: GlobalISel materializes both halves (0xfff, 2) and adds them; SelectionDAG adds 2 to the high half and uses offset:4095 (gfx900/gfx1100) or adds 0x800 to the low half with offset:2047 (gfx1010)
708 %load = load i8, ptr addrspace(1) %gep, align 4
712 ; Offset is (1ull << 33) | 4096: bit 33 plus the first value past the 12-bit maximum.
713 define i8 @global_inst_valu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
714 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
715 ; GFX9-GISEL: ; %bb.0:
716 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
717 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
718 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
719 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
720 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
721 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
722 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
723 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
724 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
725 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
727 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
728 ; GFX10-GISEL: ; %bb.0:
729 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
730 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
731 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
732 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
733 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
734 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
735 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
736 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
737 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
738 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
740 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_split1:
741 ; GFX11-GISEL: ; %bb.0:
742 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
743 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
744 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
745 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
746 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
747 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
748 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
749 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
750 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
751 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
752 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
754 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
755 ; GFX9-SDAG: ; %bb.0:
756 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
757 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
758 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
759 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
760 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
761 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
763 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
764 ; GFX10-SDAG: ; %bb.0:
765 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
766 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
767 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
768 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
769 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
770 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
772 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_split1:
773 ; GFX11-SDAG: ; %bb.0:
774 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
775 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
776 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
777 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
778 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
779 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
780 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688
781 %load = load i8, ptr addrspace(1) %gep, align 4
785 ; Fill 13-bit low-bits (1ull << 33) | 8191
; The byte offset is 8589942783 = 2^33 + 8191, i.e. the low part is 0x1fff
; (all 13 low bits set) and the high 32-bit word is 2.
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the whole 64-bit constant in
;    SGPRs (low 0x1fff, high 2), do a full 64-bit add, and load with no
;    immediate offset.
;  - SelectionDAG runs split the low part between the add and the load's
;    immediate offset: gfx900 and gfx1100 add 0x1000 and use offset:4095,
;    gfx1010 adds 0x1800 and uses offset:2047. Both splits sum to 0x1fff;
;    the high word 2 is added with carry in every case.
786 define i8 @global_inst_valu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
787 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
788 ; GFX9-GISEL: ; %bb.0:
789 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
790 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
791 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
792 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
793 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
794 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
795 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
796 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
797 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
798 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
800 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
801 ; GFX10-GISEL: ; %bb.0:
802 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
803 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
804 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
805 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
806 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
807 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
808 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
809 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
810 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
811 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
813 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split0:
814 ; GFX11-GISEL: ; %bb.0:
815 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
816 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
817 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
818 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
819 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
820 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
821 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
822 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
823 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
824 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
825 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
827 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
828 ; GFX9-SDAG: ; %bb.0:
829 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
830 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
831 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
832 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:4095
833 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
834 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
836 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
837 ; GFX10-SDAG: ; %bb.0:
838 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
839 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1800, v0
840 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
841 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047
842 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
843 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
845 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split0:
846 ; GFX11-SDAG: ; %bb.0:
847 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
848 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
849 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
850 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095
851 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
852 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
853 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783
854 %load = load i8, ptr addrspace(1) %gep, align 4
858 ; Fill 13-bit low-bits (1ull << 33) | 8192
; The byte offset is 8589942784 = 2^33 + 8192, i.e. the low part (0x2000) is
; one past the largest 13-bit value (8191) and the high 32-bit word is 2.
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the whole 64-bit constant in
;    SGPRs (low 0x2000, high 2), do a full 64-bit add, and load with no
;    immediate offset.
;  - SelectionDAG runs (all three targets) add the literal 0x2000 to the low
;    half and 2 to the high half with carry; the load has no immediate offset.
859 define i8 @global_inst_valu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
860 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
861 ; GFX9-GISEL: ; %bb.0:
862 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
863 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
864 ; GFX9-GISEL-NEXT: s_mov_b32 s5, 2
865 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
866 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
867 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
868 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
869 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
870 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
871 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
873 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
874 ; GFX10-GISEL: ; %bb.0:
875 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
876 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
877 ; GFX10-GISEL-NEXT: s_mov_b32 s5, 2
878 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
879 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
880 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
881 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
882 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
883 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
884 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
886 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_split1:
887 ; GFX11-GISEL: ; %bb.0:
888 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
889 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
890 ; GFX11-GISEL-NEXT: s_mov_b32 s1, 2
891 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
892 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
893 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
894 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
895 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
896 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
897 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
898 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
900 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
901 ; GFX9-SDAG: ; %bb.0:
902 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
903 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
904 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, 2, v1, vcc
905 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
906 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
907 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
909 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
910 ; GFX10-SDAG: ; %bb.0:
911 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
912 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
913 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
914 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
915 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
916 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
918 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_split1:
919 ; GFX11-SDAG: ; %bb.0:
920 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
921 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
922 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 2, v1, vcc_lo
923 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
924 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
925 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
926 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784
927 %load = load i8, ptr addrspace(1) %gep, align 4
931 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; As a signed i64 the byte offset is -9223372036854773761 = -2^63 + 2047:
; only the sign bit is set in the high word (0x80000000) and the low part is
; 0x7ff (all 11 low bits set).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0x7ff, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG runs over-add to the low half and correct with a negative
;    immediate offset: gfx900 and gfx1100 add 0x1000 and use offset:-2049,
;    gfx1010 adds 0x800 and uses offset:-1. Both net to +2047.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
932 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) {
933 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
934 ; GFX9-GISEL: ; %bb.0:
935 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
936 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x7ff
937 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
938 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
939 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
940 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
941 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
942 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
943 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
944 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
946 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
947 ; GFX10-GISEL: ; %bb.0:
948 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
949 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x7ff
950 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
951 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
952 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
953 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
954 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
955 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
956 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
957 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
959 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
960 ; GFX11-GISEL: ; %bb.0:
961 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
962 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x7ff
963 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
964 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
965 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
966 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
967 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
968 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
969 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
970 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
971 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
973 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
974 ; GFX9-SDAG: ; %bb.0:
975 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
976 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
977 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
978 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
979 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-2049
980 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
981 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
983 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
984 ; GFX10-SDAG: ; %bb.0:
985 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
986 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
987 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
988 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
989 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
990 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
992 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split0:
993 ; GFX11-SDAG: ; %bb.0:
994 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
995 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
996 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
997 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2049
998 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
999 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1000 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761
1001 %load = load i8, ptr addrspace(1) %gep, align 4
1005 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; As a signed i64 the byte offset is -9223372036854773760 = -2^63 + 2048:
; the high word is 0x80000000 and the low part (0x800) is one past the
; largest 11-bit value (2047).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0x800, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG on gfx900 and gfx1100 adds 0x1000 to the low half and uses
;    offset:-2048 on the load; on gfx1010 it adds 0x800 and the load has no
;    immediate offset. Both net to +2048.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
1006 define i8 @global_inst_valu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) {
1007 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1008 ; GFX9-GISEL: ; %bb.0:
1009 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1010 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x800
1011 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1012 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1013 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1014 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1015 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1016 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1017 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1018 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1020 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1021 ; GFX10-GISEL: ; %bb.0:
1022 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1023 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x800
1024 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1025 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1026 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1027 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1028 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1029 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1030 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1031 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1033 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1034 ; GFX11-GISEL: ; %bb.0:
1035 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1036 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x800
1037 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1038 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1039 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1040 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1041 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1042 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1043 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
1044 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1045 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1047 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1048 ; GFX9-SDAG: ; %bb.0:
1049 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1050 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1051 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1052 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1053 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-2048
1054 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1055 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1057 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1058 ; GFX10-SDAG: ; %bb.0:
1059 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1060 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x800, v0
1061 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1062 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
1063 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1064 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1066 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_11bit_neg_high_split1:
1067 ; GFX11-SDAG: ; %bb.0:
1068 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1069 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1070 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1071 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-2048
1072 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1073 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1074 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760
1075 %load = load i8, ptr addrspace(1) %gep, align 4
1079 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; As a signed i64 the byte offset is -9223372036854771713 = -2^63 + 4095:
; the high word is 0x80000000 and the low part is 0xfff (all 12 low bits
; set).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0xfff, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG runs (all three targets) add 0x1000 to the low half and use
;    offset:-1 on the load, which nets to +4095.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
1080 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) {
1081 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1082 ; GFX9-GISEL: ; %bb.0:
1083 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1084 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0xfff
1085 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1086 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1087 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1088 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1089 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1090 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1091 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1092 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1094 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1095 ; GFX10-GISEL: ; %bb.0:
1096 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1097 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0xfff
1098 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1099 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1100 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1101 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1102 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1103 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1104 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1105 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1107 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1108 ; GFX11-GISEL: ; %bb.0:
1109 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1110 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0xfff
1111 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1112 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1113 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1114 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1115 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1116 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1117 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
1118 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1119 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1121 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1122 ; GFX9-SDAG: ; %bb.0:
1123 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1124 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1125 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1126 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1127 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1128 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1129 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1131 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1132 ; GFX10-SDAG: ; %bb.0:
1133 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1134 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1135 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1136 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1137 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1138 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1140 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split0:
1141 ; GFX11-SDAG: ; %bb.0:
1142 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1143 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1144 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1145 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1
1146 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1147 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1148 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713
1149 %load = load i8, ptr addrspace(1) %gep, align 4
1153 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; As a signed i64 the byte offset is -9223372036854771712 = -2^63 + 4096:
; the high word is 0x80000000 and the low part (0x1000) is one past the
; largest 12-bit value (4095).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0x1000, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG runs (all three targets) add 0x1000 to the low half; the
;    load has no immediate offset.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
1154 define i8 @global_inst_valu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) {
1155 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1156 ; GFX9-GISEL: ; %bb.0:
1157 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1158 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1000
1159 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1160 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1161 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1162 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1163 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1164 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1165 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1166 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1168 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1169 ; GFX10-GISEL: ; %bb.0:
1170 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1171 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1000
1172 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1173 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1174 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1175 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1176 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1177 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1178 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1179 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1181 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1182 ; GFX11-GISEL: ; %bb.0:
1183 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1184 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1000
1185 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1186 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1187 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1188 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1189 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1190 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1191 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
1192 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1193 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1195 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1196 ; GFX9-SDAG: ; %bb.0:
1197 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1198 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x1000, v0
1199 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1200 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1201 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
1202 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1203 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1205 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1206 ; GFX10-SDAG: ; %bb.0:
1207 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1208 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1209 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1210 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
1211 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1212 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1214 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_12bit_neg_high_split1:
1215 ; GFX11-SDAG: ; %bb.0:
1216 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1217 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x1000, v0
1218 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1219 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
1220 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1221 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1222 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712
1223 %load = load i8, ptr addrspace(1) %gep, align 4
1227 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; As a signed i64 the byte offset is -9223372036854767617 = -2^63 + 8191:
; the high word is 0x80000000 and the low part is 0x1fff (all 13 low bits
; set).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0x1fff, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG runs (all three targets) add 0x2000 to the low half and use
;    offset:-1 on the load, which nets to +8191.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
1228 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) {
1229 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1230 ; GFX9-GISEL: ; %bb.0:
1231 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1232 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x1fff
1233 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1234 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1235 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1236 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1237 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1238 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1239 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1240 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1242 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1243 ; GFX10-GISEL: ; %bb.0:
1244 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1245 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x1fff
1246 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1247 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1248 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1249 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1250 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1251 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1252 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1253 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1255 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1256 ; GFX11-GISEL: ; %bb.0:
1257 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1258 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x1fff
1259 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1260 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1261 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1262 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1263 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1264 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1265 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
1266 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1267 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1269 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1270 ; GFX9-SDAG: ; %bb.0:
1271 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1272 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1273 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1274 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1275 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1276 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1277 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1279 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1280 ; GFX10-SDAG: ; %bb.0:
1281 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1282 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1283 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1284 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:-1
1285 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1286 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1288 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split0:
1289 ; GFX11-SDAG: ; %bb.0:
1290 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1291 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1292 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1293 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:-1
1294 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1295 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1296 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
1297 %load = load i8, ptr addrspace(1) %gep, align 4
1301 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; As a signed i64 the byte offset is -9223372036854767616 = -2^63 + 8192:
; the high word is 0x80000000 and the low part (0x2000) is one past the
; largest 13-bit value (8191).
; What the expected output below pins down:
;  - GlobalISel runs (all three targets) build the constant in SGPRs (low
;    0x2000, high word via s_brev_b32 of 1), do a full 64-bit add, and load
;    with no immediate offset.
;  - SelectionDAG runs (all three targets) add 0x2000 to the low half; the
;    load has no immediate offset.
;  - For the high word, SelectionDAG on gfx900 first materializes it in a
;    VGPR with v_bfrev_b32, while gfx1010 and gfx1100 use the literal
;    0x80000000 directly in the add-with-carry.
1302 define i8 @global_inst_valu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) {
1303 ; GFX9-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1304 ; GFX9-GISEL: ; %bb.0:
1305 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1306 ; GFX9-GISEL-NEXT: s_movk_i32 s4, 0x2000
1307 ; GFX9-GISEL-NEXT: s_brev_b32 s5, 1
1308 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v2, s4
1309 ; GFX9-GISEL-NEXT: v_mov_b32_e32 v3, s5
1310 ; GFX9-GISEL-NEXT: v_add_co_u32_e32 v0, vcc, v0, v2
1311 ; GFX9-GISEL-NEXT: v_addc_co_u32_e32 v1, vcc, v1, v3, vcc
1312 ; GFX9-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1313 ; GFX9-GISEL-NEXT: s_waitcnt vmcnt(0)
1314 ; GFX9-GISEL-NEXT: s_setpc_b64 s[30:31]
1316 ; GFX10-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1317 ; GFX10-GISEL: ; %bb.0:
1318 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1319 ; GFX10-GISEL-NEXT: s_movk_i32 s4, 0x2000
1320 ; GFX10-GISEL-NEXT: s_brev_b32 s5, 1
1321 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v2, s4
1322 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v3, s5
1323 ; GFX10-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1324 ; GFX10-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1325 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off
1326 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1327 ; GFX10-GISEL-NEXT: s_setpc_b64 s[30:31]
1329 ; GFX11-GISEL-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1330 ; GFX11-GISEL: ; %bb.0:
1331 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1332 ; GFX11-GISEL-NEXT: s_movk_i32 s0, 0x2000
1333 ; GFX11-GISEL-NEXT: s_brev_b32 s1, 1
1334 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1) | instskip(NEXT) | instid1(VALU_DEP_1)
1335 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v3, s1 :: v_dual_mov_b32 v2, s0
1336 ; GFX11-GISEL-NEXT: v_add_co_u32 v0, vcc_lo, v0, v2
1337 ; GFX11-GISEL-NEXT: s_delay_alu instid0(VALU_DEP_2)
1338 ; GFX11-GISEL-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, v1, v3, vcc_lo
1339 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off
1340 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1341 ; GFX11-GISEL-NEXT: s_setpc_b64 s[30:31]
1343 ; GFX9-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1344 ; GFX9-SDAG: ; %bb.0:
1345 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1346 ; GFX9-SDAG-NEXT: v_add_co_u32_e32 v0, vcc, 0x2000, v0
1347 ; GFX9-SDAG-NEXT: v_bfrev_b32_e32 v2, 1
1348 ; GFX9-SDAG-NEXT: v_addc_co_u32_e32 v1, vcc, v2, v1, vcc
1349 ; GFX9-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
1350 ; GFX9-SDAG-NEXT: s_waitcnt vmcnt(0)
1351 ; GFX9-SDAG-NEXT: s_setpc_b64 s[30:31]
1353 ; GFX10-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1354 ; GFX10-SDAG: ; %bb.0:
1355 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1356 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1357 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1358 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off
1359 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1360 ; GFX10-SDAG-NEXT: s_setpc_b64 s[30:31]
1362 ; GFX11-SDAG-LABEL: global_inst_valu_offset_64bit_13bit_neg_high_split1:
1363 ; GFX11-SDAG: ; %bb.0:
1364 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0)
1365 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, vcc_lo, 0x2000, v0
1366 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e32 v1, vcc_lo, 0x80000000, v1, vcc_lo
1367 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off
1368 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1369 ; GFX11-SDAG-NEXT: s_setpc_b64 s[30:31]
1370 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
1371 %load = load i8, ptr addrspace(1) %gep, align 4
; Kernel variant of the offset-1 case: %p is a kernel argument, and the
; volatile byte load reads from %p + 1. The loaded byte is then stored through
; an undef pointer so that the load has a user.
; The expected output below uses only the per-target check prefixes shared by
; both instruction selectors, so one sequence is checked for each target:
;  - the pointer is fetched with a scalar load at 0x24 from s[0:1];
;  - the byte load uses that SGPR pair as its base, a zeroed VGPR as the
;    address operand, and the immediate offset:1;
;  - the load carries glc on gfx900 and glc dlc on gfx1010 / gfx1100
;    (presumably because the IR load is volatile -- TODO confirm);
;  - gfx1100 additionally expects s_nop 0 and a MSG_DEALLOC_VGPRS sendmsg
;    before s_endpgm.
1375 define amdgpu_kernel void @global_inst_salu_offset_1(ptr addrspace(1) %p) {
1376 ; GFX9-LABEL: global_inst_salu_offset_1:
1378 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1379 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1380 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1381 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc
1382 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1383 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1384 ; GFX9-NEXT: s_endpgm
1386 ; GFX10-LABEL: global_inst_salu_offset_1:
1388 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1389 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1390 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1391 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:1 glc dlc
1392 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1393 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1394 ; GFX10-NEXT: s_endpgm
1396 ; GFX11-LABEL: global_inst_salu_offset_1:
1398 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1399 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1400 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1401 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:1 glc dlc
1402 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1403 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1404 ; GFX11-NEXT: s_nop 0
1405 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1406 ; GFX11-NEXT: s_endpgm
1407 %gep = getelementptr i8, ptr addrspace(1) %p, i64 1
1408 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1409 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 2047; the loaded byte is stored to an
; undef global pointer. The expected output for gfx9, gfx10 and gfx11 keeps
; the whole constant in the load's immediate field (offset:2047) with v0 = 0.
1413 define amdgpu_kernel void @global_inst_salu_offset_11bit_max(ptr addrspace(1) %p) {
1414 ; GFX9-LABEL: global_inst_salu_offset_11bit_max:
1416 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1417 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1418 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1419 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc
1420 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1421 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1422 ; GFX9-NEXT: s_endpgm
1424 ; GFX10-LABEL: global_inst_salu_offset_11bit_max:
1426 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1427 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1428 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1429 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1430 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1431 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1432 ; GFX10-NEXT: s_endpgm
1434 ; GFX11-LABEL: global_inst_salu_offset_11bit_max:
1436 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1437 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1438 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1439 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:2047 glc dlc
1440 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1441 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1442 ; GFX11-NEXT: s_nop 0
1443 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1444 ; GFX11-NEXT: s_endpgm
1445 %gep = getelementptr i8, ptr addrspace(1) %p, i64 2047
1446 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1447 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 4095; the loaded byte is stored to an
; undef global pointer. Expected output: gfx9 and gfx11 encode the constant
; entirely as offset:4095 with v0 = 0, while gfx10 splits it into a VGPR
; offset of 0x800 plus offset:2047 (0x800 + 2047 = 4095).
1451 define amdgpu_kernel void @global_inst_salu_offset_12bit_max(ptr addrspace(1) %p) {
1452 ; GFX9-LABEL: global_inst_salu_offset_12bit_max:
1454 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1455 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1456 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1457 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
1458 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1459 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1460 ; GFX9-NEXT: s_endpgm
1462 ; GFX10-LABEL: global_inst_salu_offset_12bit_max:
1464 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1465 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x800
1466 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1467 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1468 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1469 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1470 ; GFX10-NEXT: s_endpgm
1472 ; GFX11-LABEL: global_inst_salu_offset_12bit_max:
1474 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1475 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1476 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1477 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
1478 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1479 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1480 ; GFX11-NEXT: s_nop 0
1481 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1482 ; GFX11-NEXT: s_endpgm
1483 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
1484 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1485 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 8191; the loaded byte is stored to an
; undef global pointer. Expected output splits the constant on every target:
; gfx9 and gfx11 use a VGPR offset of 0x1000 plus offset:4095, gfx10 uses a
; VGPR offset of 0x1800 plus offset:2047 (both sums equal 8191).
1489 define amdgpu_kernel void @global_inst_salu_offset_13bit_max(ptr addrspace(1) %p) {
1490 ; GFX9-LABEL: global_inst_salu_offset_13bit_max:
1492 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1493 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000
1494 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1495 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
1496 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1497 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1498 ; GFX9-NEXT: s_endpgm
1500 ; GFX10-LABEL: global_inst_salu_offset_13bit_max:
1502 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1503 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800
1504 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1505 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1506 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1507 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1508 ; GFX10-NEXT: s_endpgm
1510 ; GFX11-LABEL: global_inst_salu_offset_13bit_max:
1512 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1513 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
1514 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1515 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
1516 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1517 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1518 ; GFX11-NEXT: s_nop 0
1519 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1520 ; GFX11-NEXT: s_endpgm
1521 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
1522 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1523 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 2048; the loaded byte is stored to an
; undef global pointer. The expected output for gfx9, gfx10 and gfx11 keeps
; the whole constant in the load's immediate field (offset:-2048) with v0 = 0.
1527 define amdgpu_kernel void @global_inst_salu_offset_neg_11bit_max(ptr addrspace(1) %p) {
1528 ; GFX9-LABEL: global_inst_salu_offset_neg_11bit_max:
1530 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1531 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1532 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1533 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc
1534 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1535 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1536 ; GFX9-NEXT: s_endpgm
1538 ; GFX10-LABEL: global_inst_salu_offset_neg_11bit_max:
1540 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1541 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
1542 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1543 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-2048 glc dlc
1544 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1545 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1546 ; GFX10-NEXT: s_endpgm
1548 ; GFX11-LABEL: global_inst_salu_offset_neg_11bit_max:
1550 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1551 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1552 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1553 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-2048 glc dlc
1554 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1555 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1556 ; GFX11-NEXT: s_nop 0
1557 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1558 ; GFX11-NEXT: s_endpgm
1559 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -2048
1560 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1561 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 4096; the loaded byte is stored to an
; undef global pointer. Expected output: gfx9 and gfx11 encode the constant
; as offset:-4096 with v0 = 0. On gfx10 no immediate offset is used; the
; 64-bit constant (low 0xfffff000, high -1) is added to the base first, with
; scalar adds under GlobalISel and vector adds under SelectionDAG.
1565 define amdgpu_kernel void @global_inst_salu_offset_neg_12bit_max(ptr addrspace(1) %p) {
1566 ; GFX9-LABEL: global_inst_salu_offset_neg_12bit_max:
1568 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1569 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1570 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1571 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
1572 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1573 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1574 ; GFX9-NEXT: s_endpgm
1576 ; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_12bit_max:
1577 ; GFX10-GISEL: ; %bb.0:
1578 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1579 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1580 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1581 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1
1582 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
1583 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
1584 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1585 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1586 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
1587 ; GFX10-GISEL-NEXT: s_endpgm
1589 ; GFX11-LABEL: global_inst_salu_offset_neg_12bit_max:
1591 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1592 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1593 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1594 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
1595 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1596 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1597 ; GFX11-NEXT: s_nop 0
1598 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1599 ; GFX11-NEXT: s_endpgm
1601 ; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_12bit_max:
1602 ; GFX10-SDAG: ; %bb.0:
1603 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1604 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1605 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
1606 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1607 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1608 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1609 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
1610 ; GFX10-SDAG-NEXT: s_endpgm
1611 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
1612 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1613 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 8192; the loaded byte is stored to an
; undef global pointer. In the expected output no target uses an immediate
; offset; the 64-bit constant (low 0xffffe000, high -1) is added to the base
; before the load. gfx9 uses scalar adds for both selectors; on gfx10 and
; gfx11 GlobalISel uses scalar adds and SelectionDAG uses vector adds.
1617 define amdgpu_kernel void @global_inst_salu_offset_neg_13bit_max(ptr addrspace(1) %p) {
1618 ; GFX9-LABEL: global_inst_salu_offset_neg_13bit_max:
1620 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1621 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1622 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1623 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
1624 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1625 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1626 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1627 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1628 ; GFX9-NEXT: s_endpgm
1630 ; GFX10-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max:
1631 ; GFX10-GISEL: ; %bb.0:
1632 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1633 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1634 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1635 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1
1636 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
1637 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
1638 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1639 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1640 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
1641 ; GFX10-GISEL-NEXT: s_endpgm
1643 ; GFX11-GISEL-LABEL: global_inst_salu_offset_neg_13bit_max:
1644 ; GFX11-GISEL: ; %bb.0:
1645 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1646 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1647 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1648 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1649 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1650 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1651 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1652 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1653 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
1654 ; GFX11-GISEL-NEXT: s_nop 0
1655 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1656 ; GFX11-GISEL-NEXT: s_endpgm
1658 ; GFX10-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max:
1659 ; GFX10-SDAG: ; %bb.0:
1660 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1661 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1662 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
1663 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1664 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1665 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1666 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
1667 ; GFX10-SDAG-NEXT: s_endpgm
1669 ; GFX11-SDAG-LABEL: global_inst_salu_offset_neg_13bit_max:
1670 ; GFX11-SDAG: ; %bb.0:
1671 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1672 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1673 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
1674 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1675 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1676 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1677 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1678 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
1679 ; GFX11-SDAG-NEXT: s_nop 0
1680 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1681 ; GFX11-SDAG-NEXT: s_endpgm
1682 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
1683 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1684 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 4095; the loaded byte is stored to an
; undef global pointer. Expected output: gfx9 and gfx11 encode the constant
; entirely as offset:4095 with v0 = 0, while gfx10 splits it into a VGPR
; offset of 0x800 plus offset:2047 (0x800 + 2047 = 4095).
1688 define amdgpu_kernel void @global_inst_salu_offset_2x_11bit_max(ptr addrspace(1) %p) {
1689 ; GFX9-LABEL: global_inst_salu_offset_2x_11bit_max:
1691 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1692 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1693 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1694 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
1695 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1696 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1697 ; GFX9-NEXT: s_endpgm
1699 ; GFX10-LABEL: global_inst_salu_offset_2x_11bit_max:
1701 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1702 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x800
1703 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1704 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1705 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1706 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1707 ; GFX10-NEXT: s_endpgm
1709 ; GFX11-LABEL: global_inst_salu_offset_2x_11bit_max:
1711 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1712 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1713 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1714 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
1715 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1716 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1717 ; GFX11-NEXT: s_nop 0
1718 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1719 ; GFX11-NEXT: s_endpgm
1720 %gep = getelementptr i8, ptr addrspace(1) %p, i64 4095
1721 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1722 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 8191; the loaded byte is stored to an
; undef global pointer. Expected output splits the constant on every target:
; gfx9 and gfx11 use a VGPR offset of 0x1000 plus offset:4095, gfx10 uses a
; VGPR offset of 0x1800 plus offset:2047 (both sums equal 8191).
1726 define amdgpu_kernel void @global_inst_salu_offset_2x_12bit_max(ptr addrspace(1) %p) {
1727 ; GFX9-LABEL: global_inst_salu_offset_2x_12bit_max:
1729 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1730 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x1000
1731 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1732 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
1733 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1734 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1735 ; GFX9-NEXT: s_endpgm
1737 ; GFX10-LABEL: global_inst_salu_offset_2x_12bit_max:
1739 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1740 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x1800
1741 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1742 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1743 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1744 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1745 ; GFX10-NEXT: s_endpgm
1747 ; GFX11-LABEL: global_inst_salu_offset_2x_12bit_max:
1749 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1750 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x1000
1751 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1752 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
1753 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1754 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1755 ; GFX11-NEXT: s_nop 0
1756 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1757 ; GFX11-NEXT: s_endpgm
1758 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8191
1759 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1760 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p + 16383; the loaded byte is stored to an
; undef global pointer. Expected output splits the constant on every target:
; gfx9 and gfx11 use a VGPR offset of 0x3000 plus offset:4095, gfx10 uses a
; VGPR offset of 0x3800 plus offset:2047 (both sums equal 16383).
1764 define amdgpu_kernel void @global_inst_salu_offset_2x_13bit_max(ptr addrspace(1) %p) {
1765 ; GFX9-LABEL: global_inst_salu_offset_2x_13bit_max:
1767 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1768 ; GFX9-NEXT: v_mov_b32_e32 v0, 0x3000
1769 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1770 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:4095 glc
1771 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1772 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1773 ; GFX9-NEXT: s_endpgm
1775 ; GFX10-LABEL: global_inst_salu_offset_2x_13bit_max:
1777 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1778 ; GFX10-NEXT: v_mov_b32_e32 v0, 0x3800
1779 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
1780 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] offset:2047 glc dlc
1781 ; GFX10-NEXT: s_waitcnt vmcnt(0)
1782 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
1783 ; GFX10-NEXT: s_endpgm
1785 ; GFX11-LABEL: global_inst_salu_offset_2x_13bit_max:
1787 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1788 ; GFX11-NEXT: v_mov_b32_e32 v0, 0x3000
1789 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1790 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:4095 glc dlc
1791 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1792 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1793 ; GFX11-NEXT: s_nop 0
1794 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1795 ; GFX11-NEXT: s_endpgm
1796 %gep = getelementptr i8, ptr addrspace(1) %p, i64 16383
1797 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1798 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 4096; the loaded byte is stored to an
; undef global pointer. Expected output: gfx9 and gfx11 encode the constant
; as offset:-4096 with v0 = 0. On gfx10 no immediate offset is used; the
; 64-bit constant (low 0xfffff000, high -1) is added to the base first, with
; scalar adds under GlobalISel and vector adds under SelectionDAG.
1802 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_11bit_max(ptr addrspace(1) %p) {
1803 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
1805 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1806 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1807 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1808 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] offset:-4096 glc
1809 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1810 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1811 ; GFX9-NEXT: s_endpgm
1813 ; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
1814 ; GFX10-GISEL: ; %bb.0:
1815 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1816 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1817 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfffff000
1818 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1
1819 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
1820 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
1821 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1822 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1823 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
1824 ; GFX10-GISEL-NEXT: s_endpgm
1826 ; GFX11-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
1828 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1829 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
1830 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
1831 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] offset:-4096 glc dlc
1832 ; GFX11-NEXT: s_waitcnt vmcnt(0)
1833 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
1834 ; GFX11-NEXT: s_nop 0
1835 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1836 ; GFX11-NEXT: s_endpgm
1838 ; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_11bit_max:
1839 ; GFX10-SDAG: ; %bb.0:
1840 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1841 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1842 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xfffff000, s0
1843 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1844 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1845 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1846 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
1847 ; GFX10-SDAG-NEXT: s_endpgm
1848 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -4096
1849 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1850 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 8192; the loaded byte is stored to an
; undef global pointer. In the expected output no target uses an immediate
; offset; the 64-bit constant (low 0xffffe000, high -1) is added to the base
; before the load. gfx9 uses scalar adds for both selectors; on gfx10 and
; gfx11 GlobalISel uses scalar adds and SelectionDAG uses vector adds.
1854 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_12bit_max(ptr addrspace(1) %p) {
1855 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
1857 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1858 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1859 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1860 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffe000
1861 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1862 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1863 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1864 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1865 ; GFX9-NEXT: s_endpgm
1867 ; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
1868 ; GFX10-GISEL: ; %bb.0:
1869 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1870 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1871 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1872 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1
1873 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
1874 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
1875 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1876 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1877 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
1878 ; GFX10-GISEL-NEXT: s_endpgm
1880 ; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
1881 ; GFX11-GISEL: ; %bb.0:
1882 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1883 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1884 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffe000
1885 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1886 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1887 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1888 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1889 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1890 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
1891 ; GFX11-GISEL-NEXT: s_nop 0
1892 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1893 ; GFX11-GISEL-NEXT: s_endpgm
1895 ; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
1896 ; GFX10-SDAG: ; %bb.0:
1897 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1898 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1899 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
1900 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1901 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1902 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1903 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
1904 ; GFX10-SDAG-NEXT: s_endpgm
1906 ; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_12bit_max:
1907 ; GFX11-SDAG: ; %bb.0:
1908 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1909 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1910 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffe000, s0
1911 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1912 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1913 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1914 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1915 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
1916 ; GFX11-SDAG-NEXT: s_nop 0
1917 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1918 ; GFX11-SDAG-NEXT: s_endpgm
1919 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -8192
1920 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1921 store i8 %load, ptr addrspace(1) undef
; Kernel: volatile i8 load from %p - 16384; the loaded byte is stored to an
; undef global pointer. In the expected output no target uses an immediate
; offset; the 64-bit constant (low 0xffffc000, high -1) is added to the base
; before the load. gfx9 uses scalar adds for both selectors; on gfx10 and
; gfx11 GlobalISel uses scalar adds and SelectionDAG uses vector adds.
1925 define amdgpu_kernel void @global_inst_salu_offset_2x_neg_13bit_max(ptr addrspace(1) %p) {
1926 ; GFX9-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1928 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1929 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
1930 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
1931 ; GFX9-NEXT: s_add_u32 s0, s0, 0xffffc000
1932 ; GFX9-NEXT: s_addc_u32 s1, s1, -1
1933 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
1934 ; GFX9-NEXT: s_waitcnt vmcnt(0)
1935 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
1936 ; GFX9-NEXT: s_endpgm
1938 ; GFX10-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1939 ; GFX10-GISEL: ; %bb.0:
1940 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1941 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1942 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000
1943 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, -1
1944 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
1945 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
1946 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1947 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
1948 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
1949 ; GFX10-GISEL-NEXT: s_endpgm
1951 ; GFX11-GISEL-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1952 ; GFX11-GISEL: ; %bb.0:
1953 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1954 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
1955 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xffffc000
1956 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, -1
1957 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
1958 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
1959 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1960 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
1961 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
1962 ; GFX11-GISEL-NEXT: s_nop 0
1963 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1964 ; GFX11-GISEL-NEXT: s_endpgm
1966 ; GFX10-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1967 ; GFX10-SDAG: ; %bb.0:
1968 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
1969 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1970 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
1971 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, -1, s1, s0
1972 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
1973 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
1974 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
1975 ; GFX10-SDAG-NEXT: s_endpgm
1977 ; GFX11-SDAG-LABEL: global_inst_salu_offset_2x_neg_13bit_max:
1978 ; GFX11-SDAG: ; %bb.0:
1979 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
1980 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
1981 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0xffffc000, s0
1982 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
1983 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, -1, s1, s0
1984 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
1985 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
1986 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
1987 ; GFX11-SDAG-NEXT: s_nop 0
1988 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
1989 ; GFX11-SDAG-NEXT: s_endpgm
1990 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -16384
1991 %load = load volatile i8, ptr addrspace(1) %gep, align 1
1992 store i8 %load, ptr addrspace(1) undef
1996 ; Fill 11-bit low-bits (1ull << 33) | 2047
; Kernel: volatile i8 load from %p + 8589936639 (2^33 + 2047); the loaded
; byte is stored to an undef global pointer. Expected output: gfx9 (both
; selectors) and GlobalISel on gfx10/gfx11 add the full constant to the base
; with scalar adds (low 0x7ff, high 2) and use no immediate offset.
; SelectionDAG on gfx10/gfx11 adds only the high part (low 0, high 2) with
; vector adds and places the low bits in the load as offset:2047.
1997 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split0(ptr addrspace(1) %p) {
1998 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2000 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2001 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2002 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2003 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
2004 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2005 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2006 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2007 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2008 ; GFX9-NEXT: s_endpgm
2010 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2011 ; GFX10-GISEL: ; %bb.0:
2012 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2013 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2014 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2015 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2016 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2017 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2018 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2019 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2020 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2021 ; GFX10-GISEL-NEXT: s_endpgm
2023 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2024 ; GFX11-GISEL: ; %bb.0:
2025 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2026 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2027 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x7ff
2028 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2029 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2030 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2031 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2032 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2033 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2034 ; GFX11-GISEL-NEXT: s_nop 0
2035 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2036 ; GFX11-GISEL-NEXT: s_endpgm
2038 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2039 ; GFX10-SDAG: ; %bb.0:
2040 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2041 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2042 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2043 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2044 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
2045 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2046 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2047 ; GFX10-SDAG-NEXT: s_endpgm
2049 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split0:
2050 ; GFX11-SDAG: ; %bb.0:
2051 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2052 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2053 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2054 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2055 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2056 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2047 glc dlc
2057 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2058 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2059 ; GFX11-SDAG-NEXT: s_nop 0
2060 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2061 ; GFX11-SDAG-NEXT: s_endpgm
2062 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936639
2063 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2064 store i8 %load, ptr addrspace(1) undef
2068 ; Fill 11-bit low-bits (1ull << 33) | 2048
; Kernel: volatile i8 load from %p + 8589936640 (2^33 + 2048); the loaded
; byte is stored to an undef global pointer. Expected output: gfx9 (both
; selectors) and GlobalISel on gfx10/gfx11 add the full constant to the base
; with scalar adds (low 0x800, high 2) and use no immediate offset.
; SelectionDAG on gfx10 also adds the full constant (vector adds, no
; immediate offset), whereas SelectionDAG on gfx11 adds only the high part
; (low 0, high 2) and places the low bits in the load as offset:2048.
2069 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_split1(ptr addrspace(1) %p) {
2070 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2072 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2073 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2074 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2075 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
2076 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2077 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2078 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2079 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2080 ; GFX9-NEXT: s_endpgm
2082 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2083 ; GFX10-GISEL: ; %bb.0:
2084 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2085 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2086 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2087 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2088 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2089 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2090 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2091 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2092 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2093 ; GFX10-GISEL-NEXT: s_endpgm
2095 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2096 ; GFX11-GISEL: ; %bb.0:
2097 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2098 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2099 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x800
2100 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2101 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2102 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2103 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2104 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2105 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2106 ; GFX11-GISEL-NEXT: s_nop 0
2107 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2108 ; GFX11-GISEL-NEXT: s_endpgm
2110 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2111 ; GFX10-SDAG: ; %bb.0:
2112 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2113 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2114 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800, s0
2115 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2116 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2117 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2118 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2119 ; GFX10-SDAG-NEXT: s_endpgm
2121 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_11bit_split1:
2122 ; GFX11-SDAG: ; %bb.0:
2123 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2124 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2125 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2126 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2127 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2128 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:2048 glc dlc
2129 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2130 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2131 ; GFX11-SDAG-NEXT: s_nop 0
2132 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2133 ; GFX11-SDAG-NEXT: s_endpgm
2134 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589936640
2135 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2136 store i8 %load, ptr addrspace(1) undef
2140 ; Fill 12-bit low-bits (1ull << 33) | 4095
; The GEP constant 8589938687 has high dword 2 and low dword 0xfff (all twelve
; low bits set).  Lowering expected by the autogenerated checks below:
;   * gfx900 (both selectors) and the GlobalISel runs for gfx1010/gfx1100 add
;     the whole constant to the base (low 0xfff, then high 2 with carry) and
;     emit the load without an immediate offset.
;   * gfx1010 SelectionDAG adds 0x800 to the low dword and leaves 2047 in the
;     load's offset field (0x800 + 2047 = 0xfff).
;   * gfx1100 SelectionDAG adds 0 to the low dword and leaves 4095 in the
;     load's offset field.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2141 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split0(ptr addrspace(1) %p) {
2142 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2144 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2145 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2146 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2147 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
2148 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2149 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2150 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2151 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2152 ; GFX9-NEXT: s_endpgm
2154 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2155 ; GFX10-GISEL: ; %bb.0:
2156 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2157 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2158 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2159 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2160 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2161 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2162 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2163 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2164 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2165 ; GFX10-GISEL-NEXT: s_endpgm
2167 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2168 ; GFX11-GISEL: ; %bb.0:
2169 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2170 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2171 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0xfff
2172 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2173 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2174 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2175 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2176 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2177 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2178 ; GFX11-GISEL-NEXT: s_nop 0
2179 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2180 ; GFX11-GISEL-NEXT: s_endpgm
2182 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2183 ; GFX10-SDAG: ; %bb.0:
2184 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2185 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2186 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x800, s0
2187 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2188 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
2189 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2190 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2191 ; GFX10-SDAG-NEXT: s_endpgm
2193 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split0:
2194 ; GFX11-SDAG: ; %bb.0:
2195 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2196 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2197 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0, s0
2198 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2199 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2200 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc
2201 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2202 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2203 ; GFX11-SDAG-NEXT: s_nop 0
2204 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2205 ; GFX11-SDAG-NEXT: s_endpgm
2206 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938687
2207 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2208 store i8 %load, ptr addrspace(1) undef
2212 ; Fill 12-bit low-bits (1ull << 33) | 4096
; Note: 4096 (0x1000) is one more than the largest 12-bit value, so the low
; part of the GEP constant 8589938688 no longer fits in twelve bits; its high
; dword is 2.  In every checked configuration below the full constant is added
; to the base (low 0x1000, then high 2 with carry) and the load carries no
; immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2213 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_split1(ptr addrspace(1) %p) {
2214 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2216 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2217 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2218 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2219 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
2220 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2221 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2222 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2223 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2224 ; GFX9-NEXT: s_endpgm
2226 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2227 ; GFX10-GISEL: ; %bb.0:
2228 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2229 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2230 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2231 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2232 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2233 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2234 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2235 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2236 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2237 ; GFX10-GISEL-NEXT: s_endpgm
2239 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2240 ; GFX11-GISEL: ; %bb.0:
2241 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2242 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2243 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1000
2244 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2245 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2246 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2247 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2248 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2249 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2250 ; GFX11-GISEL-NEXT: s_nop 0
2251 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2252 ; GFX11-GISEL-NEXT: s_endpgm
2254 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2255 ; GFX10-SDAG: ; %bb.0:
2256 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2257 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2258 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
2259 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2260 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2261 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2262 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2263 ; GFX10-SDAG-NEXT: s_endpgm
2265 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_12bit_split1:
2266 ; GFX11-SDAG: ; %bb.0:
2267 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2268 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2269 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
2270 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2271 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2272 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2273 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2274 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2275 ; GFX11-SDAG-NEXT: s_nop 0
2276 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2277 ; GFX11-SDAG-NEXT: s_endpgm
2278 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589938688
2279 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2280 store i8 %load, ptr addrspace(1) undef
2284 ; Fill 13-bit low-bits (1ull << 33) | 8191
; The GEP constant 8589942783 has high dword 2 and low dword 0x1fff (all
; thirteen low bits set).  Lowering expected by the autogenerated checks below:
;   * gfx900 (both selectors) and the GlobalISel runs for gfx1010/gfx1100 add
;     the whole constant to the base (low 0x1fff, then high 2 with carry) and
;     emit the load without an immediate offset.
;   * gfx1010 SelectionDAG adds 0x1800 to the low dword and leaves 2047 in the
;     load's offset field (0x1800 + 2047 = 0x1fff).
;   * gfx1100 SelectionDAG adds 0x1000 to the low dword and leaves 4095 in the
;     load's offset field (0x1000 + 4095 = 0x1fff).
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2285 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split0(ptr addrspace(1) %p) {
2286 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2288 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2289 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2290 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2291 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
2292 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2293 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2294 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2295 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2296 ; GFX9-NEXT: s_endpgm
2298 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2299 ; GFX10-GISEL: ; %bb.0:
2300 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2301 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2302 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2303 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2304 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2305 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2306 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2307 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2308 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2309 ; GFX10-GISEL-NEXT: s_endpgm
2311 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2312 ; GFX11-GISEL: ; %bb.0:
2313 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2314 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2315 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x1fff
2316 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2317 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2318 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2319 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2320 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2321 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2322 ; GFX11-GISEL-NEXT: s_nop 0
2323 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2324 ; GFX11-GISEL-NEXT: s_endpgm
2326 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2327 ; GFX10-SDAG: ; %bb.0:
2328 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2329 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2330 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1800, s0
2331 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2332 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off offset:2047 glc dlc
2333 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2334 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2335 ; GFX10-SDAG-NEXT: s_endpgm
2337 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split0:
2338 ; GFX11-SDAG: ; %bb.0:
2339 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2340 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2341 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x1000, s0
2342 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2343 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2344 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off offset:4095 glc dlc
2345 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2346 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2347 ; GFX11-SDAG-NEXT: s_nop 0
2348 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2349 ; GFX11-SDAG-NEXT: s_endpgm
2350 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942783
2351 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2352 store i8 %load, ptr addrspace(1) undef
2356 ; Fill 13-bit low-bits (1ull << 33) | 8192
; Note: 8192 (0x2000) is one more than the largest 13-bit value, so the low
; part of the GEP constant 8589942784 no longer fits in thirteen bits; its
; high dword is 2.  In every checked configuration below the full constant is
; added to the base (low 0x2000, then high 2 with carry) and the load carries
; no immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2357 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_split1(ptr addrspace(1) %p) {
2358 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_split1:
2360 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2361 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2362 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2363 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
2364 ; GFX9-NEXT: s_addc_u32 s1, s1, 2
2365 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2366 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2367 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2368 ; GFX9-NEXT: s_endpgm
2370 ; GFX10-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1:
2371 ; GFX10-GISEL: ; %bb.0:
2372 ; GFX10-GISEL-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2373 ; GFX10-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2374 ; GFX10-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2375 ; GFX10-GISEL-NEXT: s_addc_u32 s1, s1, 2
2376 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v0, s0
2377 ; GFX10-GISEL-NEXT: v_mov_b32_e32 v1, s1
2378 ; GFX10-GISEL-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2379 ; GFX10-GISEL-NEXT: s_waitcnt vmcnt(0)
2380 ; GFX10-GISEL-NEXT: global_store_byte v[0:1], v0, off
2381 ; GFX10-GISEL-NEXT: s_endpgm
2383 ; GFX11-GISEL-LABEL: global_inst_salu_offset_64bit_13bit_split1:
2384 ; GFX11-GISEL: ; %bb.0:
2385 ; GFX11-GISEL-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2386 ; GFX11-GISEL-NEXT: s_waitcnt lgkmcnt(0)
2387 ; GFX11-GISEL-NEXT: s_add_u32 s0, s0, 0x2000
2388 ; GFX11-GISEL-NEXT: s_addc_u32 s1, s1, 2
2389 ; GFX11-GISEL-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
2390 ; GFX11-GISEL-NEXT: v_dual_mov_b32 v0, s0 :: v_dual_mov_b32 v1, s1
2391 ; GFX11-GISEL-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2392 ; GFX11-GISEL-NEXT: s_waitcnt vmcnt(0)
2393 ; GFX11-GISEL-NEXT: global_store_b8 v[0:1], v0, off
2394 ; GFX11-GISEL-NEXT: s_nop 0
2395 ; GFX11-GISEL-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2396 ; GFX11-GISEL-NEXT: s_endpgm
2398 ; GFX10-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1:
2399 ; GFX10-SDAG: ; %bb.0:
2400 ; GFX10-SDAG-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2401 ; GFX10-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2402 ; GFX10-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
2403 ; GFX10-SDAG-NEXT: v_add_co_ci_u32_e64 v1, s0, 2, s1, s0
2404 ; GFX10-SDAG-NEXT: global_load_ubyte v0, v[0:1], off glc dlc
2405 ; GFX10-SDAG-NEXT: s_waitcnt vmcnt(0)
2406 ; GFX10-SDAG-NEXT: global_store_byte v[0:1], v0, off
2407 ; GFX10-SDAG-NEXT: s_endpgm
2409 ; GFX11-SDAG-LABEL: global_inst_salu_offset_64bit_13bit_split1:
2410 ; GFX11-SDAG: ; %bb.0:
2411 ; GFX11-SDAG-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2412 ; GFX11-SDAG-NEXT: s_waitcnt lgkmcnt(0)
2413 ; GFX11-SDAG-NEXT: v_add_co_u32 v0, s0, 0x2000, s0
2414 ; GFX11-SDAG-NEXT: s_delay_alu instid0(VALU_DEP_1)
2415 ; GFX11-SDAG-NEXT: v_add_co_ci_u32_e64 v1, null, 2, s1, s0
2416 ; GFX11-SDAG-NEXT: global_load_u8 v0, v[0:1], off glc dlc
2417 ; GFX11-SDAG-NEXT: s_waitcnt vmcnt(0)
2418 ; GFX11-SDAG-NEXT: global_store_b8 v[0:1], v0, off
2419 ; GFX11-SDAG-NEXT: s_nop 0
2420 ; GFX11-SDAG-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2421 ; GFX11-SDAG-NEXT: s_endpgm
2422 %gep = getelementptr i8, ptr addrspace(1) %p, i64 8589942784
2423 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2424 store i8 %load, ptr addrspace(1) undef
2428 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2047
; The GEP constant is written as the signed i64 -9223372036854773761, i.e.
; high dword 0x80000000 and low dword 0x7ff.  The checks below use only the
; per-target prefixes shared by the GlobalISel and SelectionDAG runs: on
; gfx900, gfx1010 and gfx1100 the whole constant is added to the SGPR base
; (low 0x7ff, then high 0x80000000 with carry) and the load carries no
; immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2429 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split0(ptr addrspace(1) %p) {
2430 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
2432 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2433 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2434 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2435 ; GFX9-NEXT: s_add_u32 s0, s0, 0x7ff
2436 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2437 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2438 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2439 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2440 ; GFX9-NEXT: s_endpgm
2442 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
2444 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2445 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2446 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2447 ; GFX10-NEXT: s_add_u32 s0, s0, 0x7ff
2448 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2449 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2450 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2451 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2452 ; GFX10-NEXT: s_endpgm
2454 ; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split0:
2456 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2457 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2458 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2459 ; GFX11-NEXT: s_add_u32 s0, s0, 0x7ff
2460 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2461 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2462 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2463 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2464 ; GFX11-NEXT: s_nop 0
2465 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2466 ; GFX11-NEXT: s_endpgm
2467 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773761
2468 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2469 store i8 %load, ptr addrspace(1) undef
2473 ; Fill 11-bit low-bits, negative high bits (1ull << 63) | 2048
; Note: 2048 (0x800) is one more than the largest 11-bit value.  The GEP
; constant is written as the signed i64 -9223372036854773760, i.e. high dword
; 0x80000000 and low dword 0x800.  The checks below use only the per-target
; prefixes shared by the GlobalISel and SelectionDAG runs: on gfx900, gfx1010
; and gfx1100 the whole constant is added to the SGPR base (low 0x800, then
; high 0x80000000 with carry) and the load carries no immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2474 define amdgpu_kernel void @global_inst_salu_offset_64bit_11bit_neg_high_split1(ptr addrspace(1) %p) {
2475 ; GFX9-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
2477 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2478 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2479 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2480 ; GFX9-NEXT: s_add_u32 s0, s0, 0x800
2481 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2482 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2483 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2484 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2485 ; GFX9-NEXT: s_endpgm
2487 ; GFX10-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
2489 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2490 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2491 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2492 ; GFX10-NEXT: s_add_u32 s0, s0, 0x800
2493 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2494 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2495 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2496 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2497 ; GFX10-NEXT: s_endpgm
2499 ; GFX11-LABEL: global_inst_salu_offset_64bit_11bit_neg_high_split1:
2501 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2502 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2503 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2504 ; GFX11-NEXT: s_add_u32 s0, s0, 0x800
2505 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2506 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2507 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2508 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2509 ; GFX11-NEXT: s_nop 0
2510 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2511 ; GFX11-NEXT: s_endpgm
2512 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854773760
2513 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2514 store i8 %load, ptr addrspace(1) undef
2518 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4095
; The GEP constant is written as the signed i64 -9223372036854771713, i.e.
; high dword 0x80000000 and low dword 0xfff.  The checks below use only the
; per-target prefixes shared by the GlobalISel and SelectionDAG runs: on
; gfx900, gfx1010 and gfx1100 the whole constant is added to the SGPR base
; (low 0xfff, then high 0x80000000 with carry) and the load carries no
; immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2519 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split0(ptr addrspace(1) %p) {
2520 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
2522 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2523 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2524 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2525 ; GFX9-NEXT: s_add_u32 s0, s0, 0xfff
2526 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2527 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2528 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2529 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2530 ; GFX9-NEXT: s_endpgm
2532 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
2534 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2535 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2536 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2537 ; GFX10-NEXT: s_add_u32 s0, s0, 0xfff
2538 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2539 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2540 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2541 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2542 ; GFX10-NEXT: s_endpgm
2544 ; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split0:
2546 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2547 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2548 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2549 ; GFX11-NEXT: s_add_u32 s0, s0, 0xfff
2550 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2551 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2552 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2553 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2554 ; GFX11-NEXT: s_nop 0
2555 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2556 ; GFX11-NEXT: s_endpgm
2557 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771713
2558 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2559 store i8 %load, ptr addrspace(1) undef
2563 ; Fill 12-bit low-bits, negative high bits (1ull << 63) | 4096
; Note: 4096 (0x1000) is one more than the largest 12-bit value.  The GEP
; constant is written as the signed i64 -9223372036854771712, i.e. high dword
; 0x80000000 and low dword 0x1000.  The checks below use only the per-target
; prefixes shared by the GlobalISel and SelectionDAG runs: on gfx900, gfx1010
; and gfx1100 the whole constant is added to the SGPR base (low 0x1000, then
; high 0x80000000 with carry) and the load carries no immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2564 define amdgpu_kernel void @global_inst_salu_offset_64bit_12bit_neg_high_split1(ptr addrspace(1) %p) {
2565 ; GFX9-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
2567 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2568 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2569 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2570 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1000
2571 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2572 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2573 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2574 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2575 ; GFX9-NEXT: s_endpgm
2577 ; GFX10-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
2579 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2580 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2581 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2582 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1000
2583 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2584 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2585 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2586 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2587 ; GFX10-NEXT: s_endpgm
2589 ; GFX11-LABEL: global_inst_salu_offset_64bit_12bit_neg_high_split1:
2591 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2592 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2593 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2594 ; GFX11-NEXT: s_add_u32 s0, s0, 0x1000
2595 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2596 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2597 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2598 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2599 ; GFX11-NEXT: s_nop 0
2600 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2601 ; GFX11-NEXT: s_endpgm
2602 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854771712
2603 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2604 store i8 %load, ptr addrspace(1) undef
2608 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8191
; The GEP constant is written as the signed i64 -9223372036854767617, i.e.
; high dword 0x80000000 and low dword 0x1fff.  The checks below use only the
; per-target prefixes shared by the GlobalISel and SelectionDAG runs: on
; gfx900, gfx1010 and gfx1100 the whole constant is added to the SGPR base
; (low 0x1fff, then high 0x80000000 with carry) and the load carries no
; immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2609 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split0(ptr addrspace(1) %p) {
2610 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
2612 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2613 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2614 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2615 ; GFX9-NEXT: s_add_u32 s0, s0, 0x1fff
2616 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2617 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2618 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2619 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2620 ; GFX9-NEXT: s_endpgm
2622 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
2624 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2625 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2626 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2627 ; GFX10-NEXT: s_add_u32 s0, s0, 0x1fff
2628 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2629 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2630 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2631 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2632 ; GFX10-NEXT: s_endpgm
2634 ; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split0:
2636 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2637 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2638 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2639 ; GFX11-NEXT: s_add_u32 s0, s0, 0x1fff
2640 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2641 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2642 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2643 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2644 ; GFX11-NEXT: s_nop 0
2645 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2646 ; GFX11-NEXT: s_endpgm
2647 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767617
2648 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2649 store i8 %load, ptr addrspace(1) undef
2653 ; Fill 13-bit low-bits, negative high bits (1ull << 63) | 8192
; Note: 8192 (0x2000) is one more than the largest 13-bit value.  The GEP
; constant is written as the signed i64 -9223372036854767616, i.e. high dword
; 0x80000000 and low dword 0x2000.  The checks below use only the per-target
; prefixes shared by the GlobalISel and SelectionDAG runs: on gfx900, gfx1010
; and gfx1100 the whole constant is added to the SGPR base (low 0x2000, then
; high 0x80000000 with carry) and the load carries no immediate offset.
; Regenerate the checks with utils/update_llc_test_checks.py instead of
; editing them by hand.
2654 define amdgpu_kernel void @global_inst_salu_offset_64bit_13bit_neg_high_split1(ptr addrspace(1) %p) {
2655 ; GFX9-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
2657 ; GFX9-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2658 ; GFX9-NEXT: v_mov_b32_e32 v0, 0
2659 ; GFX9-NEXT: s_waitcnt lgkmcnt(0)
2660 ; GFX9-NEXT: s_add_u32 s0, s0, 0x2000
2661 ; GFX9-NEXT: s_addc_u32 s1, s1, 0x80000000
2662 ; GFX9-NEXT: global_load_ubyte v0, v0, s[0:1] glc
2663 ; GFX9-NEXT: s_waitcnt vmcnt(0)
2664 ; GFX9-NEXT: global_store_byte v[0:1], v0, off
2665 ; GFX9-NEXT: s_endpgm
2667 ; GFX10-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
2669 ; GFX10-NEXT: s_load_dwordx2 s[0:1], s[0:1], 0x24
2670 ; GFX10-NEXT: v_mov_b32_e32 v0, 0
2671 ; GFX10-NEXT: s_waitcnt lgkmcnt(0)
2672 ; GFX10-NEXT: s_add_u32 s0, s0, 0x2000
2673 ; GFX10-NEXT: s_addc_u32 s1, s1, 0x80000000
2674 ; GFX10-NEXT: global_load_ubyte v0, v0, s[0:1] glc dlc
2675 ; GFX10-NEXT: s_waitcnt vmcnt(0)
2676 ; GFX10-NEXT: global_store_byte v[0:1], v0, off
2677 ; GFX10-NEXT: s_endpgm
2679 ; GFX11-LABEL: global_inst_salu_offset_64bit_13bit_neg_high_split1:
2681 ; GFX11-NEXT: s_load_b64 s[0:1], s[0:1], 0x24
2682 ; GFX11-NEXT: v_mov_b32_e32 v0, 0
2683 ; GFX11-NEXT: s_waitcnt lgkmcnt(0)
2684 ; GFX11-NEXT: s_add_u32 s0, s0, 0x2000
2685 ; GFX11-NEXT: s_addc_u32 s1, s1, 0x80000000
2686 ; GFX11-NEXT: global_load_u8 v0, v0, s[0:1] glc dlc
2687 ; GFX11-NEXT: s_waitcnt vmcnt(0)
2688 ; GFX11-NEXT: global_store_b8 v[0:1], v0, off
2689 ; GFX11-NEXT: s_nop 0
2690 ; GFX11-NEXT: s_sendmsg sendmsg(MSG_DEALLOC_VGPRS)
2691 ; GFX11-NEXT: s_endpgm
2692 %gep = getelementptr i8, ptr addrspace(1) %p, i64 -9223372036854767616
2693 %load = load volatile i8, ptr addrspace(1) %gep, align 1
2694 store i8 %load, ptr addrspace(1) undef