1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s
5 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; Baseline data prefetch: addrspace(4) pointer passed inreg, no offset, last
; intrinsic argument 1. The gfx1200 checks expect one s_prefetch_data on
; s[0:1] with a 0x0 immediate; the gfx1100 checks expect only s_endpgm.
9 define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
10 ; GFX12-LABEL: prefetch_data_sgpr:
11 ; GFX12: ; %bb.0: ; %entry
12 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
13 ; GFX12-NEXT: s_endpgm
15 ; GFX11-LABEL: prefetch_data_sgpr:
16 ; GFX11: ; %bb.0: ; %entry
17 ; GFX11-NEXT: s_endpgm
19 tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
; Data prefetch through a constant GEP of 128 floats. The gfx1200 checks
; expect the byte offset (128 * 4 = 0x200) in the instruction's immediate
; field, with no separate address add; gfx1100 expects only s_endpgm.
23 define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
24 ; GFX12-LABEL: prefetch_data_sgpr_offset:
25 ; GFX12: ; %bb.0: ; %entry
26 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
27 ; GFX12-NEXT: s_endpgm
29 ; GFX11-LABEL: prefetch_data_sgpr_offset:
30 ; GFX11: ; %bb.0: ; %entry
31 ; GFX11-NEXT: s_endpgm
33 %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
34 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch at byte offset 8388607 (0x7fffff). The gfx1200 checks expect
; this value to be used directly as the immediate offset.
; NOTE(review): presumably 0x7fffff is the largest encodable immediate for
; this instruction, as the function name suggests - confirm against the ISA.
40 define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
41 ; GFX12-LABEL: prefetch_data_sgpr_max_offset:
42 ; GFX12: ; %bb.0: ; %entry
43 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
44 ; GFX12-NEXT: s_endpgm
46 ; GFX11-LABEL: prefetch_data_sgpr_max_offset:
47 ; GFX11: ; %bb.0: ; %entry
48 ; GFX11-NEXT: s_endpgm
50 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
51 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch at byte offset -8388608. The gfx1200 checks expect the
; offset NOT to be placed in the immediate field: the address is adjusted
; first and the prefetch uses a 0x0 immediate. The two gfx1200 runs differ
; in how the add is emitted: the SelectionDAG run materializes the 64-bit
; constant in s[2:3] and uses s_add_nc_u64, while the GlobalISel run uses
; an s_add_co_u32 / s_add_co_ci_u32 pair.
55 define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
56 ; GFX12-SDAG-LABEL: prefetch_data_sgpr_min_offset:
57 ; GFX12-SDAG: ; %bb.0: ; %entry
58 ; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000
59 ; GFX12-SDAG-NEXT: s_mov_b32 s3, -1
60 ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
61 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
62 ; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
63 ; GFX12-SDAG-NEXT: s_endpgm
65 ; GFX11-LABEL: prefetch_data_sgpr_min_offset:
66 ; GFX11: ; %bb.0: ; %entry
67 ; GFX11-NEXT: s_endpgm
69 ; GFX12-GISEL-LABEL: prefetch_data_sgpr_min_offset:
70 ; GFX12-GISEL: ; %bb.0: ; %entry
71 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
72 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
73 ; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
74 ; GFX12-GISEL-NEXT: s_endpgm
76 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
77 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch at byte offset 8388608 (0x800000). The gfx1200 checks expect
; the offset to be added to the base address first (one s_add_nc_u64 in the
; SelectionDAG run, an s_add_co_u32 / s_add_co_ci_u32 pair in the GlobalISel
; run) and the prefetch itself to use a 0x0 immediate.
81 define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
82 ; GFX12-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
83 ; GFX12-SDAG: ; %bb.0: ; %entry
84 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
85 ; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
86 ; GFX12-SDAG-NEXT: s_endpgm
88 ; GFX11-LABEL: prefetch_data_sgpr_too_large_offset:
89 ; GFX11: ; %bb.0: ; %entry
90 ; GFX11-NEXT: s_endpgm
92 ; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
93 ; GFX12-GISEL: ; %bb.0: ; %entry
94 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
95 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
96 ; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
97 ; GFX12-GISEL-NEXT: s_endpgm
99 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
100 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
104 ; Check divergent address
; Data prefetch on an addrspace(1) pointer that is not marked inreg. All four
; runs share one set of expectations for this function.
; NOTE(review): the checks visible here match only the function label and
; the block header, so nothing pins the instructions that follow and the
; absence of a prefetch is not asserted. Confirm that a following check on
; s_endpgm exists in the full file, or regenerate the assertions.
106 define amdgpu_ps void @prefetch_data_vgpr(ptr addrspace(1) %ptr) {
107 ; GCN-LABEL: prefetch_data_vgpr:
108 ; GCN: ; %bb.0: ; %entry
111 tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
115 ; Check LDS and scratch pointers: these address spaces cannot be prefetched
; Data prefetch on an addrspace(3) pointer passed inreg. All four runs share
; one set of expectations for this function.
; NOTE(review): the checks visible here match only the function label and
; the block header, so the absence of a prefetch instruction is not
; asserted. Confirm that a following check on s_endpgm exists in the full
; file, or regenerate the assertions.
117 define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
118 ; GCN-LABEL: prefetch_data_lds:
119 ; GCN: ; %bb.0: ; %entry
122 tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
; Data prefetch on an addrspace(5) pointer passed inreg. All four runs share
; one set of expectations for this function.
; NOTE(review): the checks visible here match only the function label and
; the block header, so the absence of a prefetch instruction is not
; asserted. Confirm that a following check on s_endpgm exists in the full
; file, or regenerate the assertions.
126 define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
127 ; GCN-LABEL: prefetch_data_scratch:
128 ; GCN: ; %bb.0: ; %entry
131 tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
135 ; Check supported address spaces
; Data prefetch on a pointer with no explicit address space, passed inreg.
; The gfx1200 checks expect s_prefetch_data on s[0:1] with a 0x0 immediate;
; the gfx1100 checks expect only s_endpgm.
137 define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
138 ; GFX12-LABEL: prefetch_data_sgpr_flat:
139 ; GFX12: ; %bb.0: ; %entry
140 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
141 ; GFX12-NEXT: s_endpgm
143 ; GFX11-LABEL: prefetch_data_sgpr_flat:
144 ; GFX11: ; %bb.0: ; %entry
145 ; GFX11-NEXT: s_endpgm
147 tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
; Data prefetch on an addrspace(1) pointer passed inreg. The gfx1200 checks
; expect s_prefetch_data on s[0:1] with a 0x0 immediate; the gfx1100 checks
; expect only s_endpgm.
151 define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
152 ; GFX12-LABEL: prefetch_data_sgpr_global:
153 ; GFX12: ; %bb.0: ; %entry
154 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
155 ; GFX12-NEXT: s_endpgm
157 ; GFX11-LABEL: prefetch_data_sgpr_global:
158 ; GFX11: ; %bb.0: ; %entry
159 ; GFX11-NEXT: s_endpgm
161 tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
; Data prefetch on an addrspace(6) pointer passed inreg. The gfx1200 checks
; expect s1 to be written with 0 before s[0:1] is used as the prefetch
; address; the gfx1100 checks expect only s_endpgm.
; NOTE(review): presumably the s_mov_b32 zero-extends a 32-bit pointer held
; in s0 to a 64-bit address - confirm against the target's pointer widths.
165 define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
166 ; GFX12-LABEL: prefetch_data_sgpr_constant_32bit:
167 ; GFX12: ; %bb.0: ; %entry
168 ; GFX12-NEXT: s_mov_b32 s1, 0
169 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
170 ; GFX12-NEXT: s_endpgm
172 ; GFX11-LABEL: prefetch_data_sgpr_constant_32bit:
173 ; GFX11: ; %bb.0: ; %entry
174 ; GFX11-NEXT: s_endpgm
176 tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
; Baseline instruction prefetch: same shape as the data-prefetch baseline but
; with the last intrinsic argument set to 0. The gfx1200 checks expect
; s_prefetch_inst (rather than s_prefetch_data) on s[0:1] with a 0x0
; immediate; the gfx1100 checks expect only s_endpgm.
182 define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
183 ; GFX12-LABEL: prefetch_inst_sgpr:
184 ; GFX12: ; %bb.0: ; %entry
185 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
186 ; GFX12-NEXT: s_endpgm
188 ; GFX11-LABEL: prefetch_inst_sgpr:
189 ; GFX11: ; %bb.0: ; %entry
190 ; GFX11-NEXT: s_endpgm
192 tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
; Instruction prefetch through a constant GEP of 128 bytes (i8 elements).
; The gfx1200 checks expect the byte offset 0x80 in the immediate field of
; s_prefetch_inst; the gfx1100 checks expect only s_endpgm.
196 define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
197 ; GFX12-LABEL: prefetch_inst_sgpr_offset:
198 ; GFX12: ; %bb.0: ; %entry
199 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
200 ; GFX12-NEXT: s_endpgm
202 ; GFX11-LABEL: prefetch_inst_sgpr_offset:
203 ; GFX11: ; %bb.0: ; %entry
204 ; GFX11-NEXT: s_endpgm
206 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
207 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
211 ; Check large offsets
; Instruction prefetch at byte offset 8388607 (0x7fffff). The gfx1200 checks
; expect this value to be used directly as the immediate offset of
; s_prefetch_inst.
; NOTE(review): presumably 0x7fffff is the largest encodable immediate for
; this instruction, as the function name suggests - confirm against the ISA.
213 define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
214 ; GFX12-LABEL: prefetch_inst_sgpr_max_offset:
215 ; GFX12: ; %bb.0: ; %entry
216 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
217 ; GFX12-NEXT: s_endpgm
219 ; GFX11-LABEL: prefetch_inst_sgpr_max_offset:
220 ; GFX11: ; %bb.0: ; %entry
221 ; GFX11-NEXT: s_endpgm
223 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
224 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Instruction prefetch at byte offset -8388608. The gfx1200 checks expect
; the offset NOT to be placed in the immediate field: the address is
; adjusted first and s_prefetch_inst uses a 0x0 immediate. The SelectionDAG
; run materializes the 64-bit constant in s[2:3] and uses s_add_nc_u64; the
; GlobalISel run uses an s_add_co_u32 / s_add_co_ci_u32 pair.
228 define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
229 ; GFX12-SDAG-LABEL: prefetch_inst_sgpr_min_offset:
230 ; GFX12-SDAG: ; %bb.0: ; %entry
231 ; GFX12-SDAG-NEXT: s_mov_b32 s2, 0xff800000
232 ; GFX12-SDAG-NEXT: s_mov_b32 s3, -1
233 ; GFX12-SDAG-NEXT: s_delay_alu instid0(SALU_CYCLE_1)
234 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], s[2:3]
235 ; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
236 ; GFX12-SDAG-NEXT: s_endpgm
238 ; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
239 ; GFX11: ; %bb.0: ; %entry
240 ; GFX11-NEXT: s_endpgm
242 ; GFX12-GISEL-LABEL: prefetch_inst_sgpr_min_offset:
243 ; GFX12-GISEL: ; %bb.0: ; %entry
244 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0xff800000
245 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, -1
246 ; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
247 ; GFX12-GISEL-NEXT: s_endpgm
249 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
250 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Instruction prefetch at byte offset 8388608 (0x800000). The gfx1200 checks
; expect the offset to be added to the base address first (one s_add_nc_u64
; in the SelectionDAG run, an s_add_co_u32 / s_add_co_ci_u32 pair in the
; GlobalISel run) and s_prefetch_inst to use a 0x0 immediate.
254 define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
255 ; GFX12-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
256 ; GFX12-SDAG: ; %bb.0: ; %entry
257 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
258 ; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
259 ; GFX12-SDAG-NEXT: s_endpgm
261 ; GFX11-LABEL: prefetch_inst_sgpr_too_large_offset:
262 ; GFX11: ; %bb.0: ; %entry
263 ; GFX11-NEXT: s_endpgm
265 ; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
266 ; GFX12-GISEL: ; %bb.0: ; %entry
267 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
268 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
269 ; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
270 ; GFX12-GISEL-NEXT: s_endpgm
272 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
273 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Declarations of the llvm.prefetch overloads called in this file, one per
; pointer type used. Per the LLVM LangRef the operands are (address, rw,
; locality, cache type); every call in this file passes 0 for rw and
; locality, and varies only the last operand (1 in the prefetch_data_*
; functions, 0 in the prefetch_inst_* functions).
; NOTE(review): the first overload is spelled with a .pf suffix while the
; others use .p<addrspace>; presumably the IR parser remangles it to the
; canonical name for a plain ptr - confirm before relying on the spelling.
277 declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
278 declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
279 declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
280 declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
281 declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
282 declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)