1 ; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
2 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-SDAG %s
3 ; RUN: llc -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
4 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1200 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX12,GFX12-GISEL %s
5 ; RUN: llc -global-isel -mtriple=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck --check-prefixes=GCN,GFX11 %s
; Data prefetch (last llvm.prefetch operand is 1) of an inreg addrspace(4)
; pointer with no offset. The checks expect s_prefetch_data on s[0:1] with a
; zero immediate for gfx1200, and nothing but s_endpgm for gfx1100.
9 define amdgpu_ps void @prefetch_data_sgpr(ptr addrspace(4) inreg %ptr) {
10 ; GFX12-LABEL: prefetch_data_sgpr:
11 ; GFX12: ; %bb.0: ; %entry
12 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
13 ; GFX12-NEXT: s_endpgm
15 ; GFX11-LABEL: prefetch_data_sgpr:
16 ; GFX11: ; %bb.0: ; %entry
17 ; GFX11-NEXT: s_endpgm
19 tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 1)
; Data prefetch of an address computed by a GEP of 128 float elements. The
; gfx1200 checks expect the resulting byte offset (0x200) to appear as the
; immediate of s_prefetch_data rather than as a separate add.
23 define amdgpu_ps void @prefetch_data_sgpr_offset(ptr addrspace(4) inreg %ptr) {
24 ; GFX12-LABEL: prefetch_data_sgpr_offset:
25 ; GFX12: ; %bb.0: ; %entry
26 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x200, null, 0
27 ; GFX12-NEXT: s_endpgm
29 ; GFX11-LABEL: prefetch_data_sgpr_offset:
30 ; GFX11: ; %bb.0: ; %entry
31 ; GFX11-NEXT: s_endpgm
33 %gep = getelementptr float, ptr addrspace(4) %ptr, i32 128
34 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch with a byte offset of 8388607 (0x7fffff). The gfx1200 checks
; expect this value to be used directly as the s_prefetch_data immediate.
40 define amdgpu_ps void @prefetch_data_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
41 ; GFX12-LABEL: prefetch_data_sgpr_max_offset:
42 ; GFX12: ; %bb.0: ; %entry
43 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x7fffff, null, 0
44 ; GFX12-NEXT: s_endpgm
46 ; GFX11-LABEL: prefetch_data_sgpr_max_offset:
47 ; GFX11: ; %bb.0: ; %entry
48 ; GFX11-NEXT: s_endpgm
50 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
51 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch with a negative byte offset of -8388608 (-0x800000). The
; gfx1200 checks expect it to be used directly as the s_prefetch_data
; immediate.
55 define amdgpu_ps void @prefetch_data_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
56 ; GFX12-LABEL: prefetch_data_sgpr_min_offset:
57 ; GFX12: ; %bb.0: ; %entry
58 ; GFX12-NEXT: s_prefetch_data s[0:1], -0x800000, null, 0
59 ; GFX12-NEXT: s_endpgm
61 ; GFX11-LABEL: prefetch_data_sgpr_min_offset:
62 ; GFX11: ; %bb.0: ; %entry
63 ; GFX11-NEXT: s_endpgm
65 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
66 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
; Data prefetch with a byte offset of 8388608 (0x800000). The gfx1200 checks
; expect the offset to be added to the base pointer first and the prefetch to
; use a zero immediate. The add differs per selector: SelectionDAG is
; expected to emit one s_add_nc_u64, GlobalISel an s_add_co_u32 /
; s_add_co_ci_u32 pair.
70 define amdgpu_ps void @prefetch_data_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
71 ; GFX12-SDAG-LABEL: prefetch_data_sgpr_too_large_offset:
72 ; GFX12-SDAG: ; %bb.0: ; %entry
73 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
74 ; GFX12-SDAG-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
75 ; GFX12-SDAG-NEXT: s_endpgm
77 ; GFX11-LABEL: prefetch_data_sgpr_too_large_offset:
78 ; GFX11: ; %bb.0: ; %entry
79 ; GFX11-NEXT: s_endpgm
81 ; GFX12-GISEL-LABEL: prefetch_data_sgpr_too_large_offset:
82 ; GFX12-GISEL: ; %bb.0: ; %entry
83 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
84 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
85 ; GFX12-GISEL-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
86 ; GFX12-GISEL-NEXT: s_endpgm
88 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
89 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 1)
93 ; Check divergent address
; Data prefetch through an addrspace(1) pointer argument that is not marked
; inreg. The expectations use the common GCN prefix, so the same checks apply
; to every run configuration of this test.
95 define amdgpu_ps void @prefetch_data_vgpr(ptr addrspace(1) %ptr) {
96 ; GCN-LABEL: prefetch_data_vgpr:
97 ; GCN: ; %bb.0: ; %entry
100 tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
104 ; Check LDS and scratch; neither can be prefetched
; Data prefetch of an inreg addrspace(3) pointer. The expectations use the
; common GCN prefix, so the same checks apply to every run configuration.
106 define amdgpu_ps void @prefetch_data_lds(ptr addrspace(3) inreg %ptr) {
107 ; GCN-LABEL: prefetch_data_lds:
108 ; GCN: ; %bb.0: ; %entry
111 tail call void @llvm.prefetch.p3(ptr addrspace(3) %ptr, i32 0, i32 0, i32 1)
; Data prefetch of an inreg addrspace(5) pointer. The expectations use the
; common GCN prefix, so the same checks apply to every run configuration.
115 define amdgpu_ps void @prefetch_data_scratch(ptr addrspace(5) inreg %ptr) {
116 ; GCN-LABEL: prefetch_data_scratch:
117 ; GCN: ; %bb.0: ; %entry
120 tail call void @llvm.prefetch.p5(ptr addrspace(5) %ptr, i32 0, i32 0, i32 1)
124 ; Check supported address spaces
; Data prefetch of an inreg pointer in the default (unqualified) address
; space. The gfx1200 checks expect s_prefetch_data on s[0:1]; gfx1100 is
; expected to emit only s_endpgm.
126 define amdgpu_ps void @prefetch_data_sgpr_flat(ptr inreg %ptr) {
127 ; GFX12-LABEL: prefetch_data_sgpr_flat:
128 ; GFX12: ; %bb.0: ; %entry
129 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
130 ; GFX12-NEXT: s_endpgm
132 ; GFX11-LABEL: prefetch_data_sgpr_flat:
133 ; GFX11: ; %bb.0: ; %entry
134 ; GFX11-NEXT: s_endpgm
136 tail call void @llvm.prefetch.pf(ptr %ptr, i32 0, i32 0, i32 1)
; Data prefetch of an inreg addrspace(1) pointer. The gfx1200 checks expect
; s_prefetch_data on s[0:1]; gfx1100 is expected to emit only s_endpgm.
140 define amdgpu_ps void @prefetch_data_sgpr_global(ptr addrspace(1) inreg %ptr) {
141 ; GFX12-LABEL: prefetch_data_sgpr_global:
142 ; GFX12: ; %bb.0: ; %entry
143 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
144 ; GFX12-NEXT: s_endpgm
146 ; GFX11-LABEL: prefetch_data_sgpr_global:
147 ; GFX11: ; %bb.0: ; %entry
148 ; GFX11-NEXT: s_endpgm
150 tail call void @llvm.prefetch.p1(ptr addrspace(1) %ptr, i32 0, i32 0, i32 1)
; Data prefetch of an inreg addrspace(6) pointer. The gfx1200 checks expect
; s1 to be set to 0 before the prefetch, which then uses the s[0:1] pair as
; its address.
154 define amdgpu_ps void @prefetch_data_sgpr_constant_32bit(ptr addrspace(6) inreg %ptr) {
155 ; GFX12-LABEL: prefetch_data_sgpr_constant_32bit:
156 ; GFX12: ; %bb.0: ; %entry
157 ; GFX12-NEXT: s_mov_b32 s1, 0
158 ; GFX12-NEXT: s_prefetch_data s[0:1], 0x0, null, 0
159 ; GFX12-NEXT: s_endpgm
161 ; GFX11-LABEL: prefetch_data_sgpr_constant_32bit:
162 ; GFX11: ; %bb.0: ; %entry
163 ; GFX11-NEXT: s_endpgm
165 tail call void @llvm.prefetch.p6(ptr addrspace(6) %ptr, i32 0, i32 0, i32 1)
; Instruction prefetch (last llvm.prefetch operand is 0) of an inreg
; addrspace(4) pointer with no offset. The gfx1200 checks expect
; s_prefetch_inst on s[0:1]; gfx1100 is expected to emit only s_endpgm.
171 define amdgpu_ps void @prefetch_inst_sgpr(ptr addrspace(4) inreg %ptr) {
172 ; GFX12-LABEL: prefetch_inst_sgpr:
173 ; GFX12: ; %bb.0: ; %entry
174 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
175 ; GFX12-NEXT: s_endpgm
177 ; GFX11-LABEL: prefetch_inst_sgpr:
178 ; GFX11: ; %bb.0: ; %entry
179 ; GFX11-NEXT: s_endpgm
181 tail call void @llvm.prefetch.p4(ptr addrspace(4) %ptr, i32 0, i32 0, i32 0)
; Instruction prefetch of an address computed by a 128-byte i8 GEP. The
; gfx1200 checks expect the offset (0x80) to appear as the immediate of
; s_prefetch_inst.
185 define amdgpu_ps void @prefetch_inst_sgpr_offset(ptr addrspace(4) inreg %ptr) {
186 ; GFX12-LABEL: prefetch_inst_sgpr_offset:
187 ; GFX12: ; %bb.0: ; %entry
188 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x80, null, 0
189 ; GFX12-NEXT: s_endpgm
191 ; GFX11-LABEL: prefetch_inst_sgpr_offset:
192 ; GFX11: ; %bb.0: ; %entry
193 ; GFX11-NEXT: s_endpgm
195 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 128
196 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
200 ; Check large offsets
; Instruction prefetch with a byte offset of 8388607 (0x7fffff). The gfx1200
; checks expect this value to be used directly as the s_prefetch_inst
; immediate.
202 define amdgpu_ps void @prefetch_inst_sgpr_max_offset(ptr addrspace(4) inreg %ptr) {
203 ; GFX12-LABEL: prefetch_inst_sgpr_max_offset:
204 ; GFX12: ; %bb.0: ; %entry
205 ; GFX12-NEXT: s_prefetch_inst s[0:1], 0x7fffff, null, 0
206 ; GFX12-NEXT: s_endpgm
208 ; GFX11-LABEL: prefetch_inst_sgpr_max_offset:
209 ; GFX11: ; %bb.0: ; %entry
210 ; GFX11-NEXT: s_endpgm
212 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388607
213 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Instruction prefetch with a negative byte offset of -8388608 (-0x800000).
; The gfx1200 checks expect it to be used directly as the s_prefetch_inst
; immediate.
217 define amdgpu_ps void @prefetch_inst_sgpr_min_offset(ptr addrspace(4) inreg %ptr) {
218 ; GFX12-LABEL: prefetch_inst_sgpr_min_offset:
219 ; GFX12: ; %bb.0: ; %entry
220 ; GFX12-NEXT: s_prefetch_inst s[0:1], -0x800000, null, 0
221 ; GFX12-NEXT: s_endpgm
223 ; GFX11-LABEL: prefetch_inst_sgpr_min_offset:
224 ; GFX11: ; %bb.0: ; %entry
225 ; GFX11-NEXT: s_endpgm
227 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 -8388608
228 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Instruction prefetch with a byte offset of 8388608 (0x800000). The gfx1200
; checks expect the offset to be added to the base pointer first and the
; prefetch to use a zero immediate. The add differs per selector:
; SelectionDAG is expected to emit one s_add_nc_u64, GlobalISel an
; s_add_co_u32 / s_add_co_ci_u32 pair.
232 define amdgpu_ps void @prefetch_inst_sgpr_too_large_offset(ptr addrspace(4) inreg %ptr) {
233 ; GFX12-SDAG-LABEL: prefetch_inst_sgpr_too_large_offset:
234 ; GFX12-SDAG: ; %bb.0: ; %entry
235 ; GFX12-SDAG-NEXT: s_add_nc_u64 s[0:1], s[0:1], 0x800000
236 ; GFX12-SDAG-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
237 ; GFX12-SDAG-NEXT: s_endpgm
239 ; GFX11-LABEL: prefetch_inst_sgpr_too_large_offset:
240 ; GFX11: ; %bb.0: ; %entry
241 ; GFX11-NEXT: s_endpgm
243 ; GFX12-GISEL-LABEL: prefetch_inst_sgpr_too_large_offset:
244 ; GFX12-GISEL: ; %bb.0: ; %entry
245 ; GFX12-GISEL-NEXT: s_add_co_u32 s0, s0, 0x800000
246 ; GFX12-GISEL-NEXT: s_add_co_ci_u32 s1, s1, 0
247 ; GFX12-GISEL-NEXT: s_prefetch_inst s[0:1], 0x0, null, 0
248 ; GFX12-GISEL-NEXT: s_endpgm
250 %gep = getelementptr i8, ptr addrspace(4) %ptr, i32 8388608
251 tail call void @llvm.prefetch.p4(ptr addrspace(4) %gep, i32 0, i32 0, i32 0)
; Declarations of the llvm.prefetch overloads called by the tests in this
; file: one for the default address space and one each for address spaces
; 1, 3, 4, 5 and 6. Per the LLVM Language Reference, the i32 operands are,
; in order, rw, locality and cache type.
255 declare void @llvm.prefetch.pf(ptr nocapture readonly, i32, i32, i32)
256 declare void @llvm.prefetch.p1(ptr addrspace(1) nocapture readonly, i32, i32, i32)
257 declare void @llvm.prefetch.p3(ptr addrspace(3) nocapture readonly, i32, i32, i32)
258 declare void @llvm.prefetch.p4(ptr addrspace(4) nocapture readonly, i32, i32, i32)
259 declare void @llvm.prefetch.p5(ptr addrspace(5) nocapture readonly, i32, i32, i32)
260 declare void @llvm.prefetch.p6(ptr addrspace(6) nocapture readonly, i32, i32, i32)