1 ; RUN: llc -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
2 ; RUN: llc -global-isel -march=amdgcn -mcpu=gfx1100 -verify-machineinstrs < %s | FileCheck -check-prefix=GFX11 %s
4 ; GFX11-LABEL: {{^}}lds_direct_load:
6 ; GFX11: lds_direct_load v{{[0-9]+}}
8 ; GFX11: lds_direct_load v{{[0-9]+}}
10 ; GFX11: lds_direct_load v{{[0-9]+}}
11 ; GFX11: s_waitcnt expcnt(2)
13 ; GFX11: buffer_store_b32
14 ; GFX11: s_waitcnt expcnt(1)
15 ; GFX11: buffer_store_b32
16 ; GFX11: s_waitcnt expcnt(0)
17 ; GFX11: buffer_store_b32
18 ; GFX11: buffer_store_b32
19 ; GFX11: buffer_store_b32
20 ; GFX11: buffer_store_b32
; Test function using the amdgpu_ps calling convention. It issues five
; llvm.amdgcn.lds.direct.load calls and writes six float values to %buf through
; llvm.amdgcn.raw.ptr.buffer.store.f32. %buf and %arg0..%arg2 are all `inreg`.
; The GFX11 check lines preceding this function match three lds_direct_load
; instructions followed by s_waitcnt expcnt(2)/(1)/(0) interleaved with
; buffer_store_b32, so the order of the statements below matters to the test.
21 define amdgpu_ps void @lds_direct_load(ptr addrspace(8) inreg %buf, i32 inreg %arg0,
22 i32 inreg %arg1, i32 inreg %arg2) #0 {
24 %p0 = call float @llvm.amdgcn.lds.direct.load(i32 %arg0)
25 ; Ensure memory clustering is occurring for lds_direct_load
; The fadd on %p0 sits between the first load and the remaining loads in the IR.
26 %p5 = fadd float %p0, 1.0
27 %p1 = call float @llvm.amdgcn.lds.direct.load(i32 %arg1)
28 %p2 = call float @llvm.amdgcn.lds.direct.load(i32 %arg2)
; %p3 and %p4 repeat the operands of %p1 and %p2 (%arg1 and %arg2).
; NOTE(review): presumably these duplicates are expected to be merged, which
; would explain why only three loads are checked -- confirm.
29 %p3 = call float @llvm.amdgcn.lds.direct.load(i32 %arg1)
30 %p4 = call float @llvm.amdgcn.lds.direct.load(i32 %arg2)
; Six stores to %buf. They differ only in the stored value and in the fourth
; operand, which runs from 0 to 5; the unmodified %p0 is stored last.
31 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p5, ptr addrspace(8) %buf, i32 4, i32 0, i32 0)
32 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p1, ptr addrspace(8) %buf, i32 4, i32 1, i32 0)
33 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p2, ptr addrspace(8) %buf, i32 4, i32 2, i32 0)
34 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p3, ptr addrspace(8) %buf, i32 4, i32 3, i32 0)
35 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p4, ptr addrspace(8) %buf, i32 4, i32 4, i32 0)
36 call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float %p0, ptr addrspace(8) %buf, i32 4, i32 5, i32 0)
; Intrinsics called by @lds_direct_load. The load intrinsic takes one i32 and
; returns a float; it carries attribute group #1.
40 declare float @llvm.amdgcn.lds.direct.load(i32) #1
; The buffer store intrinsic is declared without an attribute group.
41 declare void @llvm.amdgcn.raw.ptr.buffer.store.f32(float, ptr addrspace(8), i32, i32, i32)
; Attribute group #0 is attached to the definition of @lds_direct_load.
43 attributes #0 = { nounwind }
; Attribute group #1 is attached to the llvm.amdgcn.lds.direct.load declaration.
44 attributes #1 = { nounwind readonly }