1 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
3 # Check that SILoadStoreOptimizer honors memory dependencies between moved instructions.
6 # The following IR snippet would usually be optimized by the peephole optimizer.
7 # However, an equivalent situation can occur with buffer instructions as well.
9 # CHECK-LABEL: name: mem_dependency
10 # CHECK: DS_READ2_B32 %0, 0, 1,
11 # CHECK: DS_WRITE_B32 %0, killed %1, 64,
12 # CHECK: DS_READ2_B32 %0, 16, 17,
13 # CHECK: DS_WRITE_B32 killed %0, %5, 0
; IR that the MIR memory operands (%ir.ptr.0, %ir.ptr.4, %ir.ptr.64) refer to.
; All accesses go through address-space-3 pointers derived from %ptr.0. The
; value loaded from %ptr.0 is stored to %ptr.64 and then read back from
; %ptr.64, so that reload depends on the preceding store.
16 define amdgpu_kernel void @mem_dependency(i32 addrspace(3)* %ptr.0) nounwind {
; i32 element 1 of %ptr.0, i.e. byte offset 4.
17 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
; i32 element 16 of %ptr.0, i.e. byte offset 64.
18 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
19 %1 = load i32, i32 addrspace(3)* %ptr.0
20 store i32 %1, i32 addrspace(3)* %ptr.64
; Reads back the location written by the store just above.
21 %2 = load i32, i32 addrspace(3)* %ptr.64
22 %3 = load i32, i32 addrspace(3)* %ptr.4
; NOTE(review): the definition of %4 is not visible in this excerpt; the MIR
; body adds the two preceding loads, so presumably %4 = add i32 %2, %3 - confirm.
24 store i32 %4, i32 addrspace(3)* %ptr.0
31 exposesReturnsTwice: false
33 regBankSelected: false
35 tracksRegLiveness: true
37 - { reg: '%vgpr0', virtual-reg: '%1' }
39 isFrameAddressTaken: false
40 isReturnAddressTaken: false
49 hasOpaqueSPAdjustment: false
51 hasMustTailInVarArgFunc: false
; %1 is the base address used by every DS access below; it is copied from the
; live-in register %vgpr0.
; NOTE(review): every DS instruction reads %m0 implicitly, but no definition of
; %m0 is visible in this excerpt - confirm it is set up before the first use.
56 %1:vgpr_32 = COPY %vgpr0
; First single-dword load (byte offset 0); one of the two loads the pass is
; expected to combine.
58 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit %m0, implicit %exec :: (load 4 from %ir.ptr.0)
; Store of the loaded value to byte offset 64. The CHECK lines expect this
; store to stay between the two DS_READ2_B32 instructions.
59 DS_WRITE_B32 %1, killed %2, 64, 0, implicit %m0, implicit %exec :: (store 4 into %ir.ptr.64)
61 ; Make this load unmergeable, to tempt SILoadStoreOptimizer into merging the other two loads.
; This read is already a DS_READ2_B32 (element offsets 16 and 17, 8 bytes from
; %ir.ptr.64), so it covers the location written by the store above and must
; remain after it. Only its sub0 half is consumed below.
63 %6:vreg_64 = DS_READ2_B32 %1, 16, 17, 0, implicit %m0, implicit %exec :: (load 8 from %ir.ptr.64, align 4)
64 %3:vgpr_32 = COPY %6.sub0
; Second single-dword load (byte offset 4). Together with the load at offset 0
; it corresponds to the expected "DS_READ2_B32 %0, 0, 1" in the CHECK lines.
65 %4:vgpr_32 = DS_READ_B32 %1, 4, 0, implicit %m0, implicit %exec :: (load 4 from %ir.ptr.4)
; Sum of the value read back from offset 64 and the value at offset 4.
66 %5:vgpr_32 = V_ADD_I32_e32 killed %3, killed %4, implicit-def %vcc, implicit %exec
; Final store of the sum to byte offset 0; last use of both %1 and %m0.
67 DS_WRITE_B32 killed %1, %5, 0, 0, implicit killed %m0, implicit %exec :: (store 4 into %ir.ptr.0)