1 # RUN: llc -march=amdgcn -verify-machineinstrs -run-pass si-load-store-opt -o - %s | FileCheck %s
3 # Check that SILoadStoreOptimizer honors physical-register defs/uses between moved instructions.
6 # The following IR snippet would usually be optimized by the peephole optimizer.
7 # However, an equivalent situation can occur with buffer instructions as well.
9 # CHECK-LABEL: name: scc_def_and_use_no_dependency
; IR stub for the same-named MIR function. It provides the %ir.ptr.0 and
; %ir.ptr.64 values that the MIR memory operands refer to.
; The GEPs index i32 elements off %ptr.0 (an addrspace(3) pointer), so index 1
; is a 4-byte offset and index 16 is a 64-byte offset.
14 define amdgpu_kernel void @scc_def_and_use_no_dependency(i32 addrspace(3)* %ptr.0) nounwind {
15 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
16 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
; IR stub for the same-named MIR function. It provides the %ir.ptr.0 and
; %ir.ptr.64 values that the MIR memory operands refer to.
; The GEPs index i32 elements off %ptr.0 (an addrspace(3) pointer), so index 1
; is a 4-byte offset and index 16 is a 64-byte offset.
20 define amdgpu_kernel void @scc_def_and_use_dependency(i32 addrspace(3)* %ptr.0) nounwind {
21 %ptr.4 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 1
22 %ptr.64 = getelementptr i32, i32 addrspace(3)* %ptr.0, i32 16
# "No dependency" case: an S_ADD_U32/S_ADDC_U32 pair that defines and then
# uses $scc sits between two DS_READ_B32 instructions, but neither scalar
# instruction reads a value produced by those reads.
27 name: scc_def_and_use_no_dependency
29 exposesReturnsTwice: false
31 regBankSelected: false
33 tracksRegLiveness: false
38 isFrameAddressTaken: false
39 isReturnAddressTaken: false
48 hasOpaqueSPAdjustment: false
50 hasMustTailInVarArgFunc: false
53 liveins: $vgpr0, $sgpr0
; %1 is the address operand of both reads; %10 is the only register input of
; the scalar add / add-with-carry pair.
55 %1:vgpr_32 = COPY $vgpr0
56 %10:sgpr_32 = COPY $sgpr0
; First read: address %1, immediate 0 (memory operand %ir.ptr.0).
59 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
; S_ADD_U32 defines $scc and S_ADDC_U32 consumes it (its own $scc def is
; marked dead). Both take only %10 and an immediate as explicit inputs, so
; nothing here depends on %2.
61 %11:sgpr_32 = S_ADD_U32 %10, 4, implicit-def $scc
62 %12:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
; Second read: same address %1, immediate 64 (memory operand %ir.ptr.64).
64 %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)
69 # CHECK-LABEL: name: scc_def_and_use_dependency
# "Dependency" case: the instructions between the two DS_READ_B32 reads form
# a chain that starts at the first read's result and continues through $scc,
# so the first read cannot be treated as independent of the $scc user.
74 name: scc_def_and_use_dependency
76 exposesReturnsTwice: false
78 regBankSelected: false
80 tracksRegLiveness: false
85 isFrameAddressTaken: false
86 isReturnAddressTaken: false
95 hasOpaqueSPAdjustment: false
97 hasMustTailInVarArgFunc: false
100 liveins: $vgpr0, $sgpr0
; %1 is the address operand of both reads; %10 is the register input of the
; S_ADDC_U32 below.
102 %1:vgpr_32 = COPY $vgpr0
103 %10:sgpr_32 = COPY $sgpr0
; First read: address %1, immediate 0 (memory operand %ir.ptr.0). Its result
; %2 is transferred into scalar register %20 by V_READFIRSTLANE_B32.
106 %2:vgpr_32 = DS_READ_B32 %1, 0, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.0)
107 %20:sgpr_32 = V_READFIRSTLANE_B32 %2, implicit $exec
; S_ADD_U32 consumes %20 (derived from the first read) and defines $scc.
109 %21:sgpr_32 = S_ADD_U32 %20, 4, implicit-def $scc
110 ; The S_ADDC_U32 depends on the first DS_READ_B32 only via SCC
; Chain: %2 -> V_READFIRSTLANE_B32 -> S_ADD_U32 -> $scc -> S_ADDC_U32. The
; explicit operands of S_ADDC_U32 (%10 and 0) do not involve %2 at all.
111 %11:sgpr_32 = S_ADDC_U32 %10, 0, implicit-def dead $scc, implicit $scc
; Second read: same address %1, immediate 64 (memory operand %ir.ptr.64).
113 %3:vgpr_32 = DS_READ_B32 %1, 64, 0, implicit $m0, implicit $exec :: (load 4 from %ir.ptr.64)