1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx906 -run-pass=prologepilog,machine-cp -verify-machineinstrs %s -o - | FileCheck -check-prefix=GCN %s
4 # The COPY that moves the return value to VGPR0 should not be removed during machine-cp. The spill restore of the same register that follows
5 # is meant to reload only its inactive lanes. Marking the register itself as the tied-op in the spill reload prevents the undesired optimization.
8 name: wwm_scratch_reg_spill_reload_of_outgoing_reg
9 tracksRegLiveness: true
11 wwmReservedRegs: ['$vgpr0']
12 isEntryFunction: false
13 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
14 stackPtrOffsetReg: '$sgpr32'
15 frameOffsetReg: '$sgpr33'
18 liveins: $sgpr20, $vgpr1
19 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_reg
20 ; GCN: liveins: $sgpr20, $vgpr1
22 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
23 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
24 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
25 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
26 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
27 ; GCN-NEXT: $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
28 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
29 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
30 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
31 ; GCN-NEXT: SI_RETURN implicit $vgpr0
33 $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
34 $vgpr0 = COPY killed renamable $vgpr1, implicit $exec
35 SI_RETURN implicit $vgpr0
38 # The reload of vgpr0 requires the tied-op as it is a subreg of the outgoing tuple register vgpr0_vgpr1.
39 # The reload of vgpr2 doesn't need the tied-op as vgpr2 isn't holding any return value.
41 name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
42 tracksRegLiveness: true
44 wwmReservedRegs: ['$vgpr0', '$vgpr2']
45 isEntryFunction: false
46 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
47 stackPtrOffsetReg: '$sgpr32'
48 frameOffsetReg: '$sgpr33'
51 liveins: $sgpr20, $sgpr21, $vgpr1
52 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_of_outgoing_tuple_subreg
53 ; GCN: liveins: $sgpr20, $sgpr21, $vgpr1
55 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
56 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr0, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
57 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (store (s32) into %stack.1, addrspace 5)
58 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
59 ; GCN-NEXT: $vgpr0 = IMPLICIT_DEF
60 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
61 ; GCN-NEXT: $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
62 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
63 ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
64 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
65 ; GCN-NEXT: $vgpr0 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec, implicit $vgpr0(tied-def 0) :: (load (s32) from %stack.0, addrspace 5)
66 ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 4, 0, 0, implicit $exec :: (load (s32) from %stack.1, addrspace 5)
67 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
68 ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
71 $vgpr0 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr0
72 $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr21, 0, $vgpr2
73 $vgpr0 = COPY $vgpr1, implicit $exec
74 SI_RETURN implicit $vgpr0_vgpr1
77 # The tied-op is not required in the spill reload of vgpr2, as vgpr2 is not part of the outgoing return register vgpr0_vgpr1.
80 name: wwm_scratch_reg_spill_reload_different_outgoing_reg
81 tracksRegLiveness: true
83 wwmReservedRegs: ['$vgpr2']
84 isEntryFunction: false
85 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
86 stackPtrOffsetReg: '$sgpr32'
87 frameOffsetReg: '$sgpr33'
90 liveins: $sgpr20, $vgpr1
91 ; GCN-LABEL: name: wwm_scratch_reg_spill_reload_different_outgoing_reg
92 ; GCN: liveins: $sgpr20, $vgpr1
94 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
95 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr2, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
96 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
97 ; GCN-NEXT: $vgpr2 = IMPLICIT_DEF
98 ; GCN-NEXT: $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
99 ; GCN-NEXT: $vgpr0 = COPY $vgpr1, implicit $exec
100 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
101 ; GCN-NEXT: $vgpr2 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
102 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
103 ; GCN-NEXT: SI_RETURN implicit $vgpr0_vgpr1
104 $vgpr2 = IMPLICIT_DEF
105 $vgpr2 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr2
106 $vgpr0 = COPY $vgpr1, implicit $exec
107 SI_RETURN implicit $vgpr0_vgpr1
110 # The tied-op is not required in the spill reload of vgpr40, which is in the CSR range.
112 name: wwm_csr_spill_reload
113 tracksRegLiveness: true
115 wwmReservedRegs: ['$vgpr40']
116 isEntryFunction: false
117 scratchRSrcReg: '$sgpr0_sgpr1_sgpr2_sgpr3'
118 stackPtrOffsetReg: '$sgpr32'
119 frameOffsetReg: '$sgpr33'
122 liveins: $sgpr20, $vgpr1
123 ; GCN-LABEL: name: wwm_csr_spill_reload
124 ; GCN: liveins: $sgpr20, $vgpr1
126 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
127 ; GCN-NEXT: BUFFER_STORE_DWORD_OFFSET killed $vgpr40, $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (store (s32) into %stack.0, addrspace 5)
128 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
129 ; GCN-NEXT: $vgpr40 = IMPLICIT_DEF
130 ; GCN-NEXT: $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
131 ; GCN-NEXT: $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
132 ; GCN-NEXT: $vgpr0 = COPY killed $vgpr1, implicit $exec
133 ; GCN-NEXT: $sgpr4_sgpr5 = S_XOR_SAVEEXEC_B64 -1, implicit-def $exec, implicit-def dead $scc, implicit $exec
134 ; GCN-NEXT: $vgpr40 = BUFFER_LOAD_DWORD_OFFSET $sgpr0_sgpr1_sgpr2_sgpr3, $sgpr32, 0, 0, 0, implicit $exec :: (load (s32) from %stack.0, addrspace 5)
135 ; GCN-NEXT: $exec = S_MOV_B64 killed $sgpr4_sgpr5
136 ; GCN-NEXT: SI_RETURN implicit $vgpr0
137 $vgpr40 = IMPLICIT_DEF
138 $vgpr40 = SI_SPILL_S32_TO_VGPR killed $sgpr20, 0, $vgpr40
139 $sgpr20 = SI_RESTORE_S32_FROM_VGPR $vgpr40, 0, implicit $exec
140 $vgpr0 = COPY killed $vgpr1, implicit $exec
141 SI_RETURN implicit $vgpr0