1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn -run-pass register-coalescer -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
7 # The COPY of %1.sub0 into %2 is expected to be coalesced away based on subregister liveness: the checks contain no COPY of sub0 and the V_AND_B32 result is written directly to sub0 of the loaded register.
8 name: subrange_coalesce_liveout
9 tracksRegLiveness: true
11 ; GCN-LABEL: name: subrange_coalesce_liveout
13 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
14 ; GCN-NEXT: liveins: $vgpr0_vgpr1
16 ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
17 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
18 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
19 ; GCN-NEXT: S_BRANCH %bb.1
22 ; GCN-NEXT: successors: %bb.2(0x80000000)
24 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
25 ; GCN-NEXT: S_BRANCH %bb.2
28 ; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
29 ; GCN-NEXT: S_ENDPGM 0
31 successors: %bb.1, %bb.2
34 %0:vreg_64 = COPY $vgpr0_vgpr1
35 %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
36 %2:vgpr_32 = COPY %1.sub0
37 S_CBRANCH_EXECZ %bb.2, implicit $exec
43 %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
47 %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
52 # The early-clobber redefinition of %2 stops the coalescer from coalescing the COPY, so the checks still expect the COPY of sub0 to remain.
53 name: subrange_coalesce_early_clobber
54 tracksRegLiveness: true
56 ; GCN-LABEL: name: subrange_coalesce_early_clobber
58 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
59 ; GCN-NEXT: liveins: $vgpr0_vgpr1
61 ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
62 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
63 ; GCN-NEXT: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
64 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
65 ; GCN-NEXT: S_BRANCH %bb.1
68 ; GCN-NEXT: successors: %bb.2(0x80000000)
70 ; GCN-NEXT: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
71 ; GCN-NEXT: S_BRANCH %bb.2
74 ; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
75 ; GCN-NEXT: S_ENDPGM 0
77 successors: %bb.1, %bb.2
80 %0:vreg_64 = COPY $vgpr0_vgpr1
81 %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
82 %2:vgpr_32 = COPY %1.sub0
83 S_CBRANCH_EXECZ %bb.2, implicit $exec
89 early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
93 %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
98 # A non-conflicting lane (sub1) of the source register is redefined after the COPY; the COPY of sub0 is still coalescable.
99 name: subrange_coalesce_unrelated_sub_redefined
100 tracksRegLiveness: true
102 ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
104 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
105 ; GCN-NEXT: liveins: $vgpr0_vgpr1
107 ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
108 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
109 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
110 ; GCN-NEXT: S_BRANCH %bb.1
113 ; GCN-NEXT: successors: %bb.2(0x80000000)
115 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
116 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
117 ; GCN-NEXT: S_BRANCH %bb.2
120 ; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
121 ; GCN-NEXT: S_ENDPGM 0
123 successors: %bb.1, %bb.2
124 liveins: $vgpr0_vgpr1
126 %0:vreg_64 = COPY $vgpr0_vgpr1
127 %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
128 %2:vgpr_32 = COPY %1.sub0
129 S_CBRANCH_EXECZ %bb.2, implicit $exec
135 %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
136 %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
140 %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
145 # A more complex example (a block that branches back to itself and also redefines %1.sub2)
146 # showing that lane conflicts can be resolved based on subranges.
147 name: subrange_coalesce_complex_pattern
148 tracksRegLiveness: true
150 ; GCN-LABEL: name: subrange_coalesce_complex_pattern
152 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
153 ; GCN-NEXT: liveins: $vgpr0_vgpr1
155 ; GCN-NEXT: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
156 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
157 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.2, implicit $exec
158 ; GCN-NEXT: S_BRANCH %bb.1
161 ; GCN-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
163 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
164 ; GCN-NEXT: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
165 ; GCN-NEXT: S_CBRANCH_EXECZ %bb.1, implicit $exec
166 ; GCN-NEXT: S_BRANCH %bb.2
169 ; GCN-NEXT: dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
170 ; GCN-NEXT: S_ENDPGM 0
172 successors: %bb.1, %bb.2
173 liveins: $vgpr0_vgpr1
175 %0:vreg_64 = COPY $vgpr0_vgpr1
176 %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
177 %2:vgpr_32 = COPY %1.sub0
178 S_CBRANCH_EXECZ %bb.2, implicit $exec
182 successors: %bb.1, %bb.2
184 %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
185 %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
186 S_CBRANCH_EXECZ %bb.1, implicit $exec
190 %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec