# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -march=amdgcn -run-pass simple-register-coalescing -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
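
# These tests check that the register coalescer can use subregister liveness
# (subranges) to resolve lane conflicts when joining a COPY of a subregister
# into its wider source register.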
---
# the COPY can be coalesced based on subregister liveness
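# In the expected output the COPY of %1.sub0 is removed and the V_AND in bb.1
# defines %1.sub0 directly, i.e. %2 has been merged into %1.sub0.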
name: subrange_coalesce_liveout
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_liveout
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN: S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN: successors: %bb.2(0x80000000)
  ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN: S_BRANCH %bb.2
  ; GCN: bb.2:
  ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...
---
# early-clobber stops the coalescer from coalescing the COPY
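# Because the V_AND def of %2 is early-clobber, its result must not be assigned
# to a register that overlaps its %1 source operands, so the COPY of %1.sub0
# has to stay; the checks keep [[COPY1]] as a separate vgpr_32.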
name: subrange_coalesce_early_clobber
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_early_clobber
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
  ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN: S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN: successors: %bb.2(0x80000000)
  ; GCN: early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN: S_BRANCH %bb.2
  ; GCN: bb.2:
  ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
  ; GCN: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...
---
# a non-conflicting lane (sub1) is redefined; the COPY is still coalescable
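# The redefinition of %1.sub1 in bb.1 touches a lane unrelated to the one the
# COPY reads, so %2 is still merged into %1.sub0 and the COPY disappears from
# the output.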
name: subrange_coalesce_unrelated_sub_redefined
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN: S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN: successors: %bb.2(0x80000000)
  ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN: S_BRANCH %bb.2
  ; GCN: bb.2:
  ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    ; %1.sub1 was re-defined
    %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
    S_ENDPGM 0
...
---
# Another complex example showing that lane conflicts can be resolved based
# on subranges.
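# bb.1 forms a loop that redefines both %2 and %1.sub2, yet the output shows
# both V_AND results written straight into lanes of %1 (sub0 and sub2) with
# the COPY removed.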
name: subrange_coalesce_complex_pattern
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_complex_pattern
  ; GCN: bb.0:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: liveins: $vgpr0_vgpr1
  ; GCN: [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN: [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN: S_BRANCH %bb.1
  ; GCN: bb.1:
  ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN: [[GLOBAL_LOAD_DWORDX4_]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN: S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN: S_BRANCH %bb.2
  ; GCN: bb.2:
  ; GCN: dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN: S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
    %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
    S_ENDPGM 0
...