Run DCE after a LoopFlatten test to reduce spurious output [nfc]
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / split-liverange-overlapping-copies.mir
blobf4cf0f43e456b08014c027c5aa574e24a97153a8
1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=amdgcn-amd-amdhsa -mcpu=gfx90a -verify-regalloc -run-pass=greedy -o - %s | FileCheck %s
4 # Check that liverange splitting does not create copies that overlap within a bundle.
5 # By overlap, we mean that they write to the same subregisters.
6 # e.g. the following bundle is desirable
7 #     %0.sub1_sub2 = COPY ... {
8 #       %0.sub3 = COPY ...
9 #     }
10 # but the following bundle isn't desirable: both copies write sub2, and such
11 # overlap can make VirtRegRewriter fail due to cycles in the copy bundle.
12 #     %0.sub1_sub2 = COPY ... {
13 #       %0.sub2_sub3 = COPY ...
14 #     }
15 ---
# Case 1: 1024-bit tuple in the align2 register classes.
# Input shape: %2 is a full COPY of %1 in bb.0; bb.2 then redefines only
# %2.sub0, and bb.3 reads all of %2. Every lane of %2 except sub0 therefore
# still has to carry the value copied in bb.0 when bb.3 is reached.
# Expected output of the greedy pass (see the CHECK lines): the remaining lanes
# are copied into the new register %6 by one bundle of three COPYs writing
# sub1..sub16, sub17..sub28 (spelled through their lo16/hi16 halves) and
# sub29..sub31. No two COPYs in the bundle write the same subregister.
# NOTE(review): the "_31" suffix presumably counts the 31 lanes (sub1..sub31)
# that are copied -- confirm against the originating change.
16 name: split_liverange_copy_overlap_31
17 tracksRegLiveness: true
18 machineFunctionInfo:
19   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
20   stackPtrOffsetReg: '$sgpr32'
21   occupancy:       7
22 body:             |
23   ; CHECK-LABEL: name: split_liverange_copy_overlap_31
24   ; CHECK: bb.0:
25   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
26   ; CHECK-NEXT: {{  $}}
27   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
28   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
29   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:av_1024_align2 = COPY [[DEF1]]
30   ; CHECK-NEXT: {{  $}}
31   ; CHECK-NEXT: bb.1:
32   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
33   ; CHECK-NEXT: {{  $}}
34   ; CHECK-NEXT:   dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
35   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
36   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
37   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:vreg_1024_align2 = IMPLICIT_DEF
38   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
39   ; CHECK-NEXT: {{  $}}
40   ; CHECK-NEXT: bb.2:
41   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
42   ; CHECK-NEXT: {{  $}}
43   ; CHECK-NEXT:   undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024_align2 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
44   ; CHECK-NEXT:     internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024_align2 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16
45   ; CHECK-NEXT:     internal %6.sub29_sub30_sub31:av_1024_align2 = COPY [[COPY]].sub29_sub30_sub31
46   ; CHECK-NEXT:   }
47   ; CHECK-NEXT:   %6.sub0:av_1024_align2 = IMPLICIT_DEF
48   ; CHECK-NEXT:   S_NOP 0, implicit %6.sub0
49   ; CHECK-NEXT: {{  $}}
50   ; CHECK-NEXT: bb.3:
51   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
52   ; CHECK-NEXT: {{  $}}
53   ; CHECK-NEXT:   S_NOP 0, implicit %6
54   ; CHECK-NEXT: {{  $}}
55   ; CHECK-NEXT: bb.4:
56   ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
57   ; CHECK-NEXT: {{  $}}
58   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:av_1024_align2 = IMPLICIT_DEF
59   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
60   ; CHECK-NEXT: {{  $}}
61   ; CHECK-NEXT: bb.5:
62   ; CHECK-NEXT:   undef %4.sub0:vreg_1024_align2 = COPY [[DEF]]
63   ; CHECK-NEXT:   S_NOP 0, implicit %4
64   bb.0:
65     %0:vgpr_32 = IMPLICIT_DEF
66     %1:vreg_1024_align2 = IMPLICIT_DEF
67     %2:vreg_1024_align2 = COPY %1
69   bb.1:
70     %5:vreg_64 = IMPLICIT_DEF
71     S_NOP 0, implicit %1
72     S_NOP 0, implicit %1
73     %1:vreg_1024_align2 = IMPLICIT_DEF
74     S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
76   bb.2:
77     %2.sub0:vreg_1024_align2 = IMPLICIT_DEF
78     S_NOP 0, implicit %2.sub0
80   bb.3:
81     S_NOP 0, implicit %2
83   bb.4:
84     %2:vreg_1024_align2 = IMPLICIT_DEF
85     S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
87   bb.5:
88     undef %4.sub0:vreg_1024_align2 = COPY %0
89     S_NOP 0, implicit %4
90 ...
91 ---
# Case 2: same control flow as the first function, but using the vreg_1024 /
# av_1024 register classes (no align2 suffix) and redefining two lanes in bb.2:
# %2.sub0 and %2.sub31.
# Expected output of the greedy pass (see the CHECK lines): the lanes that are
# not redefined are copied into the new register %6 by one bundle of three
# COPYs writing sub1..sub16, sub17..sub28 (spelled through their lo16/hi16
# halves) and sub29_sub30. No two COPYs in the bundle write the same
# subregister, and sub31 is not copied because bb.2 redefines it.
# NOTE(review): the "_30" suffix presumably counts the 30 lanes (sub1..sub30)
# that are copied -- confirm against the originating change.
92 name: split_liverange_copy_overlap_30
93 tracksRegLiveness: true
94 machineFunctionInfo:
95   scratchRSrcReg:  '$sgpr0_sgpr1_sgpr2_sgpr3'
96   stackPtrOffsetReg: '$sgpr32'
97   occupancy:       7
98 body:             |
99   ; CHECK-LABEL: name: split_liverange_copy_overlap_30
100   ; CHECK: bb.0:
101   ; CHECK-NEXT:   successors: %bb.1(0x80000000)
102   ; CHECK-NEXT: {{  $}}
103   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:vgpr_32 = IMPLICIT_DEF
104   ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
105   ; CHECK-NEXT:   [[COPY:%[0-9]+]]:av_1024 = COPY [[DEF1]]
106   ; CHECK-NEXT: {{  $}}
107   ; CHECK-NEXT: bb.1:
108   ; CHECK-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
109   ; CHECK-NEXT: {{  $}}
110   ; CHECK-NEXT:   dead [[DEF2:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
111   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
112   ; CHECK-NEXT:   S_NOP 0, implicit [[DEF1]]
113   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:vreg_1024 = IMPLICIT_DEF
114   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
115   ; CHECK-NEXT: {{  $}}
116   ; CHECK-NEXT: bb.2:
117   ; CHECK-NEXT:   successors: %bb.3(0x80000000)
118   ; CHECK-NEXT: {{  $}}
119   ; CHECK-NEXT:   undef %6.sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16:av_1024 = COPY [[COPY]].sub1_sub2_sub3_sub4_sub5_sub6_sub7_sub8_sub9_sub10_sub11_sub12_sub13_sub14_sub15_sub16 {
120   ; CHECK-NEXT:     internal %6.sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16:av_1024 = COPY [[COPY]].sub17_lo16_sub17_hi16_sub18_lo16_sub18_hi16_sub19_lo16_sub19_hi16_sub20_lo16_sub20_hi16_sub21_lo16_sub21_hi16_sub22_lo16_sub22_hi16_sub23_lo16_sub23_hi16_sub24_lo16_sub24_hi16_sub25_lo16_sub25_hi16_sub26_lo16_sub26_hi16_sub27_lo16_sub27_hi16_sub28_lo16_sub28_hi16
121   ; CHECK-NEXT:     internal %6.sub29_sub30:av_1024 = COPY [[COPY]].sub29_sub30
122   ; CHECK-NEXT:   }
123   ; CHECK-NEXT:   %6.sub0:av_1024 = IMPLICIT_DEF
124   ; CHECK-NEXT:   %6.sub31:av_1024 = IMPLICIT_DEF
125   ; CHECK-NEXT:   S_NOP 0, implicit %6.sub0, implicit %6.sub31
126   ; CHECK-NEXT: {{  $}}
127   ; CHECK-NEXT: bb.3:
128   ; CHECK-NEXT:   successors: %bb.4(0x80000000)
129   ; CHECK-NEXT: {{  $}}
130   ; CHECK-NEXT:   S_NOP 0, implicit %6
131   ; CHECK-NEXT: {{  $}}
132   ; CHECK-NEXT: bb.4:
133   ; CHECK-NEXT:   successors: %bb.3(0x40000000), %bb.5(0x40000000)
134   ; CHECK-NEXT: {{  $}}
135   ; CHECK-NEXT:   [[DEF3:%[0-9]+]]:av_1024 = IMPLICIT_DEF
136   ; CHECK-NEXT:   S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
137   ; CHECK-NEXT: {{  $}}
138   ; CHECK-NEXT: bb.5:
139   ; CHECK-NEXT:   undef %4.sub0:vreg_1024 = COPY [[DEF]]
140   ; CHECK-NEXT:   S_NOP 0, implicit %4
141   bb.0:
142     %0:vgpr_32 = IMPLICIT_DEF
143     %1:vreg_1024 = IMPLICIT_DEF
144     %2:vreg_1024 = COPY %1
146   bb.1:
147     %5:vreg_64 = IMPLICIT_DEF
148     S_NOP 0, implicit %1
149     S_NOP 0, implicit %1
150     %1:vreg_1024 = IMPLICIT_DEF
151     S_CBRANCH_VCCNZ %bb.1, implicit undef $vcc
153   bb.2:
154     %2.sub0:vreg_1024 = IMPLICIT_DEF
155     %2.sub31:vreg_1024 = IMPLICIT_DEF
156     S_NOP 0, implicit %2.sub0, implicit %2.sub31
158   bb.3:
159     S_NOP 0, implicit %2
161   bb.4:
162     %2:vreg_1024 = IMPLICIT_DEF
163     S_CBRANCH_VCCNZ %bb.3, implicit undef $vcc
165   bb.5:
166     undef %4.sub0:vreg_1024 = COPY %0
167     S_NOP 0, implicit %4