[clang][modules] Don't prevent translation of FW_Private includes when explicitly...
[llvm-project.git] / llvm / test / CodeGen / AMDGPU / regcoalescer-resolve-lane-conflict-by-subranges.mir
blob7a026242dd883fec90d7fe5140746689f2f7fca7
1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -march=amdgcn -run-pass register-coalescer -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s
6 ---
7 # %2 = COPY %1.sub0 can be coalesced even though %2 is redefined in bb.1 while %1 is still live: after that redefinition only %1.sub2 is read (in bb.2), so subregister liveness shows no conflict on the sub0 lane.
8 name:            subrange_coalesce_liveout
9 tracksRegLiveness: true
10 body:             |
11   ; GCN-LABEL: name: subrange_coalesce_liveout
12   ; GCN: bb.0:
13   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
14   ; GCN-NEXT:   liveins: $vgpr0_vgpr1
15   ; GCN-NEXT: {{  $}}
16   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
17   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
18   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
19   ; GCN-NEXT:   S_BRANCH %bb.1
20   ; GCN-NEXT: {{  $}}
21   ; GCN-NEXT: bb.1:
22   ; GCN-NEXT:   successors: %bb.2(0x80000000)
23   ; GCN-NEXT: {{  $}}
24   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
25   ; GCN-NEXT:   S_BRANCH %bb.2
26   ; GCN-NEXT: {{  $}}
27   ; GCN-NEXT: bb.2:
28   ; GCN-NEXT:   dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
29   ; GCN-NEXT:   S_ENDPGM 0
30   bb.0:
31     successors: %bb.1, %bb.2
32     liveins: $vgpr0_vgpr1
34     %0:vreg_64 = COPY $vgpr0_vgpr1
35     %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
36     %2:vgpr_32 = COPY %1.sub0
37     S_CBRANCH_EXECZ %bb.2, implicit $exec
38     S_BRANCH %bb.1
40   bb.1:
41     successors: %bb.2
43     %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
44     S_BRANCH %bb.2
46   bb.2:
47     %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
48     S_ENDPGM 0
49 ...
51 ---
52 # Like subrange_coalesce_liveout, but the redefinition of %2 in bb.1 is marked early-clobber; that stops the coalescer from coalescing the COPY, so the expected output keeps it.
53 name:            subrange_coalesce_early_clobber
54 tracksRegLiveness: true
55 body:             |
56   ; GCN-LABEL: name: subrange_coalesce_early_clobber
57   ; GCN: bb.0:
58   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
59   ; GCN-NEXT:   liveins: $vgpr0_vgpr1
60   ; GCN-NEXT: {{  $}}
61   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
62   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
63   ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
64   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
65   ; GCN-NEXT:   S_BRANCH %bb.1
66   ; GCN-NEXT: {{  $}}
67   ; GCN-NEXT: bb.1:
68   ; GCN-NEXT:   successors: %bb.2(0x80000000)
69   ; GCN-NEXT: {{  $}}
70   ; GCN-NEXT:   early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
71   ; GCN-NEXT:   S_BRANCH %bb.2
72   ; GCN-NEXT: {{  $}}
73   ; GCN-NEXT: bb.2:
74   ; GCN-NEXT:   dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
75   ; GCN-NEXT:   S_ENDPGM 0
76   bb.0:
77     successors: %bb.1, %bb.2
78     liveins: $vgpr0_vgpr1
80     %0:vreg_64 = COPY $vgpr0_vgpr1
81     %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
82     %2:vgpr_32 = COPY %1.sub0
83     S_CBRANCH_EXECZ %bb.2, implicit $exec
84     S_BRANCH %bb.1
86   bb.1:
87     successors: %bb.2
89     early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
90     S_BRANCH %bb.2
92   bb.2:
93     %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
94     S_ENDPGM 0
95 ...
97 ---
98 # %1.sub1 is redefined in bb.1 after %2 is redefined, but sub1 is not the copied lane (sub0), so there is no conflict on sub0 and the COPY is still coalescable.
99 name:            subrange_coalesce_unrelated_sub_redefined
100 tracksRegLiveness: true
101 body:             |
102   ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
103   ; GCN: bb.0:
104   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
105   ; GCN-NEXT:   liveins: $vgpr0_vgpr1
106   ; GCN-NEXT: {{  $}}
107   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
108   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
109   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
110   ; GCN-NEXT:   S_BRANCH %bb.1
111   ; GCN-NEXT: {{  $}}
112   ; GCN-NEXT: bb.1:
113   ; GCN-NEXT:   successors: %bb.2(0x80000000)
114   ; GCN-NEXT: {{  $}}
115   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
116   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
117   ; GCN-NEXT:   S_BRANCH %bb.2
118   ; GCN-NEXT: {{  $}}
119   ; GCN-NEXT: bb.2:
120   ; GCN-NEXT:   dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
121   ; GCN-NEXT:   S_ENDPGM 0
122   bb.0:
123     successors: %bb.1, %bb.2
124     liveins: $vgpr0_vgpr1
126     %0:vreg_64 = COPY $vgpr0_vgpr1
127     %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
128     %2:vgpr_32 = COPY %1.sub0
129     S_CBRANCH_EXECZ %bb.2, implicit $exec
130     S_BRANCH %bb.1
132   bb.1:
133     successors: %bb.2
135     %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
136     ; %1.sub1 was re-defined
137     %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
138     S_BRANCH %bb.2
140   bb.2:
141     %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
142     S_ENDPGM 0
143 ...
145 ---
146 # A more complex example of resolving a lane conflict based on subranges:
147 # bb.1 is a self-loop that redefines %2 from %1.sub1 and %2, then redefines %1.sub2 from %2; the COPY of %1.sub0 is still coalesced.
148 name:            subrange_coalesce_complex_pattern
149 tracksRegLiveness: true
150 body:             |
151   ; GCN-LABEL: name: subrange_coalesce_complex_pattern
152   ; GCN: bb.0:
153   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
154   ; GCN-NEXT:   liveins: $vgpr0_vgpr1
155   ; GCN-NEXT: {{  $}}
156   ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
157   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
158   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
159   ; GCN-NEXT:   S_BRANCH %bb.1
160   ; GCN-NEXT: {{  $}}
161   ; GCN-NEXT: bb.1:
162   ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
163   ; GCN-NEXT: {{  $}}
164   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
165   ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
166   ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
167   ; GCN-NEXT:   S_BRANCH %bb.2
168   ; GCN-NEXT: {{  $}}
169   ; GCN-NEXT: bb.2:
170   ; GCN-NEXT:   dead %3:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
171   ; GCN-NEXT:   S_ENDPGM 0
172   bb.0:
173     successors: %bb.1, %bb.2
174     liveins: $vgpr0_vgpr1
176     %0:vreg_64 = COPY $vgpr0_vgpr1
177     %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
178     %2:vgpr_32 = COPY %1.sub0
179     S_CBRANCH_EXECZ %bb.2, implicit $exec
180     S_BRANCH %bb.1
182   bb.1:
183     successors: %bb.1, %bb.2
185     %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
186     %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
187     S_CBRANCH_EXECZ %bb.1, implicit $exec
188     S_BRANCH %bb.2
190   bb.2:
191     %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
192     S_ENDPGM 0