# NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
# RUN: llc -mtriple=amdgcn -run-pass register-coalescer -verify-machineinstrs -o - %s | FileCheck --check-prefix=GCN %s

---
# the COPY can be coalesced based on subregister liveness
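# %2 is a copy of %1.sub0 and is fully redefined in bb.1 while the rest of %1
# stays live, so the join conflicts under whole-register liveness. With
# subregister liveness the conflict is confined to the sub0 lane, so the
# redefinition can become a direct def of %1.sub0 and the COPY is erased
# (see the GCN checks below).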
name:            subrange_coalesce_liveout
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_liveout
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...

---
# early-clobber stops the coalescer from coalescing the COPY
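# If %2 were merged into %1.sub0, the V_AND would become an early-clobber def
# of %1.sub0 overlapping its own uses of %1.sub0 and %1.sub2, which an
# early-clobber def must not do (it is written before the uses are read).
# The GCN checks confirm the COPY survives.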
name:            subrange_coalesce_early_clobber
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_early_clobber
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT:   [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[GLOBAL_LOAD_DWORDX4_]].sub0
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   early-clobber [[COPY1]]:vgpr_32 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub2, [[COPY1]], implicit $exec
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    early-clobber %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub2, %2, implicit $exec
    S_ENDPGM 0
...

---
# a non-conflicting lane (sub1) is redefined, so the COPY is still coalescable
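# As in the first test, %2 (a copy of %1.sub0) is redefined in bb.1, but here
# %1.sub1 is redefined afterwards as well. That write touches a lane that takes
# no part in the conflict, so the coalescer can still rewrite both defs onto %1
# and remove the COPY (see the GCN checks below).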
name:            subrange_coalesce_unrelated_sub_redefined
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_unrelated_sub_redefined
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.2(0x80000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub1, implicit $exec
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub1:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub0, %1.sub1, implicit $exec
    %1.sub1:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %2, implicit $exec
    S_ENDPGM 0
...

---
# Another complex example showing that lane conflicts can be resolved based on
# subranges.
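# bb.1 is a self-loop, so %2 is live across the back edge (the first V_AND both
# reads and redefines it). The conflict remains confined to the sub0 lane, and
# the GCN checks show %2 folded into %1.sub0 with the loop-carried value kept.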
name:            subrange_coalesce_complex_pattern
tracksRegLiveness: true
body:             |
  ; GCN-LABEL: name: subrange_coalesce_complex_pattern
  ; GCN: bb.0:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT:   liveins: $vgpr0_vgpr1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[COPY:%[0-9]+]]:vreg_64 = COPY $vgpr0_vgpr1
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]]:vreg_128 = GLOBAL_LOAD_DWORDX4 [[COPY]], 0, 0, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.2, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.1
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.1:
  ; GCN-NEXT:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub0:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT:   [[GLOBAL_LOAD_DWORDX4_:%[0-9]+]].sub2:vreg_128 = V_AND_B32_e64 [[GLOBAL_LOAD_DWORDX4_]].sub0, [[GLOBAL_LOAD_DWORDX4_]].sub0, implicit $exec
  ; GCN-NEXT:   S_CBRANCH_EXECZ %bb.1, implicit $exec
  ; GCN-NEXT:   S_BRANCH %bb.2
  ; GCN-NEXT: {{  $}}
  ; GCN-NEXT: bb.2:
  ; GCN-NEXT:   dead [[V_ADD_U32_e32_:%[0-9]+]]:vgpr_32 = V_ADD_U32_e32 [[GLOBAL_LOAD_DWORDX4_]].sub1, [[GLOBAL_LOAD_DWORDX4_]].sub2, implicit $exec
  ; GCN-NEXT:   S_ENDPGM 0
  bb.0:
    successors: %bb.1, %bb.2
    liveins: $vgpr0_vgpr1

    %0:vreg_64 = COPY $vgpr0_vgpr1
    %1:vreg_128 = GLOBAL_LOAD_DWORDX4 %0, 0, 0, implicit $exec
    %2:vgpr_32 = COPY %1.sub0
    S_CBRANCH_EXECZ %bb.2, implicit $exec
    S_BRANCH %bb.1

  bb.1:
    successors: %bb.1, %bb.2

    %2:vgpr_32 = V_AND_B32_e64 %1.sub1, %2, implicit $exec
    %1.sub2:vreg_128 = V_AND_B32_e64 %2, %2, implicit $exec
    S_CBRANCH_EXECZ %bb.1, implicit $exec
    S_BRANCH %bb.2

  bb.2:
    %4:vgpr_32 = V_ADD_U32_e32 %1.sub1, %1.sub2, implicit $exec
    S_ENDPGM 0
...