1 # NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
2 # RUN: llc -mtriple=arm64-apple-ios -o - -run-pass=machine-combiner %s | FileCheck %s
5 define float @reassoicate_some_inputs_in_different_block(ptr %a, i1 %c) {
9 define float @reassoicate_candidates_in_different_blocks(ptr %a, i1 %c) {
13 define float @reassoicate_candidates_in_different_blocks_no_sink(ptr %a, i1 %c) {
17 define float @no_reassociate_different_block(ptr %a, i1 %c) {
25 # The serialized reduction in bb.1 is reassociated to improve parallelism.
27 name: reassoicate_some_inputs_in_different_block
29 tracksRegLiveness: true
31 ; CHECK-LABEL: name: reassoicate_some_inputs_in_different_block
33 ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
34 ; CHECK-NEXT: liveins: $x0, $w1
36 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
37 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
38 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
39 ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
40 ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
41 ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
42 ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
46 ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
47 ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], [[LDRQui1]], implicit $fpcr
48 ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 killed [[FADDv4f32_1]], killed [[FADDv4f32_]], implicit $fpcr
49 ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
50 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
51 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
52 ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
53 ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
54 ; CHECK-NEXT: RET_ReallyLR implicit $s0
57 ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
58 ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
59 ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
60 ; CHECK-NEXT: $q3 = COPY [[LDRQui3]]
61 ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
63 successors: %bb.1, %bb.2
67 %4:gpr64common = COPY $x0
68 %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
69 %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
70 %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
71 %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
76 %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
77 %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
78 %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
79 %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
80 %10:gpr64all = COPY %9.dsub
82 %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
84 RET_ReallyLR implicit $s0
91 TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
94 # Variation of reassoicate_some_inputs_in_different_block where the candidate
95 # instructions are split across 2 blocks.
97 name: reassoicate_candidates_in_different_blocks
99 tracksRegLiveness: true
101 ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks
103 ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
104 ; CHECK-NEXT: liveins: $x0, $w1
106 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
107 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
108 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
109 ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
110 ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
111 ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
112 ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
113 ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
114 ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
115 ; CHECK-NEXT: B %bb.1
118 ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
119 ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
120 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
121 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
122 ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
123 ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
124 ; CHECK-NEXT: RET_ReallyLR implicit $s0
127 ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
128 ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
129 ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
130 ; CHECK-NEXT: $q3 = COPY [[LDRQui3]]
131 ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
133 successors: %bb.1, %bb.2
137 %4:gpr64common = COPY $x0
138 %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
139 %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
140 %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
141 %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
142 %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
143 %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
148 %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
149 %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
150 %10:gpr64all = COPY %9.dsub
152 %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
154 RET_ReallyLR implicit $s0
161 TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
166 name: reassoicate_candidates_in_different_blocks_no_sink
168 tracksRegLiveness: true
170 ; CHECK-LABEL: name: reassoicate_candidates_in_different_blocks_no_sink
172 ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
173 ; CHECK-NEXT: liveins: $x0, $w1
175 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
176 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
177 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
178 ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
179 ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
180 ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 4 :: (load (s128), align 4)
181 ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
182 ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
183 ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
184 ; CHECK-NEXT: B %bb.1
187 ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
188 ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
189 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
190 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
191 ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
192 ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
193 ; CHECK-NEXT: RET_ReallyLR implicit $s0
196 ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
197 ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
198 ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
199 ; CHECK-NEXT: $q3 = COPY [[FADDv4f32_1]]
200 ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
202 successors: %bb.1, %bb.2
206 %4:gpr64common = COPY $x0
207 %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
208 %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
209 %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
210 %3:fpr128 = LDRQui %4, 4 :: (load (s128), align 4)
211 %6:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
212 %7:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %6, implicit $fpcr
217 %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %7, implicit $fpcr
218 %9:fpr128 = nofpexcept FADDPv4f32 %8, %8, implicit $fpcr
219 %10:gpr64all = COPY %9.dsub
221 %11:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %12, implicit $fpcr
223 RET_ReallyLR implicit $s0
230 TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
234 # Reassociation of the reduction in bb.1 is not profitable, because LDRQui3 has a
235 # much larger latency than the other loads.
237 name: no_reassociate_different_block
239 tracksRegLiveness: true
241 ; CHECK-LABEL: name: no_reassociate_different_block
243 ; CHECK-NEXT: successors: %bb.1(0x40000000), %bb.2(0x40000000)
244 ; CHECK-NEXT: liveins: $x0, $w1
246 ; CHECK-NEXT: [[COPY:%[0-9]+]]:gpr32 = COPY $w1
247 ; CHECK-NEXT: [[COPY1:%[0-9]+]]:gpr64common = COPY $x0
248 ; CHECK-NEXT: [[LDRQui:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 0 :: (load (s128), align 4)
249 ; CHECK-NEXT: [[LDRQui1:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 1 :: (load (s128), align 4)
250 ; CHECK-NEXT: [[LDRQui2:%[0-9]+]]:fpr128 = LDRQui [[COPY1]], 2 :: (load (s128), align 4)
251 ; CHECK-NEXT: [[LDRXui:%[0-9]+]]:gpr64common = LDRXui [[COPY1]], 8 :: (load (s64))
252 ; CHECK-NEXT: [[LDRXui1:%[0-9]+]]:gpr64common = LDRXui killed [[LDRXui]], 0 :: (load (s64))
253 ; CHECK-NEXT: [[LDRQui3:%[0-9]+]]:fpr128 = LDRQui killed [[LDRXui1]], 0 :: (load (s128), align 4)
254 ; CHECK-NEXT: TBZW [[COPY]], 0, %bb.2
255 ; CHECK-NEXT: B %bb.1
258 ; CHECK-NEXT: [[FADDv4f32_:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui]], [[LDRQui2]], implicit $fpcr
259 ; CHECK-NEXT: [[FADDv4f32_1:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui1]], killed [[FADDv4f32_]], implicit $fpcr
260 ; CHECK-NEXT: [[FADDv4f32_2:%[0-9]+]]:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 [[LDRQui3]], killed [[FADDv4f32_1]], implicit $fpcr
261 ; CHECK-NEXT: [[FADDPv4f32_:%[0-9]+]]:fpr128 = nofpexcept FADDPv4f32 [[FADDv4f32_2]], [[FADDv4f32_2]], implicit $fpcr
262 ; CHECK-NEXT: [[COPY2:%[0-9]+]]:gpr64all = COPY [[FADDPv4f32_]].dsub
263 ; CHECK-NEXT: [[COPY3:%[0-9]+]]:fpr64 = COPY [[COPY2]]
264 ; CHECK-NEXT: [[FADDPv2i32p:%[0-9]+]]:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed [[COPY3]], implicit $fpcr
265 ; CHECK-NEXT: $s0 = COPY [[FADDPv2i32p]]
266 ; CHECK-NEXT: RET_ReallyLR implicit $s0
269 ; CHECK-NEXT: $q0 = COPY [[LDRQui]]
270 ; CHECK-NEXT: $q1 = COPY [[LDRQui2]]
271 ; CHECK-NEXT: $q2 = COPY [[LDRQui1]]
272 ; CHECK-NEXT: $q3 = COPY [[LDRQui3]]
273 ; CHECK-NEXT: TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3
275 successors: %bb.1, %bb.2
279 %4:gpr64common = COPY $x0
280 %0:fpr128 = LDRQui %4, 0 :: (load (s128), align 4)
281 %1:fpr128 = LDRQui %4, 1 :: (load (s128), align 4)
282 %2:fpr128 = LDRQui %4, 2 :: (load (s128), align 4)
283 %6:gpr64common = LDRXui %4, 8 :: (load (s64))
284 %7:gpr64common = LDRXui killed %6, 0 :: (load (s64))
285 %3:fpr128 = LDRQui killed %7, 0 :: (load (s128), align 4)
290 %8:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %0, %2, implicit $fpcr
291 %9:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %1, killed %8, implicit $fpcr
292 %10:fpr128 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDv4f32 %3, killed %9, implicit $fpcr
293 %11:fpr128 = nofpexcept FADDPv4f32 %10, %10, implicit $fpcr
294 %12:gpr64all = COPY %11.dsub
296 %13:fpr32 = nnan ninf nsz arcp contract afn reassoc nofpexcept FADDPv2i32p killed %14, implicit $fpcr
298 RET_ReallyLR implicit $s0
305 TCRETURNdi @use, 0, csr_darwin_aarch64_aapcs, implicit $sp, implicit $q0, implicit $q1, implicit $q2, implicit $q3