; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 < %s | FileCheck %s --check-prefixes=CHECK,CHECK-STD
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=neoverse-n2 -enable-unsafe-fp-math \
; RUN:   -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=CHECK,CHECK-UNSAFE

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s0, s2
; CHECK-STD-NEXT: fadd s0, s0, s3
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_fast(float %x0, float %x1, float %x2, float %x3) {
; CHECK-LABEL: reassociate_adds1_fast:
; CHECK: // %bb.0:
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: fadd s1, s2, s3
; CHECK-NEXT: fadd s0, s0, s1
; CHECK-NEXT: ret
  %t0 = fadd fast float %x0, %x1
  %t1 = fadd fast float %t0, %x2
  %t2 = fadd fast float %t1, %x3
  ret float %t2
}

define float @reassociate_adds1_reassoc(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds1_reassoc:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s0, s2
; CHECK-STD-NEXT: fadd s0, s0, s3
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds1_reassoc:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd reassoc float %x0, %x1
  %t1 = fadd reassoc float %t0, %x2
  %t2 = fadd reassoc float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds2:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s2, s0
; CHECK-STD-NEXT: fadd s0, s0, s3
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds2:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds3:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s0, s2
; CHECK-STD-NEXT: fadd s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds3:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds4:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s2, s0
; CHECK-STD-NEXT: fadd s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds4:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; CHECK-STD-LABEL: reassociate_adds5:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s0, s2
; CHECK-STD-NEXT: fadd s0, s0, s3
; CHECK-STD-NEXT: fadd s0, s0, s4
; CHECK-STD-NEXT: fadd s0, s0, s5
; CHECK-STD-NEXT: fadd s0, s0, s6
; CHECK-STD-NEXT: fadd s0, s0, s7
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds5:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s2, s3
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s4, s5
; CHECK-UNSAFE-NEXT: fadd s1, s1, s6
; CHECK-UNSAFE-NEXT: fadd s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s0, s0, s7
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_adds6:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv s0, s0, s1
; CHECK-STD-NEXT: fadd s0, s2, s0
; CHECK-STD-NEXT: fadd s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds6:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT: fadd s1, s3, s2
; CHECK-UNSAFE-NEXT: fadd s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; CHECK-STD-LABEL: reassociate_muls1:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv s0, s0, s1
; CHECK-STD-NEXT: fmul s0, s2, s0
; CHECK-STD-NEXT: fmul s0, s3, s0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls1:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv s0, s0, s1
; CHECK-UNSAFE-NEXT: fmul s1, s3, s2
; CHECK-UNSAFE-NEXT: fmul s0, s1, s0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_adds_double:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv d0, d0, d1
; CHECK-STD-NEXT: fadd d0, d2, d0
; CHECK-STD-NEXT: fadd d0, d3, d0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_double:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT: fadd d1, d3, d2
; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; CHECK-STD-LABEL: reassociate_muls_double:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv d0, d0, d1
; CHECK-STD-NEXT: fmul d0, d2, d0
; CHECK-STD-NEXT: fmul d0, d3, d0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_double:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv d0, d0, d1
; CHECK-UNSAFE-NEXT: fmul d1, d3, d2
; CHECK-UNSAFE-NEXT: fmul d0, d1, d0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that scalar half-precision adds are reassociated.

define half @reassociate_adds_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_adds_half:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv h0, h0, h1
; CHECK-STD-NEXT: fadd h0, h2, h0
; CHECK-STD-NEXT: fadd h0, h3, h0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT: fadd h1, h3, h2
; CHECK-UNSAFE-NEXT: fadd h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv half %x0, %x1
  %t1 = fadd half %x2, %t0
  %t2 = fadd half %x3, %t1
  ret half %t2
}

; Verify that scalar half-precision multiplies are reassociated.

define half @reassociate_muls_half(half %x0, half %x1, half %x2, half %x3) {
; CHECK-STD-LABEL: reassociate_muls_half:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fdiv h0, h0, h1
; CHECK-STD-NEXT: fmul h0, h2, h0
; CHECK-STD-NEXT: fmul h0, h3, h0
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_half:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fdiv h0, h0, h1
; CHECK-UNSAFE-NEXT: fmul h1, h3, h2
; CHECK-UNSAFE-NEXT: fmul h0, h1, h0
; CHECK-UNSAFE-NEXT: ret
  %t0 = fdiv half %x0, %x1
  %t1 = fmul half %x2, %t0
  %t2 = fmul half %x3, %t1
  ret half %t2
}

; Verify that scalar integer adds are reassociated.

define i32 @reassociate_adds_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_adds_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: udiv w8, w0, w1
; CHECK-NEXT: add w9, w3, w2
; CHECK-NEXT: add w0, w9, w8
; CHECK-NEXT: ret
  %t0 = udiv i32 %x0, %x1
  %t1 = add i32 %x2, %t0
  %t2 = add i32 %x3, %t1
  ret i32 %t2
}

define i64 @reassociate_adds_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_adds_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: udiv x8, x0, x1
; CHECK-NEXT: add x9, x3, x2
; CHECK-NEXT: add x0, x9, x8
; CHECK-NEXT: ret
  %t0 = udiv i64 %x0, %x1
  %t1 = add i64 %x2, %t0
  %t2 = add i64 %x3, %t1
  ret i64 %t2
}

; Verify that scalar bitwise operations are reassociated.

define i32 @reassociate_ands_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_ands_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and w8, w0, w1
; CHECK-NEXT: and w9, w2, w3
; CHECK-NEXT: and w0, w8, w9
; CHECK-NEXT: ret
  %t0 = and i32 %x0, %x1
  %t1 = and i32 %t0, %x2
  %t2 = and i32 %t1, %x3
  ret i32 %t2
}

define i64 @reassociate_ors_i64(i64 %x0, i64 %x1, i64 %x2, i64 %x3) {
; CHECK-LABEL: reassociate_ors_i64:
; CHECK: // %bb.0:
; CHECK-NEXT: orr x8, x0, x1
; CHECK-NEXT: orr x9, x2, x3
; CHECK-NEXT: orr x0, x8, x9
; CHECK-NEXT: ret
  %t0 = or i64 %x0, %x1
  %t1 = or i64 %t0, %x2
  %t2 = or i64 %t1, %x3
  ret i64 %t2
}

define i32 @reassociate_xors_i32(i32 %x0, i32 %x1, i32 %x2, i32 %x3) {
; CHECK-LABEL: reassociate_xors_i32:
; CHECK: // %bb.0:
; CHECK-NEXT: eor w8, w0, w1
; CHECK-NEXT: eor w9, w2, w3
; CHECK-NEXT: eor w0, w8, w9
; CHECK-NEXT: ret
  %t0 = xor i32 %x0, %x1
  %t1 = xor i32 %t0, %x2
  %t2 = xor i32 %t1, %x3
  ret i32 %t2
}

; Verify that we reassociate vector instructions too.

define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds1:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds1:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds2:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v3.4s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds2:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fadd v1.4s, v2.4s, v3.4s
; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %t1, %x3
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds3:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v2.4s
; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds3:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %t0, %x2
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: vector_reassociate_adds4:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT: fadd v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT: fadd v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: vector_reassociate_adds4:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fadd v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT: fadd v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 64-bit vector half-precision adds are reassociated.

define <4 x half> @reassociate_adds_v4f16(<4 x half> %x0, <4 x half> %x1, <4 x half> %x2, <4 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_v4f16:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4h, v0.4h, v1.4h
; CHECK-STD-NEXT: fadd v0.4h, v2.4h, v0.4h
; CHECK-STD-NEXT: fadd v0.4h, v3.4h, v0.4h
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_v4f16:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4h, v0.4h, v1.4h
; CHECK-UNSAFE-NEXT: fadd v1.4h, v3.4h, v2.4h
; CHECK-UNSAFE-NEXT: fadd v0.4h, v1.4h, v0.4h
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x half> %x0, %x1
  %t1 = fadd <4 x half> %x2, %t0
  %t2 = fadd <4 x half> %x3, %t1
  ret <4 x half> %t2
}

; Verify that 128-bit vector half-precision multiplies are reassociated.

define <8 x half> @reassociate_muls_v8f16(<8 x half> %x0, <8 x half> %x1, <8 x half> %x2, <8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v8f16:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.8h, v0.8h, v1.8h
; CHECK-STD-NEXT: fmul v0.8h, v2.8h, v0.8h
; CHECK-STD-NEXT: fmul v0.8h, v3.8h, v0.8h
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v8f16:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.8h, v0.8h, v1.8h
; CHECK-UNSAFE-NEXT: fmul v1.8h, v3.8h, v2.8h
; CHECK-UNSAFE-NEXT: fmul v0.8h, v1.8h, v0.8h
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <8 x half> %x0, %x1
  %t1 = fmul <8 x half> %x2, %t0
  %t2 = fmul <8 x half> %x3, %t1
  ret <8 x half> %t2
}

; Verify that 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v4f32:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-STD-NEXT: fmul v0.4s, v2.4s, v0.4s
; CHECK-STD-NEXT: fmul v0.4s, v3.4s, v0.4s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v4f32:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.4s, v0.4s, v1.4s
; CHECK-UNSAFE-NEXT: fmul v1.4s, v3.4s, v2.4s
; CHECK-UNSAFE-NEXT: fmul v0.4s, v1.4s, v0.4s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_v2f64:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd v0.2d, v0.2d, v1.2d
; CHECK-STD-NEXT: fmul v0.2d, v2.2d, v0.2d
; CHECK-STD-NEXT: fmul v0.2d, v3.2d, v0.2d
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_v2f64:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd v0.2d, v0.2d, v1.2d
; CHECK-UNSAFE-NEXT: fmul v1.2d, v3.2d, v2.2d
; CHECK-UNSAFE-NEXT: fmul v0.2d, v1.2d, v0.2d
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that vector integer arithmetic operations are reassociated.

define <2 x i32> @reassociate_muls_v2i32(<2 x i32> %x0, <2 x i32> %x1, <2 x i32> %x2, <2 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_v2i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mul v0.2s, v0.2s, v1.2s
; CHECK-NEXT: mul v1.2s, v3.2s, v2.2s
; CHECK-NEXT: mul v0.2s, v1.2s, v0.2s
; CHECK-NEXT: ret
  %t0 = mul <2 x i32> %x0, %x1
  %t1 = mul <2 x i32> %x2, %t0
  %t2 = mul <2 x i32> %x3, %t1
  ret <2 x i32> %t2
}

define <2 x i64> @reassociate_adds_v2i64(<2 x i64> %x0, <2 x i64> %x1, <2 x i64> %x2, <2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_v2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add v0.2d, v0.2d, v1.2d
; CHECK-NEXT: add v1.2d, v3.2d, v2.2d
; CHECK-NEXT: add v0.2d, v1.2d, v0.2d
; CHECK-NEXT: ret
  %t0 = add <2 x i64> %x0, %x1
  %t1 = add <2 x i64> %x2, %t0
  %t2 = add <2 x i64> %x3, %t1
  ret <2 x i64> %t2
}

; Verify that vector bitwise operations are reassociated.

define <16 x i8> @reassociate_ands_v16i8(<16 x i8> %x0, <16 x i8> %x1, <16 x i8> %x2, <16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_v16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: orr v0.16b, v0.16b, v1.16b
; CHECK-NEXT: and v1.16b, v2.16b, v3.16b
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %t0 = or <16 x i8> %x0, %x1
  %t1 = and <16 x i8> %t0, %x2
  %t2 = and <16 x i8> %t1, %x3
  ret <16 x i8> %t2
}

define <4 x i16> @reassociate_ors_v4i16(<4 x i16> %x0, <4 x i16> %x1, <4 x i16> %x2, <4 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_v4i16:
; CHECK: // %bb.0:
; CHECK-NEXT: eor v0.8b, v0.8b, v1.8b
; CHECK-NEXT: orr v1.8b, v2.8b, v3.8b
; CHECK-NEXT: orr v0.8b, v0.8b, v1.8b
; CHECK-NEXT: ret
  %t0 = xor <4 x i16> %x0, %x1
  %t1 = or <4 x i16> %t0, %x2
  %t2 = or <4 x i16> %t1, %x3
  ret <4 x i16> %t2
}

define <4 x i32> @reassociate_xors_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %x2, <4 x i32> %x3) {
; CHECK-LABEL: reassociate_xors_v4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: and v0.16b, v0.16b, v1.16b
; CHECK-NEXT: eor v1.16b, v2.16b, v3.16b
; CHECK-NEXT: eor v0.16b, v0.16b, v1.16b
; CHECK-NEXT: ret
  %t0 = and <4 x i32> %x0, %x1
  %t1 = xor <4 x i32> %t0, %x2
  %t2 = xor <4 x i32> %t1, %x3
  ret <4 x i32> %t2
}

; Verify that scalable vector FP arithmetic operations are reassociated.

define <vscale x 8 x half> @reassociate_adds_nxv4f16(<vscale x 8 x half> %x0, <vscale x 8 x half> %x1, <vscale x 8 x half> %x2, <vscale x 8 x half> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f16:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-STD-NEXT: fadd z0.h, z2.h, z0.h
; CHECK-STD-NEXT: fadd z0.h, z3.h, z0.h
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f16:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd z0.h, z0.h, z1.h
; CHECK-UNSAFE-NEXT: fadd z1.h, z3.h, z2.h
; CHECK-UNSAFE-NEXT: fadd z0.h, z1.h, z0.h
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd reassoc <vscale x 8 x half> %x0, %x1
  %t1 = fadd reassoc <vscale x 8 x half> %x2, %t0
  %t2 = fadd reassoc <vscale x 8 x half> %x3, %t1
  ret <vscale x 8 x half> %t2
}

define <vscale x 4 x float> @reassociate_adds_nxv4f32(<vscale x 4 x float> %x0, <vscale x 4 x float> %x1, <vscale x 4 x float> %x2, <vscale x 4 x float> %x3) {
; CHECK-STD-LABEL: reassociate_adds_nxv4f32:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-STD-NEXT: fadd z0.s, z2.s, z0.s
; CHECK-STD-NEXT: fadd z0.s, z3.s, z0.s
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_nxv4f32:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fadd z0.s, z0.s, z1.s
; CHECK-UNSAFE-NEXT: fadd z1.s, z3.s, z2.s
; CHECK-UNSAFE-NEXT: fadd z0.s, z1.s, z0.s
; CHECK-UNSAFE-NEXT: ret
  %t0 = fadd reassoc <vscale x 4 x float> %x0, %x1
  %t1 = fadd reassoc <vscale x 4 x float> %x2, %t0
  %t2 = fadd reassoc <vscale x 4 x float> %x3, %t1
  ret <vscale x 4 x float> %t2
}

define <vscale x 2 x double> @reassociate_muls_nxv2f64(<vscale x 2 x double> %x0, <vscale x 2 x double> %x1, <vscale x 2 x double> %x2, <vscale x 2 x double> %x3) {
; CHECK-STD-LABEL: reassociate_muls_nxv2f64:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: fmul z0.d, z0.d, z1.d
; CHECK-STD-NEXT: fmul z0.d, z2.d, z0.d
; CHECK-STD-NEXT: fmul z0.d, z3.d, z0.d
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_muls_nxv2f64:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: fmul z0.d, z0.d, z1.d
; CHECK-UNSAFE-NEXT: fmul z1.d, z3.d, z2.d
; CHECK-UNSAFE-NEXT: fmul z0.d, z1.d, z0.d
; CHECK-UNSAFE-NEXT: ret
  %t0 = fmul reassoc <vscale x 2 x double> %x0, %x1
  %t1 = fmul reassoc <vscale x 2 x double> %x2, %t0
  %t2 = fmul reassoc <vscale x 2 x double> %x3, %t1
  ret <vscale x 2 x double> %t2
}

; Verify that scalable vector integer arithmetic operations are reassociated.

define <vscale x 16 x i8> @reassociate_muls_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_muls_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: mul z0.b, z0.b, z1.b
; CHECK-NEXT: mul z1.b, z3.b, z2.b
; CHECK-NEXT: mul z0.b, z1.b, z0.b
; CHECK-NEXT: ret
  %t0 = mul <vscale x 16 x i8> %x0, %x1
  %t1 = mul <vscale x 16 x i8> %x2, %t0
  %t2 = mul <vscale x 16 x i8> %x3, %t1
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_adds_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_adds_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.h, z0.h, z1.h
; CHECK-NEXT: add z1.h, z3.h, z2.h
; CHECK-NEXT: add z0.h, z1.h, z0.h
; CHECK-NEXT: ret
  %t0 = add <vscale x 8 x i16> %x0, %x1
  %t1 = add <vscale x 8 x i16> %x2, %t0
  %t2 = add <vscale x 8 x i16> %x3, %t1
  ret <vscale x 8 x i16> %t2
}

define <vscale x 4 x i32> @reassociate_muls_nxv4i32(<vscale x 4 x i32> %x0, <vscale x 4 x i32> %x1, <vscale x 4 x i32> %x2, <vscale x 4 x i32> %x3) {
; CHECK-LABEL: reassociate_muls_nxv4i32:
; CHECK: // %bb.0:
; CHECK-NEXT: mul z0.s, z0.s, z1.s
; CHECK-NEXT: mul z1.s, z3.s, z2.s
; CHECK-NEXT: mul z0.s, z1.s, z0.s
; CHECK-NEXT: ret
  %t0 = mul <vscale x 4 x i32> %x0, %x1
  %t1 = mul <vscale x 4 x i32> %x2, %t0
  %t2 = mul <vscale x 4 x i32> %x3, %t1
  ret <vscale x 4 x i32> %t2
}

define <vscale x 2 x i64> @reassociate_adds_nxv2i64(<vscale x 2 x i64> %x0, <vscale x 2 x i64> %x1, <vscale x 2 x i64> %x2, <vscale x 2 x i64> %x3) {
; CHECK-LABEL: reassociate_adds_nxv2i64:
; CHECK: // %bb.0:
; CHECK-NEXT: add z0.d, z0.d, z1.d
; CHECK-NEXT: add z1.d, z3.d, z2.d
; CHECK-NEXT: add z0.d, z1.d, z0.d
; CHECK-NEXT: ret
  %t0 = add <vscale x 2 x i64> %x0, %x1
  %t1 = add <vscale x 2 x i64> %x2, %t0
  %t2 = add <vscale x 2 x i64> %x3, %t1
  ret <vscale x 2 x i64> %t2
}

; Verify that scalable vector bitwise operations are reassociated.

define <vscale x 16 x i8> @reassociate_ands_nxv16i8(<vscale x 16 x i8> %x0, <vscale x 16 x i8> %x1, <vscale x 16 x i8> %x2, <vscale x 16 x i8> %x3) {
; CHECK-LABEL: reassociate_ands_nxv16i8:
; CHECK: // %bb.0:
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: and z1.d, z2.d, z3.d
; CHECK-NEXT: and z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %t0 = or <vscale x 16 x i8> %x0, %x1
  %t1 = and <vscale x 16 x i8> %t0, %x2
  %t2 = and <vscale x 16 x i8> %t1, %x3
  ret <vscale x 16 x i8> %t2
}

define <vscale x 8 x i16> @reassociate_ors_nxv8i16(<vscale x 8 x i16> %x0, <vscale x 8 x i16> %x1, <vscale x 8 x i16> %x2, <vscale x 8 x i16> %x3) {
; CHECK-LABEL: reassociate_ors_nxv8i16:
; CHECK: // %bb.0:
; CHECK-NEXT: eor z0.d, z0.d, z1.d
; CHECK-NEXT: orr z1.d, z2.d, z3.d
; CHECK-NEXT: orr z0.d, z0.d, z1.d
; CHECK-NEXT: ret
  %t0 = xor <vscale x 8 x i16> %x0, %x1
  %t1 = or <vscale x 8 x i16> %t0, %x2
  %t2 = or <vscale x 8 x i16> %t1, %x3
  ret <vscale x 8 x i16> %t2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; CHECK-STD-LABEL: reassociate_adds_from_calls:
; CHECK-STD: // %bb.0:
; CHECK-STD-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-STD-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-STD-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-STD-NEXT: .cfi_def_cfa_offset 32
; CHECK-STD-NEXT: .cfi_offset w30, -8
; CHECK-STD-NEXT: .cfi_offset b8, -16
; CHECK-STD-NEXT: .cfi_offset b9, -24
; CHECK-STD-NEXT: .cfi_offset b10, -32
; CHECK-STD-NEXT: bl bar
; CHECK-STD-NEXT: fmov d8, d0
; CHECK-STD-NEXT: bl bar
; CHECK-STD-NEXT: fmov d9, d0
; CHECK-STD-NEXT: bl bar
; CHECK-STD-NEXT: fmov d10, d0
; CHECK-STD-NEXT: bl bar
; CHECK-STD-NEXT: fadd d1, d8, d9
; CHECK-STD-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-STD-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-STD-NEXT: fadd d1, d1, d10
; CHECK-STD-NEXT: fadd d0, d1, d0
; CHECK-STD-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-STD-NEXT: ret
;
; CHECK-UNSAFE-LABEL: reassociate_adds_from_calls:
; CHECK-UNSAFE: // %bb.0:
; CHECK-UNSAFE-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-UNSAFE-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-UNSAFE-NEXT: .cfi_def_cfa_offset 32
; CHECK-UNSAFE-NEXT: .cfi_offset w30, -8
; CHECK-UNSAFE-NEXT: .cfi_offset b8, -16
; CHECK-UNSAFE-NEXT: .cfi_offset b9, -24
; CHECK-UNSAFE-NEXT: .cfi_offset b10, -32
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fmov d8, d0
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fmov d9, d0
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fmov d10, d0
; CHECK-UNSAFE-NEXT: bl bar
; CHECK-UNSAFE-NEXT: fadd d1, d8, d9
; CHECK-UNSAFE-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-UNSAFE-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: fadd d0, d10, d0
; CHECK-UNSAFE-NEXT: fadd d0, d1, d0
; CHECK-UNSAFE-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-UNSAFE-NEXT: ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; CHECK-LABEL: already_reassociated:
; CHECK: // %bb.0:
; CHECK-NEXT: str d10, [sp, #-32]! // 8-byte Folded Spill
; CHECK-NEXT: stp d9, d8, [sp, #8] // 16-byte Folded Spill
; CHECK-NEXT: str x30, [sp, #24] // 8-byte Folded Spill
; CHECK-NEXT: .cfi_def_cfa_offset 32
; CHECK-NEXT: .cfi_offset w30, -8
; CHECK-NEXT: .cfi_offset b8, -16
; CHECK-NEXT: .cfi_offset b9, -24
; CHECK-NEXT: .cfi_offset b10, -32
; CHECK-NEXT: bl bar
; CHECK-NEXT: fmov d8, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: fmov d9, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: fmov d10, d0
; CHECK-NEXT: bl bar
; CHECK-NEXT: fadd d1, d8, d9
; CHECK-NEXT: ldp d9, d8, [sp, #8] // 16-byte Folded Reload
; CHECK-NEXT: ldr x30, [sp, #24] // 8-byte Folded Reload
; CHECK-NEXT: fadd d0, d10, d0
; CHECK-NEXT: fadd d0, d1, d0
; CHECK-NEXT: ldr d10, [sp], #32 // 8-byte Folded Reload
; CHECK-NEXT: ret
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}