; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-verify-pattern-order=true < %s | FileCheck %s --check-prefixes=AVX,AVX512

; Incremental updates of the instruction depths should be enough for this test
; case.
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX1
; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx512vl -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefixes=AVX,AVX512
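
; All of the reassociations checked below rely on -enable-unsafe-fp-math; without
; it the original FP evaluation order would have to be preserved.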

; Verify that the first two adds are independent regardless of how the inputs are
; commuted. The destination registers are used as source registers for the third add.
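; In each case ((x0 + x1) + x2) + x3, in some commuted form, should become
; (x0 + x1) + (x2 + x3).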

define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds1:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds1:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds2:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds2:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %t1, %x3
  ret float %t2
}

define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds3:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds3:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %x3, %t1
  ret float %t2
}

define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds4:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds4:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
; produced because that would cost more compile time.
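; The expected final shape is (((x0 + x1) + (x2 + x3)) + ((x4 + x5) + x6)) + x7.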

define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
; SSE-LABEL: reassociate_adds5:
; SSE: # %bb.0:
; SSE-NEXT: addss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: addss %xmm5, %xmm4
; SSE-NEXT: addss %xmm6, %xmm4
; SSE-NEXT: addss %xmm4, %xmm0
; SSE-NEXT: addss %xmm7, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds5:
; AVX: # %bb.0:
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm5, %xmm4, %xmm1
; AVX-NEXT: vaddss %xmm6, %xmm1, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm7, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd float %x0, %x1
  %t1 = fadd float %t0, %x2
  %t2 = fadd float %t1, %x3
  %t3 = fadd float %t2, %x4
  %t4 = fadd float %t3, %x5
  %t5 = fadd float %t4, %x6
  %t6 = fadd float %t5, %x7
  ret float %t6
}

; Verify that we only need two associative operations to reassociate the operands.
; Also, we should reassociate such that the result of the high latency division
; is used by the final 'add' rather than reassociating the %x3 operand with the
; division. The latter reassociation would not improve anything.
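; That is, the expected shape is (x2 + x3) + (x0 / x1), with the division result
; feeding only the final add.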

define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_adds6:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: addss %xmm3, %xmm2
; SSE-NEXT: addss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds6:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fadd float %x2, %t0
  %t2 = fadd float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar single-precision multiplies are reassociated.

define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_muls1:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: mulss %xmm3, %xmm2
; SSE-NEXT: mulss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls1:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %t1 = fmul float %x2, %t0
  %t2 = fmul float %x3, %t1
  ret float %t2
}

; Verify that SSE and AVX scalar double-precision adds are reassociated.

define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_adds_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: addsd %xmm3, %xmm2
; SSE-NEXT: addsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vaddsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vaddsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fadd double %x2, %t0
  %t2 = fadd double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX scalar double-precision multiplies are reassociated.

define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_muls_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: mulsd %xmm3, %xmm2
; SSE-NEXT: mulsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %t1 = fmul double %x2, %t0
  %t2 = fmul double %x3, %t1
  ret double %t2
}

; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.

define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_adds_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm1, %xmm0
; SSE-NEXT: addps %xmm3, %xmm2
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v4f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vaddps %xmm3, %xmm2, %xmm1
; AVX1-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v4f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213ps {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT: vaddps %xmm0, %xmm3, %xmm0
; AVX512-NEXT: retq
  %t0 = fmul <4 x float> %x0, %x1
  %t1 = fadd <4 x float> %x2, %t0
  %t2 = fadd <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.

define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_adds_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm1, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm2
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v2f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: vaddpd %xmm3, %xmm2, %xmm1
; AVX1-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v2f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} xmm0 = (xmm1 * xmm0) + xmm2
; AVX512-NEXT: vaddpd %xmm0, %xmm3, %xmm0
; AVX512-NEXT: retq
  %t0 = fmul <2 x double> %x0, %x1
  %t1 = fadd <2 x double> %x2, %t0
  %t2 = fadd <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.

define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_muls_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm2
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %t1 = fmul <4 x float> %x2, %t0
  %t2 = fmul <4 x float> %x3, %t1
  ret <4 x float> %t2
}

; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.

define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_muls_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm2
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmulpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmulpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %t1 = fmul <2 x double> %x2, %t0
  %t2 = fmul <2 x double> %x3, %t1
  ret <2 x double> %t2
}

; Verify that AVX 256-bit vector single-precision adds are reassociated.

define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_adds_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm2, %xmm0
; SSE-NEXT: mulps %xmm3, %xmm1
; SSE-NEXT: addps %xmm6, %xmm4
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm7, %xmm5
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v8f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm2, %ymm1
; AVX1-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v8f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213ps {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT: vaddps %ymm0, %ymm3, %ymm0
; AVX512-NEXT: retq
  %t0 = fmul <8 x float> %x0, %x1
  %t1 = fadd <8 x float> %x2, %t0
  %t2 = fadd <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision adds are reassociated.

define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_adds_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm2, %xmm0
; SSE-NEXT: mulpd %xmm3, %xmm1
; SSE-NEXT: addpd %xmm6, %xmm4
; SSE-NEXT: addpd %xmm4, %xmm0
; SSE-NEXT: addpd %xmm7, %xmm5
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v4f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm2, %ymm1
; AVX1-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v4f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} ymm0 = (ymm1 * ymm0) + ymm2
; AVX512-NEXT: vaddpd %ymm0, %ymm3, %ymm0
; AVX512-NEXT: retq
  %t0 = fmul <4 x double> %x0, %x1
  %t1 = fadd <4 x double> %x2, %t0
  %t2 = fadd <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX 256-bit vector single-precision multiplies are reassociated.

define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_muls_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: mulps %xmm6, %xmm4
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: mulps %xmm7, %xmm5
; SSE-NEXT: mulps %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %t1 = fmul <8 x float> %x2, %t0
  %t2 = fmul <8 x float> %x3, %t1
  ret <8 x float> %t2
}

; Verify that AVX 256-bit vector double-precision multiplies are reassociated.

define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_muls_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: mulpd %xmm6, %xmm4
; SSE-NEXT: mulpd %xmm4, %xmm0
; SSE-NEXT: mulpd %xmm7, %xmm5
; SSE-NEXT: mulpd %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_muls_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmulpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmulpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %t1 = fmul <4 x double> %x2, %t0
  %t2 = fmul <4 x double> %x3, %t1
  ret <4 x double> %t2
}

; Verify that AVX512 512-bit vector single-precision adds are reassociated.

define <16 x float> @reassociate_adds_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_adds_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: mulps %xmm4, %xmm0
; SSE-NEXT: mulps %xmm5, %xmm1
; SSE-NEXT: mulps %xmm6, %xmm2
; SSE-NEXT: mulps %xmm7, %xmm3
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: addps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vaddps %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vaddps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213ps {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: vaddps %zmm0, %zmm3, %zmm0
; AVX512-NEXT: retq
  %t0 = fmul <16 x float> %x0, %x1
  %t1 = fadd <16 x float> %x2, %t0
  %t2 = fadd <16 x float> %x3, %t1
  ret <16 x float> %t2
}

; Verify that AVX512 512-bit vector double-precision adds are reassociated.

define <8 x double> @reassociate_adds_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_adds_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: mulpd %xmm4, %xmm0
; SSE-NEXT: mulpd %xmm5, %xmm1
; SSE-NEXT: mulpd %xmm6, %xmm2
; SSE-NEXT: mulpd %xmm7, %xmm3
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: addpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_adds_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vaddpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vaddpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_adds_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vfmadd213pd {{.*#+}} zmm0 = (zmm1 * zmm0) + zmm2
; AVX512-NEXT: vaddpd %zmm0, %zmm3, %zmm0
; AVX512-NEXT: retq
  %t0 = fmul <8 x double> %x0, %x1
  %t1 = fadd <8 x double> %x2, %t0
  %t2 = fadd <8 x double> %x3, %t1
  ret <8 x double> %t2
}

; Verify that AVX512 512-bit vector single-precision multiplies are reassociated.

define <16 x float> @reassociate_muls_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_muls_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: addps %xmm6, %xmm2
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: mulps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_muls_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmulps %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vmulps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulps %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vmulps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_muls_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmulps %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vmulps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <16 x float> %x0, %x1
  %t1 = fmul <16 x float> %x2, %t0
  %t2 = fmul <16 x float> %x3, %t1
  ret <16 x float> %t2
}

; Verify that AVX512 512-bit vector double-precision multiplies are reassociated.

define <8 x double> @reassociate_muls_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_muls_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm4, %xmm0
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: addpd %xmm6, %xmm2
; SSE-NEXT: addpd %xmm7, %xmm3
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: mulpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_muls_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmulpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vmulpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmulpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vmulpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_muls_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmulpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vmulpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <8 x double> %x0, %x1
  %t1 = fmul <8 x double> %x2, %t0
  %t2 = fmul <8 x double> %x3, %t1
  ret <8 x double> %t2
}

; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.

define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_mins_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: minss %xmm3, %xmm2
; SSE-NEXT: minss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp olt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp olt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.

define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
; SSE-LABEL: reassociate_maxs_single:
; SSE: # %bb.0:
; SSE-NEXT: divss %xmm1, %xmm0
; SSE-NEXT: maxss %xmm3, %xmm2
; SSE-NEXT: maxss %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_single:
; AVX: # %bb.0:
; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxss %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxss %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv float %x0, %x1
  %cmp1 = fcmp ogt float %x2, %t0
  %sel1 = select i1 %cmp1, float %x2, float %t0
  %cmp2 = fcmp ogt float %x3, %sel1
  %sel2 = select i1 %cmp2, float %x3, float %sel1
  ret float %sel2
}

; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.

define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_mins_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: minsd %xmm3, %xmm2
; SSE-NEXT: minsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp olt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp olt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.

define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
; SSE-LABEL: reassociate_maxs_double:
; SSE: # %bb.0:
; SSE-NEXT: divsd %xmm1, %xmm0
; SSE-NEXT: maxsd %xmm3, %xmm2
; SSE-NEXT: maxsd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_double:
; AVX: # %bb.0:
; AVX-NEXT: vdivsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxsd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxsd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fdiv double %x0, %x1
  %cmp1 = fcmp ogt double %x2, %t0
  %sel1 = select i1 %cmp1, double %x2, double %t0
  %cmp2 = fcmp ogt double %x3, %sel1
  %sel2 = select i1 %cmp2, double %x3, double %sel1
  ret double %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.

define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_mins_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: minps %xmm3, %xmm2
; SSE-NEXT: minps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp olt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp olt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.

define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v4f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm1, %xmm0
; SSE-NEXT: maxps %xmm3, %xmm2
; SSE-NEXT: maxps %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v4f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxps %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxps %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <4 x float> %x0, %x1
  %cmp1 = fcmp ogt <4 x float> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0
  %cmp2 = fcmp ogt <4 x float> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1
  ret <4 x float> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.

define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_mins_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: minpd %xmm3, %xmm2
; SSE-NEXT: minpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vminpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vminpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp olt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp olt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.

define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v2f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm1, %xmm0
; SSE-NEXT: maxpd %xmm3, %xmm2
; SSE-NEXT: maxpd %xmm2, %xmm0
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v2f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: vmaxpd %xmm3, %xmm2, %xmm1
; AVX-NEXT: vmaxpd %xmm1, %xmm0, %xmm0
; AVX-NEXT: retq
  %t0 = fadd <2 x double> %x0, %x1
  %cmp1 = fcmp ogt <2 x double> %x2, %t0
  %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0
  %cmp2 = fcmp ogt <2 x double> %x3, %sel1
  %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1
  ret <2 x double> %sel2
}

; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.

define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_mins_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: minps %xmm6, %xmm4
; SSE-NEXT: minps %xmm4, %xmm0
; SSE-NEXT: minps %xmm7, %xmm5
; SSE-NEXT: minps %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp olt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp olt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.

define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v8f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm2, %xmm0
; SSE-NEXT: addps %xmm3, %xmm1
; SSE-NEXT: maxps %xmm6, %xmm4
; SSE-NEXT: maxps %xmm4, %xmm0
; SSE-NEXT: maxps %xmm7, %xmm5
; SSE-NEXT: maxps %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v8f32:
; AVX: # %bb.0:
; AVX-NEXT: vaddps %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxps %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxps %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <8 x float> %x0, %x1
  %cmp1 = fcmp ogt <8 x float> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0
  %cmp2 = fcmp ogt <8 x float> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1
  ret <8 x float> %sel2
}

; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.

define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_mins_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: minpd %xmm6, %xmm4
; SSE-NEXT: minpd %xmm4, %xmm0
; SSE-NEXT: minpd %xmm7, %xmm5
; SSE-NEXT: minpd %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_mins_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vminpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vminpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp olt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp olt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.

define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v4f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm2, %xmm0
; SSE-NEXT: addpd %xmm3, %xmm1
; SSE-NEXT: maxpd %xmm6, %xmm4
; SSE-NEXT: maxpd %xmm4, %xmm0
; SSE-NEXT: maxpd %xmm7, %xmm5
; SSE-NEXT: maxpd %xmm5, %xmm1
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_maxs_v4f64:
; AVX: # %bb.0:
; AVX-NEXT: vaddpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: vmaxpd %ymm3, %ymm2, %ymm1
; AVX-NEXT: vmaxpd %ymm1, %ymm0, %ymm0
; AVX-NEXT: retq
  %t0 = fadd <4 x double> %x0, %x1
  %cmp1 = fcmp ogt <4 x double> %x2, %t0
  %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0
  %cmp2 = fcmp ogt <4 x double> %x3, %sel1
  %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1
  ret <4 x double> %sel2
}

; Verify that AVX512 512-bit vector single-precision minimum ops are reassociated.

define <16 x float> @reassociate_mins_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_mins_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: addps %xmm6, %xmm2
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: minps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_mins_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vminps %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vminps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vminps %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vminps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_mins_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vminps %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vminps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <16 x float> %x0, %x1
  %cmp1 = fcmp olt <16 x float> %x2, %t0
  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
  %cmp2 = fcmp olt <16 x float> %x3, %sel1
  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
  ret <16 x float> %sel2
}

; Verify that AVX512 512-bit vector single-precision maximum ops are reassociated.

define <16 x float> @reassociate_maxs_v16f32(<16 x float> %x0, <16 x float> %x1, <16 x float> %x2, <16 x float> %x3) {
; SSE-LABEL: reassociate_maxs_v16f32:
; SSE: # %bb.0:
; SSE-NEXT: addps %xmm4, %xmm0
; SSE-NEXT: addps %xmm5, %xmm1
; SSE-NEXT: addps %xmm6, %xmm2
; SSE-NEXT: addps %xmm7, %xmm3
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: maxps {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_maxs_v16f32:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddps %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmaxps %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vmaxps %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmaxps %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vmaxps %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_maxs_v16f32:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmaxps %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vmaxps %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <16 x float> %x0, %x1
  %cmp1 = fcmp ogt <16 x float> %x2, %t0
  %sel1 = select <16 x i1> %cmp1, <16 x float> %x2, <16 x float> %t0
  %cmp2 = fcmp ogt <16 x float> %x3, %sel1
  %sel2 = select <16 x i1> %cmp2, <16 x float> %x3, <16 x float> %sel1
  ret <16 x float> %sel2
}

; Verify that AVX512 512-bit vector double-precision minimum ops are reassociated.

define <8 x double> @reassociate_mins_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_mins_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm4, %xmm0
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: addpd %xmm6, %xmm2
; SSE-NEXT: addpd %xmm7, %xmm3
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: minpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_mins_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vminpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vminpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vminpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vminpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_mins_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vminpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vminpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <8 x double> %x0, %x1
  %cmp1 = fcmp olt <8 x double> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
  %cmp2 = fcmp olt <8 x double> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
  ret <8 x double> %sel2
}

; Verify that AVX512 512-bit vector double-precision maximum ops are reassociated.

define <8 x double> @reassociate_maxs_v8f64(<8 x double> %x0, <8 x double> %x1, <8 x double> %x2, <8 x double> %x3) {
; SSE-LABEL: reassociate_maxs_v8f64:
; SSE: # %bb.0:
; SSE-NEXT: addpd %xmm4, %xmm0
; SSE-NEXT: addpd %xmm5, %xmm1
; SSE-NEXT: addpd %xmm6, %xmm2
; SSE-NEXT: addpd %xmm7, %xmm3
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm0
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm1
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm2
; SSE-NEXT: maxpd {{[0-9]+}}(%rsp), %xmm3
; SSE-NEXT: retq
;
; AVX1-LABEL: reassociate_maxs_v8f64:
; AVX1: # %bb.0:
; AVX1-NEXT: vaddpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vaddpd %ymm3, %ymm1, %ymm1
; AVX1-NEXT: vmaxpd %ymm6, %ymm4, %ymm2
; AVX1-NEXT: vmaxpd %ymm2, %ymm0, %ymm0
; AVX1-NEXT: vmaxpd %ymm7, %ymm5, %ymm2
; AVX1-NEXT: vmaxpd %ymm2, %ymm1, %ymm1
; AVX1-NEXT: retq
;
; AVX512-LABEL: reassociate_maxs_v8f64:
; AVX512: # %bb.0:
; AVX512-NEXT: vaddpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: vmaxpd %zmm3, %zmm2, %zmm1
; AVX512-NEXT: vmaxpd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
  %t0 = fadd <8 x double> %x0, %x1
  %cmp1 = fcmp ogt <8 x double> %x2, %t0
  %sel1 = select <8 x i1> %cmp1, <8 x double> %x2, <8 x double> %t0
  %cmp2 = fcmp ogt <8 x double> %x3, %sel1
  %sel2 = select <8 x i1> %cmp2, <8 x double> %x3, <8 x double> %sel1
  ret <8 x double> %sel2
}

; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
; Verify that reassociation is not happening needlessly or wrongly.

declare double @bar()

define double @reassociate_adds_from_calls() {
; SSE-LABEL: reassociate_adds_from_calls:
; SSE: # %bb.0:
; SSE-NEXT: subq $24, %rsp
; SSE-NEXT: .cfi_def_cfa_offset 32
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; SSE-NEXT: # xmm1 = mem[0],zero
; SSE-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; SSE-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: addq $24, %rsp
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: reassociate_adds_from_calls:
; AVX: # %bb.0:
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; AVX-NEXT: # xmm1 = mem[0],zero
; AVX-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %t0, %x2
  %t2 = fadd double %t1, %x3
  ret double %t2
}

define double @already_reassociated() {
; SSE-LABEL: already_reassociated:
; SSE: # %bb.0:
; SSE-NEXT: subq $24, %rsp
; SSE-NEXT: .cfi_def_cfa_offset 32
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd %xmm0, (%rsp) # 8-byte Spill
; SSE-NEXT: callq bar
; SSE-NEXT: movsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; SSE-NEXT: # xmm1 = mem[0],zero
; SSE-NEXT: addsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Folded Reload
; SSE-NEXT: addsd (%rsp), %xmm0 # 8-byte Folded Reload
; SSE-NEXT: addsd %xmm1, %xmm0
; SSE-NEXT: addq $24, %rsp
; SSE-NEXT: .cfi_def_cfa_offset 8
; SSE-NEXT: retq
;
; AVX-LABEL: already_reassociated:
; AVX: # %bb.0:
; AVX-NEXT: subq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 32
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, {{[-0-9]+}}(%r{{[sb]}}p) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd %xmm0, (%rsp) # 8-byte Spill
; AVX-NEXT: callq bar
; AVX-NEXT: vmovsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1 # 8-byte Reload
; AVX-NEXT: # xmm1 = mem[0],zero
; AVX-NEXT: vaddsd {{[-0-9]+}}(%r{{[sb]}}p), %xmm1, %xmm1 # 8-byte Folded Reload
; AVX-NEXT: vaddsd (%rsp), %xmm0, %xmm0 # 8-byte Folded Reload
; AVX-NEXT: vaddsd %xmm0, %xmm1, %xmm0
; AVX-NEXT: addq $24, %rsp
; AVX-NEXT: .cfi_def_cfa_offset 8
; AVX-NEXT: retq
  %x0 = call double @bar()
  %x1 = call double @bar()
  %x2 = call double @bar()
  %x3 = call double @bar()
  %t0 = fadd double %x0, %x1
  %t1 = fadd double %x2, %x3
  %t2 = fadd double %t0, %t1
  ret double %t2
}