test/CodeGen/X86/machine-combiner.ll

   1 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=SSE
   2 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math < %s | FileCheck %s --check-prefix=AVX
   3
   4 ; The next two runs pass -machine-combiner-inc-threshold=0. Incremental updates of
   5 ; the instruction depths should be enough for this test case.
   6 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=sse -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=SSE
   7 ; RUN: llc -mtriple=x86_64-unknown-unknown -mcpu=x86-64 -mattr=avx -enable-unsafe-fp-math -machine-combiner-inc-threshold=0 < %s | FileCheck %s --check-prefix=AVX
   8
   9 ; Verify that the first two adds are independent regardless of how the inputs are
  10 ; commuted. The destination registers are used as source registers for the third add.
  11
  12 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
  13 ; SSE-LABEL: reassociate_adds1:
  14 ; SSE:       # BB#0:
  15 ; SSE-NEXT:    addss %xmm1, %xmm0
  16 ; SSE-NEXT:    addss %xmm3, %xmm2
  17 ; SSE-NEXT:    addss %xmm2, %xmm0
  18 ; SSE-NEXT:    retq
  19 ;
  20 ; AVX-LABEL: reassociate_adds1:
  21 ; AVX:       # BB#0:
  22 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  23 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  24 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  25 ; AVX-NEXT:    retq
  26   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  27   %t1 = fadd float %t0, %x2  ; t1 = t0 + x2 (chained value is the first operand)
  28   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3 (chained value is the first operand)
  29   ret float %t2  ; serial chain in the IR; the checks expect the xmm3/xmm2 add to be independent of the first add
  30 }
  31
  32 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
  33 ; SSE-LABEL: reassociate_adds2:
  34 ; SSE:       # BB#0:
  35 ; SSE-NEXT:    addss %xmm1, %xmm0
  36 ; SSE-NEXT:    addss %xmm3, %xmm2
  37 ; SSE-NEXT:    addss %xmm2, %xmm0
  38 ; SSE-NEXT:    retq
  39 ;
  40 ; AVX-LABEL: reassociate_adds2:
  41 ; AVX:       # BB#0:
  42 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  43 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  44 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  45 ; AVX-NEXT:    retq
  46   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  47   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0 (chained value is the second operand)
  48   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3 (chained value is the first operand)
  49   ret float %t2  ; serial chain in the IR; the checks expect the xmm3/xmm2 add to be independent of the first add
  50 }
  51
  52 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
  53 ; SSE-LABEL: reassociate_adds3:
  54 ; SSE:       # BB#0:
  55 ; SSE-NEXT:    addss %xmm1, %xmm0
  56 ; SSE-NEXT:    addss %xmm3, %xmm2
  57 ; SSE-NEXT:    addss %xmm2, %xmm0
  58 ; SSE-NEXT:    retq
  59 ;
  60 ; AVX-LABEL: reassociate_adds3:
  61 ; AVX:       # BB#0:
  62 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  63 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  64 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  65 ; AVX-NEXT:    retq
  66   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  67   %t1 = fadd float %t0, %x2  ; t1 = t0 + x2 (chained value is the first operand)
  68   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1 (chained value is the second operand)
  69   ret float %t2  ; serial chain in the IR; the checks expect the xmm3/xmm2 add to be independent of the first add
  70 }
  71
  72 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
  73 ; SSE-LABEL: reassociate_adds4:
  74 ; SSE:       # BB#0:
  75 ; SSE-NEXT:    addss %xmm1, %xmm0
  76 ; SSE-NEXT:    addss %xmm3, %xmm2
  77 ; SSE-NEXT:    addss %xmm2, %xmm0
  78 ; SSE-NEXT:    retq
  79 ;
  80 ; AVX-LABEL: reassociate_adds4:
  81 ; AVX:       # BB#0:
  82 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  83 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
  84 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
  85 ; AVX-NEXT:    retq
  86   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
  87   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0 (chained value is the second operand)
  88   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1 (chained value is the second operand)
  89   ret float %t2  ; serial chain in the IR; the checks expect the xmm3/xmm2 add to be independent of the first add
  90 }
  91
  92 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  93 ; produced because that would cost more compile time.
  94
  95 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
  96 ; SSE-LABEL: reassociate_adds5:
  97 ; SSE:       # BB#0:
  98 ; SSE-NEXT:    addss %xmm1, %xmm0
  99 ; SSE-NEXT:    addss %xmm3, %xmm2
 100 ; SSE-NEXT:    addss %xmm2, %xmm0
 101 ; SSE-NEXT:    addss %xmm5, %xmm4
 102 ; SSE-NEXT:    addss %xmm6, %xmm4
 103 ; SSE-NEXT:    addss %xmm4, %xmm0
 104 ; SSE-NEXT:    addss %xmm7, %xmm0
 105 ; SSE-NEXT:    retq
 106 ;
 107 ; AVX-LABEL: reassociate_adds5:
 108 ; AVX:       # BB#0:
 109 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 110 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 111 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 112 ; AVX-NEXT:    vaddss %xmm5, %xmm4, %xmm1
 113 ; AVX-NEXT:    vaddss %xmm6, %xmm1, %xmm1
 114 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 115 ; AVX-NEXT:    vaddss %xmm7, %xmm0, %xmm0
 116 ; AVX-NEXT:    retq
 117   %t0 = fadd float %x0, %x1  ; t0 = x0 + x1
 118   %t1 = fadd float %t0, %x2  ; t1 = t0 + x2
 119   %t2 = fadd float %t1, %x3  ; t2 = t1 + x3
 120   %t3 = fadd float %t2, %x4  ; t3 = t2 + x4
 121   %t4 = fadd float %t3, %x5  ; t4 = t3 + x5
 122   %t5 = fadd float %t4, %x6  ; t5 = t4 + x6
 123   %t6 = fadd float %t5, %x7  ; t6 = t5 + x7
 124   ret float %t6  ; seven adds, each depending on the previous one; the checks expect only a partially rebalanced sequence
 125 }
 126
 127 ; Verify that we only need two associative operations to reassociate the operands.
 128 ; Also, we should reassociate such that the result of the high latency division
 129 ; is used by the final 'add' rather than reassociating the %x3 operand with the
 130 ; division. The latter reassociation would not improve anything.
 131
 132 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
 133 ; SSE-LABEL: reassociate_adds6:
 134 ; SSE:       # BB#0:
 135 ; SSE-NEXT:    divss %xmm1, %xmm0
 136 ; SSE-NEXT:    addss %xmm3, %xmm2
 137 ; SSE-NEXT:    addss %xmm2, %xmm0
 138 ; SSE-NEXT:    retq
 139 ;
 140 ; AVX-LABEL: reassociate_adds6:
 141 ; AVX:       # BB#0:
 142 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 143 ; AVX-NEXT:    vaddss %xmm3, %xmm2, %xmm1
 144 ; AVX-NEXT:    vaddss %xmm1, %xmm0, %xmm0
 145 ; AVX-NEXT:    retq
 146   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1 (the division whose result should feed only the final add)
 147   %t1 = fadd float %x2, %t0  ; t1 = x2 + t0
 148   %t2 = fadd float %x3, %t1  ; t2 = x3 + t1
 149   ret float %t2  ; the checks expect the xmm3/xmm2 add to be independent of the divide
 150 }
 151
 152 ; Verify that SSE and AVX scalar single-precision multiplies are reassociated.
 153
 154 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
 155 ; SSE-LABEL: reassociate_muls1:
 156 ; SSE:       # BB#0:
 157 ; SSE-NEXT:    divss %xmm1, %xmm0
 158 ; SSE-NEXT:    mulss %xmm3, %xmm2
 159 ; SSE-NEXT:    mulss %xmm2, %xmm0
 160 ; SSE-NEXT:    retq
 161 ;
 162 ; AVX-LABEL: reassociate_muls1:
 163 ; AVX:       # BB#0:
 164 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 165 ; AVX-NEXT:    vmulss %xmm3, %xmm2, %xmm1
 166 ; AVX-NEXT:    vmulss %xmm1, %xmm0, %xmm0
 167 ; AVX-NEXT:    retq
 168   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 169   %t1 = fmul float %x2, %t0  ; t1 = x2 * t0
 170   %t2 = fmul float %x3, %t1  ; t2 = x3 * t1
 171   ret float %t2  ; the checks expect the xmm3/xmm2 multiply to be independent of the divide
 172 }
 173
 174 ; Verify that SSE and AVX scalar double-precision adds are reassociated.
 175
 176 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
 177 ; SSE-LABEL: reassociate_adds_double:
 178 ; SSE:       # BB#0:
 179 ; SSE-NEXT:    divsd %xmm1, %xmm0
 180 ; SSE-NEXT:    addsd %xmm3, %xmm2
 181 ; SSE-NEXT:    addsd %xmm2, %xmm0
 182 ; SSE-NEXT:    retq
 183 ;
 184 ; AVX-LABEL: reassociate_adds_double:
 185 ; AVX:       # BB#0:
 186 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 187 ; AVX-NEXT:    vaddsd %xmm3, %xmm2, %xmm1
 188 ; AVX-NEXT:    vaddsd %xmm1, %xmm0, %xmm0
 189 ; AVX-NEXT:    retq
 190   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 191   %t1 = fadd double %x2, %t0  ; t1 = x2 + t0
 192   %t2 = fadd double %x3, %t1  ; t2 = x3 + t1
 193   ret double %t2  ; the checks expect the xmm3/xmm2 add to be independent of the divide
 194 }
 195
 196 ; Verify that SSE and AVX scalar double-precision multiplies are reassociated.
 197
 198 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
 199 ; SSE-LABEL: reassociate_muls_double:
 200 ; SSE:       # BB#0:
 201 ; SSE-NEXT:    divsd %xmm1, %xmm0
 202 ; SSE-NEXT:    mulsd %xmm3, %xmm2
 203 ; SSE-NEXT:    mulsd %xmm2, %xmm0
 204 ; SSE-NEXT:    retq
 205 ;
 206 ; AVX-LABEL: reassociate_muls_double:
 207 ; AVX:       # BB#0:
 208 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 209 ; AVX-NEXT:    vmulsd %xmm3, %xmm2, %xmm1
 210 ; AVX-NEXT:    vmulsd %xmm1, %xmm0, %xmm0
 211 ; AVX-NEXT:    retq
 212   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 213   %t1 = fmul double %x2, %t0  ; t1 = x2 * t0
 214   %t2 = fmul double %x3, %t1  ; t2 = x3 * t1
 215   ret double %t2  ; the checks expect the xmm3/xmm2 multiply to be independent of the divide
 216 }
 217
 218 ; Verify that SSE and AVX 128-bit vector single-precision adds are reassociated.
 219
 220 define <4 x float> @reassociate_adds_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 221 ; SSE-LABEL: reassociate_adds_v4f32:
 222 ; SSE:       # BB#0:
 223 ; SSE-NEXT:    mulps %xmm1, %xmm0
 224 ; SSE-NEXT:    addps %xmm3, %xmm2
 225 ; SSE-NEXT:    addps %xmm2, %xmm0
 226 ; SSE-NEXT:    retq
 227 ;
 228 ; AVX-LABEL: reassociate_adds_v4f32:
 229 ; AVX:       # BB#0:
 230 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 231 ; AVX-NEXT:    vaddps %xmm3, %xmm2, %xmm1
 232 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 233 ; AVX-NEXT:    retq
 234   %t0 = fmul <4 x float> %x0, %x1  ; t0 = x0 * x1 (non-add op at the head of the chain)
 235   %t1 = fadd <4 x float> %x2, %t0  ; t1 = x2 + t0
 236   %t2 = fadd <4 x float> %x3, %t1  ; t2 = x3 + t1
 237   ret <4 x float> %t2  ; the checks expect the xmm3/xmm2 add to be independent of the multiply
 238 }
 239
 240 ; Verify that SSE and AVX 128-bit vector double-precision adds are reassociated.
 241
 242 define <2 x double> @reassociate_adds_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 243 ; SSE-LABEL: reassociate_adds_v2f64:
 244 ; SSE:       # BB#0:
 245 ; SSE-NEXT:    mulpd %xmm1, %xmm0
 246 ; SSE-NEXT:    addpd %xmm3, %xmm2
 247 ; SSE-NEXT:    addpd %xmm2, %xmm0
 248 ; SSE-NEXT:    retq
 249 ;
 250 ; AVX-LABEL: reassociate_adds_v2f64:
 251 ; AVX:       # BB#0:
 252 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 253 ; AVX-NEXT:    vaddpd %xmm3, %xmm2, %xmm1
 254 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 255 ; AVX-NEXT:    retq
 256   %t0 = fmul <2 x double> %x0, %x1  ; t0 = x0 * x1 (non-add op at the head of the chain)
 257   %t1 = fadd <2 x double> %x2, %t0  ; t1 = x2 + t0
 258   %t2 = fadd <2 x double> %x3, %t1  ; t2 = x3 + t1
 259   ret <2 x double> %t2  ; the checks expect the xmm3/xmm2 add to be independent of the multiply
 260 }
 261
 262 ; Verify that SSE and AVX 128-bit vector single-precision multiplies are reassociated.
 263
 264 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 265 ; SSE-LABEL: reassociate_muls_v4f32:
 266 ; SSE:       # BB#0:
 267 ; SSE-NEXT:    addps %xmm1, %xmm0
 268 ; SSE-NEXT:    mulps %xmm3, %xmm2
 269 ; SSE-NEXT:    mulps %xmm2, %xmm0
 270 ; SSE-NEXT:    retq
 271 ;
 272 ; AVX-LABEL: reassociate_muls_v4f32:
 273 ; AVX:       # BB#0:
 274 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 275 ; AVX-NEXT:    vmulps %xmm3, %xmm2, %xmm1
 276 ; AVX-NEXT:    vmulps %xmm1, %xmm0, %xmm0
 277 ; AVX-NEXT:    retq
 278   %t0 = fadd <4 x float> %x0, %x1  ; t0 = x0 + x1 (non-multiply op at the head of the chain)
 279   %t1 = fmul <4 x float> %x2, %t0  ; t1 = x2 * t0
 280   %t2 = fmul <4 x float> %x3, %t1  ; t2 = x3 * t1
 281   ret <4 x float> %t2  ; the checks expect the xmm3/xmm2 multiply to be independent of the add
 282 }
 283
 284 ; Verify that SSE and AVX 128-bit vector double-precision multiplies are reassociated.
 285
 286 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 287 ; SSE-LABEL: reassociate_muls_v2f64:
 288 ; SSE:       # BB#0:
 289 ; SSE-NEXT:    addpd %xmm1, %xmm0
 290 ; SSE-NEXT:    mulpd %xmm3, %xmm2
 291 ; SSE-NEXT:    mulpd %xmm2, %xmm0
 292 ; SSE-NEXT:    retq
 293 ;
 294 ; AVX-LABEL: reassociate_muls_v2f64:
 295 ; AVX:       # BB#0:
 296 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 297 ; AVX-NEXT:    vmulpd %xmm3, %xmm2, %xmm1
 298 ; AVX-NEXT:    vmulpd %xmm1, %xmm0, %xmm0
 299 ; AVX-NEXT:    retq
 300   %t0 = fadd <2 x double> %x0, %x1  ; t0 = x0 + x1 (non-multiply op at the head of the chain)
 301   %t1 = fmul <2 x double> %x2, %t0  ; t1 = x2 * t0
 302   %t2 = fmul <2 x double> %x3, %t1  ; t2 = x3 * t1
 303   ret <2 x double> %t2  ; the checks expect the xmm3/xmm2 multiply to be independent of the add
 304 }
 305
 306 ; Verify that AVX 256-bit vector single-precision adds are reassociated.
 307
 308 define <8 x float> @reassociate_adds_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 309 ; AVX-LABEL: reassociate_adds_v8f32:
 310 ; AVX:       # BB#0:
 311 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 312 ; AVX-NEXT:    vaddps %ymm3, %ymm2, %ymm1
 313 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 314 ; AVX-NEXT:    retq
 315   %t0 = fmul <8 x float> %x0, %x1  ; t0 = x0 * x1 (non-add op at the head of the chain)
 316   %t1 = fadd <8 x float> %x2, %t0  ; t1 = x2 + t0
 317   %t2 = fadd <8 x float> %x3, %t1  ; t2 = x3 + t1
 318   ret <8 x float> %t2  ; only the AVX run has checks here; they expect the ymm3/ymm2 add to be independent of the multiply
 319 }
 320
 321 ; Verify that AVX 256-bit vector double-precision adds are reassociated.
 322
 323 define <4 x double> @reassociate_adds_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 324 ; AVX-LABEL: reassociate_adds_v4f64:
 325 ; AVX:       # BB#0:
 326 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 327 ; AVX-NEXT:    vaddpd %ymm3, %ymm2, %ymm1
 328 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 329 ; AVX-NEXT:    retq
 330   %t0 = fmul <4 x double> %x0, %x1  ; t0 = x0 * x1 (non-add op at the head of the chain)
 331   %t1 = fadd <4 x double> %x2, %t0  ; t1 = x2 + t0
 332   %t2 = fadd <4 x double> %x3, %t1  ; t2 = x3 + t1
 333   ret <4 x double> %t2  ; only the AVX run has checks here; they expect the ymm3/ymm2 add to be independent of the multiply
 334 }
 335
 336 ; Verify that AVX 256-bit vector single-precision multiplies are reassociated.
 337
 338 define <8 x float> @reassociate_muls_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 339 ; AVX-LABEL: reassociate_muls_v8f32:
 340 ; AVX:       # BB#0:
 341 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 342 ; AVX-NEXT:    vmulps %ymm3, %ymm2, %ymm1
 343 ; AVX-NEXT:    vmulps %ymm1, %ymm0, %ymm0
 344 ; AVX-NEXT:    retq
 345   %t0 = fadd <8 x float> %x0, %x1  ; t0 = x0 + x1 (non-multiply op at the head of the chain)
 346   %t1 = fmul <8 x float> %x2, %t0  ; t1 = x2 * t0
 347   %t2 = fmul <8 x float> %x3, %t1  ; t2 = x3 * t1
 348   ret <8 x float> %t2  ; only the AVX run has checks here; they expect the ymm3/ymm2 multiply to be independent of the add
 349 }
 350
 351 ; Verify that AVX 256-bit vector double-precision multiplies are reassociated.
 352
 353 define <4 x double> @reassociate_muls_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 354 ; AVX-LABEL: reassociate_muls_v4f64:
 355 ; AVX:       # BB#0:
 356 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 357 ; AVX-NEXT:    vmulpd %ymm3, %ymm2, %ymm1
 358 ; AVX-NEXT:    vmulpd %ymm1, %ymm0, %ymm0
 359 ; AVX-NEXT:    retq
 360   %t0 = fadd <4 x double> %x0, %x1  ; t0 = x0 + x1 (non-multiply op at the head of the chain)
 361   %t1 = fmul <4 x double> %x2, %t0  ; t1 = x2 * t0
 362   %t2 = fmul <4 x double> %x3, %t1  ; t2 = x3 * t1
 363   ret <4 x double> %t2  ; only the AVX run has checks here; they expect the ymm3/ymm2 multiply to be independent of the add
 364 }
 365
 366 ; Verify that SSE and AVX scalar single-precision minimum ops are reassociated.
 367
 368 define float @reassociate_mins_single(float %x0, float %x1, float %x2, float %x3) {
 369 ; SSE-LABEL: reassociate_mins_single:
 370 ; SSE:       # BB#0:
 371 ; SSE-NEXT:    divss %xmm1, %xmm0
 372 ; SSE-NEXT:    minss %xmm3, %xmm2
 373 ; SSE-NEXT:    minss %xmm2, %xmm0
 374 ; SSE-NEXT:    retq
 375 ;
 376 ; AVX-LABEL: reassociate_mins_single:
 377 ; AVX:       # BB#0:
 378 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 379 ; AVX-NEXT:    vminss %xmm3, %xmm2, %xmm1
 380 ; AVX-NEXT:    vminss %xmm1, %xmm0, %xmm0
 381 ; AVX-NEXT:    retq
 382   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 383   %cmp1 = fcmp olt float %x2, %t0  ; ordered x2 < t0
 384   %sel1 = select i1 %cmp1, float %x2, float %t0  ; sel1 = x2 if x2 < t0, else t0
 385   %cmp2 = fcmp olt float %x3, %sel1  ; ordered x3 < sel1
 386   %sel2 = select i1 %cmp2, float %x3, float %sel1  ; sel2 = x3 if x3 < sel1, else sel1
 387   ret float %sel2  ; the checks expect two min instructions, the xmm3/xmm2 one independent of the divide
 388 }
 389
 390 ; Verify that SSE and AVX scalar single-precision maximum ops are reassociated.
 391
 392 define float @reassociate_maxs_single(float %x0, float %x1, float %x2, float %x3) {
 393 ; SSE-LABEL: reassociate_maxs_single:
 394 ; SSE:       # BB#0:
 395 ; SSE-NEXT:    divss %xmm1, %xmm0
 396 ; SSE-NEXT:    maxss %xmm3, %xmm2
 397 ; SSE-NEXT:    maxss %xmm2, %xmm0
 398 ; SSE-NEXT:    retq
 399 ;
 400 ; AVX-LABEL: reassociate_maxs_single:
 401 ; AVX:       # BB#0:
 402 ; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
 403 ; AVX-NEXT:    vmaxss %xmm3, %xmm2, %xmm1
 404 ; AVX-NEXT:    vmaxss %xmm1, %xmm0, %xmm0
 405 ; AVX-NEXT:    retq
 406   %t0 = fdiv float %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 407   %cmp1 = fcmp ogt float %x2, %t0  ; ordered x2 > t0
 408   %sel1 = select i1 %cmp1, float %x2, float %t0  ; sel1 = x2 if x2 > t0, else t0
 409   %cmp2 = fcmp ogt float %x3, %sel1  ; ordered x3 > sel1
 410   %sel2 = select i1 %cmp2, float %x3, float %sel1  ; sel2 = x3 if x3 > sel1, else sel1
 411   ret float %sel2  ; the checks expect two max instructions, the xmm3/xmm2 one independent of the divide
 412 }
 413
 414 ; Verify that SSE and AVX scalar double-precision minimum ops are reassociated.
 415
 416 define double @reassociate_mins_double(double %x0, double %x1, double %x2, double %x3) {
 417 ; SSE-LABEL: reassociate_mins_double:
 418 ; SSE:       # BB#0:
 419 ; SSE-NEXT:    divsd %xmm1, %xmm0
 420 ; SSE-NEXT:    minsd %xmm3, %xmm2
 421 ; SSE-NEXT:    minsd %xmm2, %xmm0
 422 ; SSE-NEXT:    retq
 423 ;
 424 ; AVX-LABEL: reassociate_mins_double:
 425 ; AVX:       # BB#0:
 426 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 427 ; AVX-NEXT:    vminsd %xmm3, %xmm2, %xmm1
 428 ; AVX-NEXT:    vminsd %xmm1, %xmm0, %xmm0
 429 ; AVX-NEXT:    retq
 430   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 431   %cmp1 = fcmp olt double %x2, %t0  ; ordered x2 < t0
 432   %sel1 = select i1 %cmp1, double %x2, double %t0  ; sel1 = x2 if x2 < t0, else t0
 433   %cmp2 = fcmp olt double %x3, %sel1  ; ordered x3 < sel1
 434   %sel2 = select i1 %cmp2, double %x3, double %sel1  ; sel2 = x3 if x3 < sel1, else sel1
 435   ret double %sel2  ; the checks expect two min instructions, the xmm3/xmm2 one independent of the divide
 436 }
 437
 438 ; Verify that SSE and AVX scalar double-precision maximum ops are reassociated.
 439
 440 define double @reassociate_maxs_double(double %x0, double %x1, double %x2, double %x3) {
 441 ; SSE-LABEL: reassociate_maxs_double:
 442 ; SSE:       # BB#0:
 443 ; SSE-NEXT:    divsd %xmm1, %xmm0
 444 ; SSE-NEXT:    maxsd %xmm3, %xmm2
 445 ; SSE-NEXT:    maxsd %xmm2, %xmm0
 446 ; SSE-NEXT:    retq
 447 ;
 448 ; AVX-LABEL: reassociate_maxs_double:
 449 ; AVX:       # BB#0:
 450 ; AVX-NEXT:    vdivsd %xmm1, %xmm0, %xmm0
 451 ; AVX-NEXT:    vmaxsd %xmm3, %xmm2, %xmm1
 452 ; AVX-NEXT:    vmaxsd %xmm1, %xmm0, %xmm0
 453 ; AVX-NEXT:    retq
 454   %t0 = fdiv double %x0, %x1  ; t0 = x0 / x1 (head of the dependency chain)
 455   %cmp1 = fcmp ogt double %x2, %t0  ; ordered x2 > t0
 456   %sel1 = select i1 %cmp1, double %x2, double %t0  ; sel1 = x2 if x2 > t0, else t0
 457   %cmp2 = fcmp ogt double %x3, %sel1  ; ordered x3 > sel1
 458   %sel2 = select i1 %cmp2, double %x3, double %sel1  ; sel2 = x3 if x3 > sel1, else sel1
 459   ret double %sel2  ; the checks expect two max instructions, the xmm3/xmm2 one independent of the divide
 460 }
 461
 462 ; Verify that SSE and AVX 128-bit vector single-precision minimum ops are reassociated.
 463
 464 define <4 x float> @reassociate_mins_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 465 ; SSE-LABEL: reassociate_mins_v4f32:
 466 ; SSE:       # BB#0:
 467 ; SSE-NEXT:    addps %xmm1, %xmm0
 468 ; SSE-NEXT:    minps %xmm3, %xmm2
 469 ; SSE-NEXT:    minps %xmm2, %xmm0
 470 ; SSE-NEXT:    retq
 471 ;
 472 ; AVX-LABEL: reassociate_mins_v4f32:
 473 ; AVX:       # BB#0:
 474 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 475 ; AVX-NEXT:    vminps %xmm3, %xmm2, %xmm1
 476 ; AVX-NEXT:    vminps %xmm1, %xmm0, %xmm0
 477 ; AVX-NEXT:    retq
 478   %t0 = fadd <4 x float> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 479   %cmp1 = fcmp olt <4 x float> %x2, %t0  ; per-element ordered x2 < t0
 480   %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0  ; per element: x2 if x2 < t0, else t0
 481   %cmp2 = fcmp olt <4 x float> %x3, %sel1  ; per-element ordered x3 < sel1
 482   %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1  ; per element: x3 if x3 < sel1, else sel1
 483   ret <4 x float> %sel2  ; the checks expect two min instructions, the xmm3/xmm2 one independent of the add
 484 }
 485
 486 ; Verify that SSE and AVX 128-bit vector single-precision maximum ops are reassociated.
 487
 488 define <4 x float> @reassociate_maxs_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 489 ; SSE-LABEL: reassociate_maxs_v4f32:
 490 ; SSE:       # BB#0:
 491 ; SSE-NEXT:    addps %xmm1, %xmm0
 492 ; SSE-NEXT:    maxps %xmm3, %xmm2
 493 ; SSE-NEXT:    maxps %xmm2, %xmm0
 494 ; SSE-NEXT:    retq
 495 ;
 496 ; AVX-LABEL: reassociate_maxs_v4f32:
 497 ; AVX:       # BB#0:
 498 ; AVX-NEXT:    vaddps %xmm1, %xmm0, %xmm0
 499 ; AVX-NEXT:    vmaxps %xmm3, %xmm2, %xmm1
 500 ; AVX-NEXT:    vmaxps %xmm1, %xmm0, %xmm0
 501 ; AVX-NEXT:    retq
 502   %t0 = fadd <4 x float> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 503   %cmp1 = fcmp ogt <4 x float> %x2, %t0  ; per-element ordered x2 > t0
 504   %sel1 = select <4 x i1> %cmp1, <4 x float> %x2, <4 x float> %t0  ; per element: x2 if x2 > t0, else t0
 505   %cmp2 = fcmp ogt <4 x float> %x3, %sel1  ; per-element ordered x3 > sel1
 506   %sel2 = select <4 x i1> %cmp2, <4 x float> %x3, <4 x float> %sel1  ; per element: x3 if x3 > sel1, else sel1
 507   ret <4 x float> %sel2  ; the checks expect two max instructions, the xmm3/xmm2 one independent of the add
 508 }
 509
 510 ; Verify that SSE and AVX 128-bit vector double-precision minimum ops are reassociated.
 511
 512 define <2 x double> @reassociate_mins_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 513 ; SSE-LABEL: reassociate_mins_v2f64:
 514 ; SSE:       # BB#0:
 515 ; SSE-NEXT:    addpd %xmm1, %xmm0
 516 ; SSE-NEXT:    minpd %xmm3, %xmm2
 517 ; SSE-NEXT:    minpd %xmm2, %xmm0
 518 ; SSE-NEXT:    retq
 519 ;
 520 ; AVX-LABEL: reassociate_mins_v2f64:
 521 ; AVX:       # BB#0:
 522 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 523 ; AVX-NEXT:    vminpd %xmm3, %xmm2, %xmm1
 524 ; AVX-NEXT:    vminpd %xmm1, %xmm0, %xmm0
 525 ; AVX-NEXT:    retq
 526   %t0 = fadd <2 x double> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 527   %cmp1 = fcmp olt <2 x double> %x2, %t0  ; per-element ordered x2 < t0
 528   %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0  ; per element: x2 if x2 < t0, else t0
 529   %cmp2 = fcmp olt <2 x double> %x3, %sel1  ; per-element ordered x3 < sel1
 530   %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1  ; per element: x3 if x3 < sel1, else sel1
 531   ret <2 x double> %sel2  ; the checks expect two min instructions, the xmm3/xmm2 one independent of the add
 532 }
 533
 534 ; Verify that SSE and AVX 128-bit vector double-precision maximum ops are reassociated.
 535
 536 define <2 x double> @reassociate_maxs_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 537 ; SSE-LABEL: reassociate_maxs_v2f64:
 538 ; SSE:       # BB#0:
 539 ; SSE-NEXT:    addpd %xmm1, %xmm0
 540 ; SSE-NEXT:    maxpd %xmm3, %xmm2
 541 ; SSE-NEXT:    maxpd %xmm2, %xmm0
 542 ; SSE-NEXT:    retq
 543 ;
 544 ; AVX-LABEL: reassociate_maxs_v2f64:
 545 ; AVX:       # BB#0:
 546 ; AVX-NEXT:    vaddpd %xmm1, %xmm0, %xmm0
 547 ; AVX-NEXT:    vmaxpd %xmm3, %xmm2, %xmm1
 548 ; AVX-NEXT:    vmaxpd %xmm1, %xmm0, %xmm0
 549 ; AVX-NEXT:    retq
 550   %t0 = fadd <2 x double> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 551   %cmp1 = fcmp ogt <2 x double> %x2, %t0  ; per-element ordered x2 > t0
 552   %sel1 = select <2 x i1> %cmp1, <2 x double> %x2, <2 x double> %t0  ; per element: x2 if x2 > t0, else t0
 553   %cmp2 = fcmp ogt <2 x double> %x3, %sel1  ; per-element ordered x3 > sel1
 554   %sel2 = select <2 x i1> %cmp2, <2 x double> %x3, <2 x double> %sel1  ; per element: x3 if x3 > sel1, else sel1
 555   ret <2 x double> %sel2  ; the checks expect two max instructions, the xmm3/xmm2 one independent of the add
 556 }
 557
 558 ; Verify that AVX 256-bit vector single-precision minimum ops are reassociated.
 559
 560 define <8 x float> @reassociate_mins_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 561 ; AVX-LABEL: reassociate_mins_v8f32:
 562 ; AVX:       # BB#0:
 563 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 564 ; AVX-NEXT:    vminps %ymm3, %ymm2, %ymm1
 565 ; AVX-NEXT:    vminps %ymm1, %ymm0, %ymm0
 566 ; AVX-NEXT:    retq
 567   %t0 = fadd <8 x float> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 568   %cmp1 = fcmp olt <8 x float> %x2, %t0  ; per-element ordered x2 < t0
 569   %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0  ; per element: x2 if x2 < t0, else t0
 570   %cmp2 = fcmp olt <8 x float> %x3, %sel1  ; per-element ordered x3 < sel1
 571   %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1  ; per element: x3 if x3 < sel1, else sel1
 572   ret <8 x float> %sel2  ; only the AVX run has checks here; the ymm3/ymm2 min is expected to be independent of the add
 573 }
 574
 575 ; Verify that AVX 256-bit vector single-precision maximum ops are reassociated.
 576
 577 define <8 x float> @reassociate_maxs_v8f32(<8 x float> %x0, <8 x float> %x1, <8 x float> %x2, <8 x float> %x3) {
 578 ; AVX-LABEL: reassociate_maxs_v8f32:
 579 ; AVX:       # BB#0:
 580 ; AVX-NEXT:    vaddps %ymm1, %ymm0, %ymm0
 581 ; AVX-NEXT:    vmaxps %ymm3, %ymm2, %ymm1
 582 ; AVX-NEXT:    vmaxps %ymm1, %ymm0, %ymm0
 583 ; AVX-NEXT:    retq
 584   %t0 = fadd <8 x float> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 585   %cmp1 = fcmp ogt <8 x float> %x2, %t0  ; per-element ordered x2 > t0
 586   %sel1 = select <8 x i1> %cmp1, <8 x float> %x2, <8 x float> %t0  ; per element: x2 if x2 > t0, else t0
 587   %cmp2 = fcmp ogt <8 x float> %x3, %sel1  ; per-element ordered x3 > sel1
 588   %sel2 = select <8 x i1> %cmp2, <8 x float> %x3, <8 x float> %sel1  ; per element: x3 if x3 > sel1, else sel1
 589   ret <8 x float> %sel2  ; only the AVX run has checks here; the ymm3/ymm2 max is expected to be independent of the add
 590 }
 591
 592 ; Verify that AVX 256-bit vector double-precision minimum ops are reassociated.
 593
 594 define <4 x double> @reassociate_mins_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 595 ; AVX-LABEL: reassociate_mins_v4f64:
 596 ; AVX:       # BB#0:
 597 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 598 ; AVX-NEXT:    vminpd %ymm3, %ymm2, %ymm1
 599 ; AVX-NEXT:    vminpd %ymm1, %ymm0, %ymm0
 600 ; AVX-NEXT:    retq
 601   %t0 = fadd <4 x double> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 602   %cmp1 = fcmp olt <4 x double> %x2, %t0  ; per-element ordered x2 < t0
 603   %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0  ; per element: x2 if x2 < t0, else t0
 604   %cmp2 = fcmp olt <4 x double> %x3, %sel1  ; per-element ordered x3 < sel1
 605   %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1  ; per element: x3 if x3 < sel1, else sel1
 606   ret <4 x double> %sel2  ; only the AVX run has checks here; the ymm3/ymm2 min is expected to be independent of the add
 607 }
 608
 609 ; Verify that AVX 256-bit vector double-precision maximum ops are reassociated.
 610
 611 define <4 x double> @reassociate_maxs_v4f64(<4 x double> %x0, <4 x double> %x1, <4 x double> %x2, <4 x double> %x3) {
 612 ; AVX-LABEL: reassociate_maxs_v4f64:
 613 ; AVX:       # BB#0:
 614 ; AVX-NEXT:    vaddpd %ymm1, %ymm0, %ymm0
 615 ; AVX-NEXT:    vmaxpd %ymm3, %ymm2, %ymm1
 616 ; AVX-NEXT:    vmaxpd %ymm1, %ymm0, %ymm0
 617 ; AVX-NEXT:    retq
 618   %t0 = fadd <4 x double> %x0, %x1  ; t0 = x0 + x1 (head of the dependency chain)
 619   %cmp1 = fcmp ogt <4 x double> %x2, %t0  ; per-element ordered x2 > t0
 620   %sel1 = select <4 x i1> %cmp1, <4 x double> %x2, <4 x double> %t0  ; per element: x2 if x2 > t0, else t0
 621   %cmp2 = fcmp ogt <4 x double> %x3, %sel1  ; per-element ordered x3 > sel1
 622   %sel2 = select <4 x i1> %cmp2, <4 x double> %x3, <4 x double> %sel1  ; per element: x3 if x3 > sel1, else sel1
 623   ret <4 x double> %sel2  ; only the AVX run has checks here; the ymm3/ymm2 max is expected to be independent of the add
 624 }
 625
 626 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 627 ; Verify that reassociation is not happening needlessly or wrongly.
 628
 629 declare double @bar()  ; external function with no body in this file; the two tests below call it to produce their add operands
 630
 631 define double @reassociate_adds_from_calls() {
 632 ; AVX-LABEL: reassociate_adds_from_calls:
 633 ; AVX:       callq   bar
 634 ; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
 635 ; AVX-NEXT:  callq   bar
 636 ; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
 637 ; AVX-NEXT:  callq   bar
 638 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
 639 ; AVX-NEXT:  callq   bar
 640 ; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
 641 ; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
 642 ; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
 643 ; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
 644
 645   %x0 = call double @bar()  ; first operand; the checks expect it stored at 16(%rsp)
 646   %x1 = call double @bar()  ; second operand; the checks expect it stored at 8(%rsp)
 647   %x2 = call double @bar()  ; third operand; the checks expect it stored at (%rsp)
 648   %x3 = call double @bar()  ; fourth operand; not stored by the checked code, still in xmm0 for the adds
 649   %t0 = fadd double %x0, %x1  ; t0 = x0 + x1
 650   %t1 = fadd double %t0, %x2  ; t1 = t0 + x2 (serial dependency on t0)
 651   %t2 = fadd double %t1, %x3  ; t2 = t1 + x3 (serial dependency on t1)
 652   ret double %t2  ; serial chain in the IR; the checked adds form two independent sums that are then combined
 653 }
 654
 655 define double @already_reassociated() {
 656 ; AVX-LABEL: already_reassociated:
 657 ; AVX:       callq   bar
 658 ; AVX-NEXT:  vmovsd  %xmm0, 16(%rsp)
 659 ; AVX-NEXT:  callq   bar
 660 ; AVX-NEXT:  vmovsd  %xmm0, 8(%rsp)
 661 ; AVX-NEXT:  callq   bar
 662 ; AVX-NEXT:  vmovsd  %xmm0, (%rsp)
 663 ; AVX-NEXT:  callq   bar
 664 ; AVX-NEXT:  vmovsd  8(%rsp), %xmm1
 665 ; AVX:       vaddsd  16(%rsp), %xmm1, %xmm1
 666 ; AVX-NEXT:  vaddsd  (%rsp), %xmm0, %xmm0
 667 ; AVX-NEXT:  vaddsd  %xmm0, %xmm1, %xmm0
 668
 669   %x0 = call double @bar()  ; first operand; the checks expect it stored at 16(%rsp)
 670   %x1 = call double @bar()  ; second operand; the checks expect it stored at 8(%rsp)
 671   %x2 = call double @bar()  ; third operand; the checks expect it stored at (%rsp)
 672   %x3 = call double @bar()  ; fourth operand; not stored by the checked code, still in xmm0 for the adds
 673   %t0 = fadd double %x0, %x1  ; t0 = x0 + x1
 674   %t1 = fadd double %x2, %x3  ; t1 = x2 + x3 (independent of t0)
 675   %t2 = fadd double %t0, %t1  ; t2 = t0 + t1
 676   ret double %t2  ; the IR is already two independent sums; the checked adds keep that shape
 677 }
 678