test/CodeGen/AArch64/machine-combiner.ll

   1 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math -disable-post-ra < %s | FileCheck %s
   2
   3 ; Incremental updates of the instruction depths should be enough for this test
   4 ; case.
   5 ; RUN: llc -mtriple=aarch64-gnu-linux -mcpu=cortex-a57 -enable-unsafe-fp-math \
   6 ; RUN:     -disable-post-ra -machine-combiner-inc-threshold=0 -machine-combiner-verify-pattern-order=true < %s | FileCheck %s
   7
   8 ; Verify that the first two adds are independent regardless of how the inputs are
   9 ; commuted. The destination registers are used as source registers for the third add.
  10
  11 define float @reassociate_adds1(float %x0, float %x1, float %x2, float %x3) {
  12 ; CHECK-LABEL:   reassociate_adds1:
  13 ; CHECK:         fadd  s0, s0, s1
  14 ; CHECK-NEXT:    fadd  s1, s2, s3
  15 ; CHECK-NEXT:    fadd  s0, s0, s1
  16 ; CHECK-NEXT:    ret
  17   %pair = fadd float %x0, %x1      ; x0 + x1
  18   %triple = fadd float %pair, %x2  ; (x0 + x1) + x2
  19   %total = fadd float %triple, %x3 ; ((x0 + x1) + x2) + x3 -- serial chain, all operands on the left
  20   ret float %total
  21 }
  22
  23 define float @reassociate_adds2(float %x0, float %x1, float %x2, float %x3) {
  24 ; CHECK-LABEL:   reassociate_adds2:
  25 ; CHECK:         fadd  s0, s0, s1
  26 ; CHECK-NEXT:    fadd  s1, s2, s3
  27 ; CHECK-NEXT:    fadd  s0, s0, s1
  28 ; CHECK-NEXT:    ret
  29   %pair = fadd float %x0, %x1      ; x0 + x1
  30   %triple = fadd float %x2, %pair  ; x2 + (x0 + x1) -- middle add commuted
  31   %total = fadd float %triple, %x3 ; (x2 + (x0 + x1)) + x3
  32   ret float %total
  33 }
  34
  35 define float @reassociate_adds3(float %x0, float %x1, float %x2, float %x3) {
  36 ; CHECK-LABEL:   reassociate_adds3:
  37 ; CHECK:         fadd  s0, s0, s1
  38 ; CHECK-NEXT:    fadd  s1, s2, s3
  39 ; CHECK-NEXT:    fadd  s0, s0, s1
  40 ; CHECK-NEXT:    ret
  41   %t0 = fadd float %x0, %x1  ; x0 + x1
  42   %t1 = fadd float %t0, %x2  ; (x0 + x1) + x2
  43   %t2 = fadd float %x3, %t1  ; x3 + ((x0 + x1) + x2) -- final add commuted
  44   ret float %t2
  45 }
  46
  47 define float @reassociate_adds4(float %x0, float %x1, float %x2, float %x3) {
  48 ; CHECK-LABEL:   reassociate_adds4:
  49 ; CHECK:         fadd  s0, s0, s1
  50 ; CHECK-NEXT:    fadd  s1, s2, s3
  51 ; CHECK-NEXT:    fadd  s0, s0, s1
  52 ; CHECK-NEXT:    ret
  53   %t0 = fadd float %x0, %x1  ; x0 + x1
  54   %t1 = fadd float %x2, %t0  ; x2 + (x0 + x1) -- middle add commuted
  55   %t2 = fadd float %x3, %t1  ; x3 + (x2 + (x0 + x1)) -- final add commuted
  56   ret float %t2
  57 }
  58
  59 ; Verify that we reassociate some of these ops. The optimal balanced tree of adds is not
  60 ; produced because that would cost more compile time.
  61
  62 define float @reassociate_adds5(float %x0, float %x1, float %x2, float %x3, float %x4, float %x5, float %x6, float %x7) {
  63 ; CHECK-LABEL:   reassociate_adds5:
  64 ; CHECK:         fadd  s0, s0, s1
  65 ; CHECK-NEXT:    fadd  s1, s2, s3
  66 ; CHECK-NEXT:    fadd  s0, s0, s1
  67 ; CHECK-NEXT:    fadd  s1, s4, s5
  68 ; CHECK-NEXT:    fadd  s1, s1, s6
  69 ; CHECK-NEXT:    fadd  s0, s0, s1
  70 ; CHECK-NEXT:    fadd  s0, s0, s7
  71 ; CHECK-NEXT:    ret
  72   %acc1 = fadd float %x0, %x1    ; x0 + x1
  73   %acc2 = fadd float %acc1, %x2  ; running sum + x2
  74   %acc3 = fadd float %acc2, %x3  ; running sum + x3
  75   %acc4 = fadd float %acc3, %x4  ; running sum + x4
  76   %acc5 = fadd float %acc4, %x5  ; running sum + x5
  77   %acc6 = fadd float %acc5, %x6  ; running sum + x6
  78   %acc7 = fadd float %acc6, %x7  ; running sum + x7 -- seven dependent adds in the input
  79   ret float %acc7
  80 }
  81
  82 ; Verify that we only need two associative operations to reassociate the operands.
  83 ; Also, we should reassociate such that the result of the high latency division
  84 ; is used by the final 'add' rather than reassociating the %x3 operand with the
  85 ; division. The latter reassociation would not improve anything.
  86
  87 define float @reassociate_adds6(float %x0, float %x1, float %x2, float %x3) {
  88 ; CHECK-LABEL:   reassociate_adds6:
  89 ; CHECK:         fdiv  s0, s0, s1
  90 ; CHECK-NEXT:    fadd  s1, s2, s3
  91 ; CHECK-NEXT:    fadd  s0, s0, s1
  92 ; CHECK-NEXT:    ret
  93   %quot = fdiv float %x0, %x1     ; x0 / x1
  94   %inner = fadd float %x2, %quot  ; x2 + (x0 / x1)
  95   %outer = fadd float %x3, %inner ; x3 + (x2 + (x0 / x1))
  96   ret float %outer
  97 }
  98
  99 ; Verify that scalar single-precision multiplies are reassociated.
 100
 101 define float @reassociate_muls1(float %x0, float %x1, float %x2, float %x3) {
 102 ; CHECK-LABEL:   reassociate_muls1:
 103 ; CHECK:         fdiv  s0, s0, s1
 104 ; CHECK-NEXT:    fmul  s1, s2, s3
 105 ; CHECK-NEXT:    fmul  s0, s0, s1
 106 ; CHECK-NEXT:    ret
 107   %quot = fdiv float %x0, %x1     ; x0 / x1
 108   %inner = fmul float %x2, %quot  ; x2 * (x0 / x1)
 109   %outer = fmul float %x3, %inner ; x3 * (x2 * (x0 / x1))
 110   ret float %outer
 111 }
 112
 113 ; Verify that scalar double-precision adds are reassociated.
 114
 115 define double @reassociate_adds_double(double %x0, double %x1, double %x2, double %x3) {
 116 ; CHECK-LABEL:   reassociate_adds_double:
 117 ; CHECK:         fdiv  d0, d0, d1
 118 ; CHECK-NEXT:    fadd  d1, d2, d3
 119 ; CHECK-NEXT:    fadd  d0, d0, d1
 120 ; CHECK-NEXT:    ret
 121   %quot = fdiv double %x0, %x1     ; x0 / x1
 122   %inner = fadd double %x2, %quot  ; x2 + (x0 / x1)
 123   %outer = fadd double %x3, %inner ; x3 + (x2 + (x0 / x1))
 124   ret double %outer
 125 }
 126
 127 ; Verify that scalar double-precision multiplies are reassociated.
 128
 129 define double @reassociate_muls_double(double %x0, double %x1, double %x2, double %x3) {
 130 ; CHECK-LABEL:   reassociate_muls_double:
 131 ; CHECK:         fdiv  d0, d0, d1
 132 ; CHECK-NEXT:    fmul  d1, d2, d3
 133 ; CHECK-NEXT:    fmul  d0, d0, d1
 134 ; CHECK-NEXT:    ret
 135   %quot = fdiv double %x0, %x1     ; x0 / x1
 136   %inner = fmul double %x2, %quot  ; x2 * (x0 / x1)
 137   %outer = fmul double %x3, %inner ; x3 * (x2 * (x0 / x1))
 138   ret double %outer
 139 }
 140
 141 ; Verify that we reassociate vector instructions too.
 142
 143 define <4 x float> @vector_reassociate_adds1(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 144 ; CHECK-LABEL:   vector_reassociate_adds1:
 145 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 146 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 147 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
 148 ; CHECK-NEXT:    ret
 149   %pair = fadd <4 x float> %x0, %x1      ; x0 + x1
 150   %triple = fadd <4 x float> %pair, %x2  ; (x0 + x1) + x2
 151   %total = fadd <4 x float> %triple, %x3 ; ((x0 + x1) + x2) + x3
 152   ret <4 x float> %total
 153 }
 154
 155 define <4 x float> @vector_reassociate_adds2(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 156 ; CHECK-LABEL:   vector_reassociate_adds2:
 157 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 158 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 159 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 160   %t0 = fadd <4 x float> %x0, %x1  ; x0 + x1
 161   %t1 = fadd <4 x float> %x2, %t0  ; x2 + (x0 + x1) -- middle add commuted
 162   %t2 = fadd <4 x float> %t1, %x3  ; (x2 + (x0 + x1)) + x3
 163   ret <4 x float> %t2
 164 }
 165
 166 define <4 x float> @vector_reassociate_adds3(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 167 ; CHECK-LABEL:   vector_reassociate_adds3:
 168 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 169 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 170 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 171   %t0 = fadd <4 x float> %x0, %x1  ; x0 + x1
 172   %t1 = fadd <4 x float> %t0, %x2  ; (x0 + x1) + x2
 173   %t2 = fadd <4 x float> %x3, %t1  ; x3 + ((x0 + x1) + x2) -- final add commuted
 174   ret <4 x float> %t2
 175 }
 176
 177 define <4 x float> @vector_reassociate_adds4(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 178 ; CHECK-LABEL:   vector_reassociate_adds4:
 179 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 180 ; CHECK-NEXT:    fadd  v1.4s, v2.4s, v3.4s
 181 ; CHECK-NEXT:    fadd  v0.4s, v0.4s, v1.4s
     ; CHECK-NEXT:    ret
 182   %t0 = fadd <4 x float> %x0, %x1  ; x0 + x1
 183   %t1 = fadd <4 x float> %x2, %t0  ; x2 + (x0 + x1) -- middle add commuted
 184   %t2 = fadd <4 x float> %x3, %t1  ; x3 + (x2 + (x0 + x1)) -- final add commuted
 185   ret <4 x float> %t2
 186 }
 187 ; Verify that 128-bit vector single-precision multiplies are reassociated.
 188
 189 define <4 x float> @reassociate_muls_v4f32(<4 x float> %x0, <4 x float> %x1, <4 x float> %x2, <4 x float> %x3) {
 190 ; CHECK-LABEL:   reassociate_muls_v4f32:
 191 ; CHECK:         fadd  v0.4s, v0.4s, v1.4s
 192 ; CHECK-NEXT:    fmul  v1.4s, v2.4s, v3.4s
 193 ; CHECK-NEXT:    fmul  v0.4s, v0.4s, v1.4s
 194 ; CHECK-NEXT:    ret
 195   %sum = fadd <4 x float> %x0, %x1      ; x0 + x1
 196   %inner = fmul <4 x float> %x2, %sum   ; x2 * (x0 + x1)
 197   %outer = fmul <4 x float> %x3, %inner ; x3 * (x2 * (x0 + x1))
 198   ret <4 x float> %outer
 199 }
 200
 201 ; Verify that 128-bit vector double-precision multiplies are reassociated.
 202
 203 define <2 x double> @reassociate_muls_v2f64(<2 x double> %x0, <2 x double> %x1, <2 x double> %x2, <2 x double> %x3) {
 204 ; CHECK-LABEL:   reassociate_muls_v2f64:
 205 ; CHECK:         fadd  v0.2d, v0.2d, v1.2d
 206 ; CHECK-NEXT:    fmul  v1.2d, v2.2d, v3.2d
 207 ; CHECK-NEXT:    fmul  v0.2d, v0.2d, v1.2d
 208 ; CHECK-NEXT:    ret
 209   %sum = fadd <2 x double> %x0, %x1      ; x0 + x1
 210   %inner = fmul <2 x double> %x2, %sum   ; x2 * (x0 + x1)
 211   %outer = fmul <2 x double> %x3, %inner ; x3 * (x2 * (x0 + x1))
 212   ret <2 x double> %outer
 213 }
 214
 215 ; PR25016: https://llvm.org/bugs/show_bug.cgi?id=25016
 216 ; Verify that reassociation is not happening needlessly or wrongly.
 217
 218 declare double @bar()  ; declared only (no body here); each test below calls it four times to produce its add operands
 219
 220 define double @reassociate_adds_from_calls() {
 221 ; CHECK-LABEL: reassociate_adds_from_calls:
 222 ; CHECK:       bl   bar
 223 ; CHECK-NEXT:  mov  v8.16b, v0.16b
 224 ; CHECK-NEXT:  bl   bar
 225 ; CHECK-NEXT:  mov  v9.16b, v0.16b
 226 ; CHECK-NEXT:  bl   bar
 227 ; CHECK-NEXT:  mov  v10.16b, v0.16b
 228 ; CHECK-NEXT:  bl   bar
 229 ; CHECK:       fadd d1, d8, d9
 230 ; CHECK-NEXT:  fadd d0, d10, d0
 231 ; CHECK-NEXT:  fadd d0, d1, d0
 232   %r0 = call double @bar()
 233   %r1 = call double @bar()
 234   %r2 = call double @bar()
 235   %r3 = call double @bar()
 236   %pair = fadd double %r0, %r1      ; r0 + r1
 237   %triple = fadd double %pair, %r2  ; (r0 + r1) + r2
 238   %total = fadd double %triple, %r3 ; ((r0 + r1) + r2) + r3 -- serial chain in the input
 239   ret double %total
 240 }
 241
 242 define double @already_reassociated() {
 243 ; CHECK-LABEL: already_reassociated:
 244 ; CHECK:       bl   bar
 245 ; CHECK-NEXT:  mov  v8.16b, v0.16b
 246 ; CHECK-NEXT:  bl   bar
 247 ; CHECK-NEXT:  mov  v9.16b, v0.16b
 248 ; CHECK-NEXT:  bl   bar
 249 ; CHECK-NEXT:  mov  v10.16b, v0.16b
 250 ; CHECK-NEXT:  bl   bar
 251 ; CHECK:       fadd d1, d8, d9
 252 ; CHECK-NEXT:  fadd d0, d10, d0
 253 ; CHECK-NEXT:  fadd d0, d1, d0
 254   %r0 = call double @bar()
 255   %r1 = call double @bar()
 256   %r2 = call double @bar()
 257   %r3 = call double @bar()
 258   %lhs = fadd double %r0, %r1     ; r0 + r1
 259   %rhs = fadd double %r2, %r3     ; r2 + r3 -- independent of %lhs
 260   %total = fadd double %lhs, %rhs ; (r0 + r1) + (r2 + r3) -- input is already a balanced tree
 261   ret double %total
 262 }
 263