test/CodeGen/AArch64/aarch64-a57-fp-load-balancing.ll

   1 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-EVEN
   2 ; RUN: llc < %s -mcpu=cortex-a57 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-BALFP --check-prefix CHECK-ODD
   3 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-EVEN
   4 ; RUN: llc < %s -mcpu=cortex-a53 -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-A53 --check-prefix CHECK-ODD
   5
   6 ; The following tests use the balance-fp-ops feature, and should be independent of
   7 ; the target cpu.
   8
   9 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops -aarch64-a57-fp-load-balancing-override=1 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-EVEN --check-prefix CHECK-BALFP
  10 ; RUN: llc < %s -mtriple=aarch64-linux-gnueabi -mattr=+balance-fp-ops  -aarch64-a57-fp-load-balancing-override=2 -aarch64-a57-fp-load-balancing-force-all -enable-misched=false -enable-post-misched=false | FileCheck %s --check-prefix CHECK --check-prefix CHECK-ODD --check-prefix CHECK-BALFP
  11
  12 ; Test the AArch64A57FPLoadBalancing pass. This pass relies heavily on register allocation, so
  13 ; our test strategy is to:
  14 ;   * Force the pass to always perform register swapping even if the dest register is of the
  15 ;     correct color already (-force-all)
  16 ;   * Force the pass to ignore all hints it obtained from regalloc (-override=N), and run
  17 ;     it twice, once where it always hints even (=1), and once where it always hints odd (=2).
  18 ;
  19 ; We then use regex magic to check that in the two cases the register allocation is
  20 ; different; this is what gives us the testing coverage and distinguishes cases where
  21 ; the pass has done some work versus accidental regalloc.
  22
  23 target datalayout = "e-m:e-i64:64-i128:128-n32:64-S128"  ; module-level data layout string
  24 target triple = "aarch64"  ; module-level triple; the two balance-fp-ops RUN lines pass -mtriple explicitly
  25
  26 ; Non-overlapping groups - shouldn't need any changing at all.
  27
  28 ; CHECK-LABEL: f1:
  29 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
  30 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
  31 ; CHECK: fmadd [[x]]
  32 ; CHECK: fmsub [[x]]
  33 ; CHECK: fmadd [[x]]
  34 ; CHECK: str [[x]]
  35
  36 define void @f1(double* nocapture readonly %p, double* nocapture %q) #0 {  ; two FP chains computed one after the other: the first is stored to q[0], the second to q[1]
  37 entry:
  38   %0 = load double, double* %p, align 8
  39   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  40   %1 = load double, double* %arrayidx1, align 8
  41   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  42   %2 = load double, double* %arrayidx2, align 8
  43   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  44   %3 = load double, double* %arrayidx3, align 8
  45   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  46   %4 = load double, double* %arrayidx4, align 8
  47   %mul = fmul fast double %0, %1  ; first chain: each fmul feeds the fadd/fsub that follows it
  48   %add = fadd fast double %mul, %4
  49   %mul5 = fmul fast double %1, %2
  50   %add6 = fadd fast double %mul5, %add
  51   %mul7 = fmul fast double %1, %3
  52   %sub = fsub fast double %add6, %mul7  ; the only subtraction; the patterns above expect an fmsub here
  53   %mul8 = fmul fast double %2, %3
  54   %add9 = fadd fast double %mul8, %sub
  55   store double %add9, double* %q, align 8  ; first chain ends here, before p[5..7] are loaded
  56   %arrayidx11 = getelementptr inbounds double, double* %p, i64 5
  57   %5 = load double, double* %arrayidx11, align 8
  58   %arrayidx12 = getelementptr inbounds double, double* %p, i64 6
  59   %6 = load double, double* %arrayidx12, align 8
  60   %arrayidx13 = getelementptr inbounds double, double* %p, i64 7
  61   %7 = load double, double* %arrayidx13, align 8
  62   %mul15 = fmul fast double %6, %7  ; second chain: uses %0 and p[5..7] only
  63   %mul16 = fmul fast double %0, %5
  64   %add17 = fadd fast double %mul16, %mul15
  65   %mul18 = fmul fast double %5, %6
  66   %add19 = fadd fast double %mul18, %add17
  67   %arrayidx20 = getelementptr inbounds double, double* %q, i64 1
  68   store double %add19, double* %arrayidx20, align 8  ; second chain result goes to q[1]
  69   ret void
  70 }
  71
  72 ; Overlapping groups - coloring needed.
  73
  74 ; CHECK-LABEL: f2:
  75 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
  76 ; CHECK-EVEN: fmul [[y:d[0-9]*[13579]]]
  77 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
  78 ; CHECK-ODD: fmul [[y:d[0-9]*[02468]]]
  79 ; CHECK: fmadd [[x]]
  80 ; CHECK: fmadd [[y]]
  81 ; CHECK: fmsub [[x]]
  82 ; CHECK: fmadd [[y]]
  83 ; CHECK: fmadd [[x]]
  84 ; CHECK-BALFP: stp [[x]], [[y]]
  85 ; CHECK-A53-DAG: str [[x]]
  86 ; CHECK-A53-DAG: str [[y]]
  87
  88 define void @f2(double* nocapture readonly %p, double* nocapture %q) #0 {  ; two FP chains whose instructions are interleaved; results go to q[0] and q[1]
  89 entry:
  90   %0 = load double, double* %p, align 8
  91   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
  92   %1 = load double, double* %arrayidx1, align 8
  93   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
  94   %2 = load double, double* %arrayidx2, align 8
  95   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
  96   %3 = load double, double* %arrayidx3, align 8
  97   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
  98   %4 = load double, double* %arrayidx4, align 8
  99   %arrayidx5 = getelementptr inbounds double, double* %p, i64 5
 100   %5 = load double, double* %arrayidx5, align 8
 101   %arrayidx6 = getelementptr inbounds double, double* %p, i64 6
 102   %6 = load double, double* %arrayidx6, align 8
 103   %arrayidx7 = getelementptr inbounds double, double* %p, i64 7
 104   %7 = load double, double* %arrayidx7, align 8
 105   %mul = fmul fast double %0, %1  ; chain ending in %add17 (the [[x]] register in the patterns above)
 106   %add = fadd fast double %mul, %7
 107   %mul8 = fmul fast double %5, %6  ; chain ending in %add15 starts with a bare multiply (the [[y]] register)
 108   %mul9 = fmul fast double %1, %2
 109   %add10 = fadd fast double %mul9, %add
 110   %mul11 = fmul fast double %3, %4
 111   %add12 = fadd fast double %mul11, %mul8
 112   %mul13 = fmul fast double %1, %3
 113   %sub = fsub fast double %add10, %mul13
 114   %mul14 = fmul fast double %4, %5
 115   %add15 = fadd fast double %mul14, %add12
 116   %mul16 = fmul fast double %2, %3
 117   %add17 = fadd fast double %mul16, %sub
 118   store double %add17, double* %q, align 8  ; both results are stored to adjacent slots q[0] and q[1]
 119   %arrayidx19 = getelementptr inbounds double, double* %q, i64 1
 120   store double %add15, double* %arrayidx19, align 8
 121   ret void
 122 }
 123
 124 ; Dest register is live on block exit - fixup needed.
 125
 126 ; CHECK-LABEL: f3:
 127 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
 128 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
 129 ; CHECK: fmadd [[x]]
 130 ; CHECK: fmsub [[x]]
 131 ; CHECK: fmadd [[y:d[0-9]+]], {{.*}}, [[x]]
 132 ; CHECK: str [[y]]
 133
 134 define void @f3(double* nocapture readonly %p, double* nocapture %q) #0 {  ; one FP chain in entry whose result is only stored in if.end
 135 entry:
 136   %0 = load double, double* %p, align 8
 137   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
 138   %1 = load double, double* %arrayidx1, align 8
 139   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
 140   %2 = load double, double* %arrayidx2, align 8
 141   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
 142   %3 = load double, double* %arrayidx3, align 8
 143   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
 144   %4 = load double, double* %arrayidx4, align 8
 145   %mul = fmul fast double %0, %1  ; same multiply/accumulate sequence as the first chain of f1
 146   %add = fadd fast double %mul, %4
 147   %mul5 = fmul fast double %1, %2
 148   %add6 = fadd fast double %mul5, %add
 149   %mul7 = fmul fast double %1, %3
 150   %sub = fsub fast double %add6, %mul7
 151   %mul8 = fmul fast double %2, %3
 152   %add9 = fadd fast double %mul8, %sub  ; defined here, used in if.end: live out of the entry block
 153   %cmp = fcmp oeq double %3, 0.000000e+00
 154   br i1 %cmp, label %if.then, label %if.end
 155
 156 if.then:                                          ; preds = %entry
 157   tail call void bitcast (void (...)* @g to void ()*)() #2
 158   br label %if.end
 159
 160 if.end:                                           ; preds = %if.then, %entry
 161   store double %add9, double* %q, align 8  ; the chain result is consumed only after the optional call to @g
 162   ret void
 163 }
 164
 165 declare void @g(...) #1  ; external varargs callee; called from if.then in f3 and f5
 166
 167 ; Single precision version of f2.
 168
 169 ; CHECK-LABEL: f4:
 170 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
 171 ; CHECK-EVEN: fmul [[y:s[0-9]*[13579]]]
 172 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
 173 ; CHECK-ODD: fmul [[y:s[0-9]*[02468]]]
 174 ; CHECK: fmadd [[x]]
 175 ; CHECK: fmadd [[y]]
 176 ; CHECK: fmsub [[x]]
 177 ; CHECK: fmadd [[y]]
 178 ; CHECK: fmadd [[x]]
 179 ; CHECK-BALFP: stp [[x]], [[y]]
 180 ; CHECK-A53-DAG: str [[x]]
 181 ; CHECK-A53-DAG: str [[y]]
 182
 183 define void @f4(float* nocapture readonly %p, float* nocapture %q) #0 {  ; f2 with float instead of double (4-byte aligned accesses)
 184 entry:
 185   %0 = load float, float* %p, align 4
 186   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
 187   %1 = load float, float* %arrayidx1, align 4
 188   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
 189   %2 = load float, float* %arrayidx2, align 4
 190   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
 191   %3 = load float, float* %arrayidx3, align 4
 192   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
 193   %4 = load float, float* %arrayidx4, align 4
 194   %arrayidx5 = getelementptr inbounds float, float* %p, i64 5
 195   %5 = load float, float* %arrayidx5, align 4
 196   %arrayidx6 = getelementptr inbounds float, float* %p, i64 6
 197   %6 = load float, float* %arrayidx6, align 4
 198   %arrayidx7 = getelementptr inbounds float, float* %p, i64 7
 199   %7 = load float, float* %arrayidx7, align 4
 200   %mul = fmul fast float %0, %1  ; chain ending in %add17 (the [[x]] register in the patterns above)
 201   %add = fadd fast float %mul, %7
 202   %mul8 = fmul fast float %5, %6  ; chain ending in %add15 starts with a bare multiply (the [[y]] register)
 203   %mul9 = fmul fast float %1, %2
 204   %add10 = fadd fast float %mul9, %add
 205   %mul11 = fmul fast float %3, %4
 206   %add12 = fadd fast float %mul11, %mul8
 207   %mul13 = fmul fast float %1, %3
 208   %sub = fsub fast float %add10, %mul13
 209   %mul14 = fmul fast float %4, %5
 210   %add15 = fadd fast float %mul14, %add12
 211   %mul16 = fmul fast float %2, %3
 212   %add17 = fadd fast float %mul16, %sub
 213   store float %add17, float* %q, align 4  ; both results are stored to adjacent slots q[0] and q[1]
 214   %arrayidx19 = getelementptr inbounds float, float* %q, i64 1
 215   store float %add15, float* %arrayidx19, align 4
 216   ret void
 217 }
 218
 219 ; Single precision version of f3.
 220
 221 ; CHECK-LABEL: f5:
 222 ; CHECK-EVEN: fmadd [[x:s[0-9]*[02468]]]
 223 ; CHECK-ODD: fmadd [[x:s[0-9]*[13579]]]
 224 ; CHECK: fmadd [[x]]
 225 ; CHECK: fmsub [[x]]
 226 ; CHECK: fmadd [[y:s[0-9]+]], {{.*}}, [[x]]
 227 ; CHECK: str [[y]]
 228
 229 define void @f5(float* nocapture readonly %p, float* nocapture %q) #0 {  ; f3 with float instead of double (4-byte aligned accesses)
 230 entry:
 231   %0 = load float, float* %p, align 4
 232   %arrayidx1 = getelementptr inbounds float, float* %p, i64 1
 233   %1 = load float, float* %arrayidx1, align 4
 234   %arrayidx2 = getelementptr inbounds float, float* %p, i64 2
 235   %2 = load float, float* %arrayidx2, align 4
 236   %arrayidx3 = getelementptr inbounds float, float* %p, i64 3
 237   %3 = load float, float* %arrayidx3, align 4
 238   %arrayidx4 = getelementptr inbounds float, float* %p, i64 4
 239   %4 = load float, float* %arrayidx4, align 4
 240   %mul = fmul fast float %0, %1  ; single chain, same shape as in f3
 241   %add = fadd fast float %mul, %4
 242   %mul5 = fmul fast float %1, %2
 243   %add6 = fadd fast float %mul5, %add
 244   %mul7 = fmul fast float %1, %3
 245   %sub = fsub fast float %add6, %mul7
 246   %mul8 = fmul fast float %2, %3
 247   %add9 = fadd fast float %mul8, %sub  ; defined here, used in if.end: live out of the entry block
 248   %cmp = fcmp oeq float %3, 0.000000e+00
 249   br i1 %cmp, label %if.then, label %if.end
 250
 251 if.then:                                          ; preds = %entry
 252   tail call void bitcast (void (...)* @g to void ()*)() #2
 253   br label %if.end
 254
 255 if.end:                                           ; preds = %if.then, %entry
 256   store float %add9, float* %q, align 4  ; the chain result is consumed only after the optional call to @g
 257   ret void
 258 }
 259
 260 ; Test that regmask clobbering stops a chain sequence.
 261
 262 ; CHECK-LABEL: f6:
 263 ; CHECK-EVEN: fmadd [[x:d[0-9]*[02468]]]
 264 ; CHECK-ODD: fmadd [[x:d[0-9]*[13579]]]
 265 ; CHECK: fmadd [[x]]
 266 ; CHECK: fmsub [[x]]
 267 ; CHECK: fmadd d0, {{.*}}, [[x]]
 268 ; CHECK: bl hh
 269 ; CHECK: str d0
 270
 271 define void @f6(double* nocapture readonly %p, double* nocapture %q) #0 {  ; one FP chain whose result is passed to a call; the call's return value is what gets stored
 272 entry:
 273   %0 = load double, double* %p, align 8
 274   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
 275   %1 = load double, double* %arrayidx1, align 8
 276   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
 277   %2 = load double, double* %arrayidx2, align 8
 278   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
 279   %3 = load double, double* %arrayidx3, align 8
 280   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
 281   %4 = load double, double* %arrayidx4, align 8
 282   %mul = fmul fast double %0, %1  ; same multiply/accumulate sequence as the first chain of f1
 283   %add = fadd fast double %mul, %4
 284   %mul5 = fmul fast double %1, %2
 285   %add6 = fadd fast double %mul5, %add
 286   %mul7 = fmul fast double %1, %3
 287   %sub = fsub fast double %add6, %mul7
 288   %mul8 = fmul fast double %2, %3
 289   %add9 = fadd fast double %mul8, %sub  ; last link of the chain; the patterns above expect it in d0
 290   %call = tail call double @hh(double %add9) #2  ; the call that the header comment's regmask clobber refers to
 291   store double %call, double* %q, align 8  ; stores the value returned by @hh, not %add9
 292   ret void
 293 }
 294
 295 declare double @hh(double) #1  ; external callee; called from f6
 296
 297 ; Check that we correctly deal with repeated operands.
 298 ; The following testcase creates:
 299 ;   %d1 = FADDDrr killed %d0, %d0
 300 ; We'll get a crash if we naively look at the first operand, remove it
 301 ; from the substitution list then look at the second operand.
 302 ; CHECK-LABEL: f7:
 303 ; CHECK: fmadd [[x:d[0-9]+]]
 304 ; CHECK: fadd d1, [[x]], [[x]]
 305
 306 define void @f7(double* nocapture readonly %p, double* nocapture %q) #0 {  ; one FP chain whose result is then added to itself
 307 entry:
 308   %0 = load double, double* %p, align 8
 309   %arrayidx1 = getelementptr inbounds double, double* %p, i64 1
 310   %1 = load double, double* %arrayidx1, align 8
 311   %arrayidx2 = getelementptr inbounds double, double* %p, i64 2
 312   %2 = load double, double* %arrayidx2, align 8
 313   %arrayidx3 = getelementptr inbounds double, double* %p, i64 3
 314   %3 = load double, double* %arrayidx3, align 8
 315   %arrayidx4 = getelementptr inbounds double, double* %p, i64 4
 316   %4 = load double, double* %arrayidx4, align 8
 317   %mul = fmul fast double %0, %1  ; same multiply/accumulate sequence as the first chain of f1
 318   %add = fadd fast double %mul, %4
 319   %mul5 = fmul fast double %1, %2
 320   %add6 = fadd fast double %mul5, %add
 321   %mul7 = fmul fast double %1, %3
 322   %sub = fsub fast double %add6, %mul7
 323   %mul8 = fmul fast double %2, %3
 324   %add9 = fadd fast double %mul8, %sub
 325   %add10 = fadd fast double %add9, %add9  ; the repeated operand: %add9 appears as both sources
 326   call void @hhh(double 0.0, double %add10)  ; %add10 is the second argument; the patterns above expect it in d1
 327   ret void
 328 }
 329
 330 declare void @hhh(double, double)  ; external callee; called from f7; has no attribute group
 331
 332 attributes #0 = { nounwind "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }  ; used by the definitions of f1-f7
 333 attributes #1 = { "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="true" "no-nans-fp-math"="true" "stack-protector-buffer-size"="8" "unsafe-fp-math"="true" "use-soft-float"="false" }  ; used by the declarations of @g and @hh; same as #0 without nounwind
 334 attributes #2 = { nounwind }  ; used at the call sites of @g and @hh
 335