From f41b0c1dff55d2d8c30f95dada248bb3e2e8c5cf Mon Sep 17 00:00:00 2001
From: Nemanja Ivanovic <nemanja.i.ibm@gmail.com>
Date: Fri, 23 Feb 2018 23:08:34 +0000
Subject: [PATCH] [PowerPC] Disable shrink-wrapping when getting PC address
 through the LR

The instruction sequence used to get the address of the PC into a GPR requires
that we clobber the link register. Doing so without having first saved it in
the prologue leaves the function unable to return. Currently, this sequence is
emitted into the entry block. To ensure the prologue is inserted before this
sequence, disable shrink-wrapping.

This fixes PR33547.

Differential Revision: https://reviews.llvm.org/D43677


git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@325972 91177308-0d34-0410-b5e6-96231b3b80d8
---
 lib/Target/PowerPC/PPCFrameLowering.cpp     |  2 +
 lib/Target/PowerPC/PPCISelDAGToDAG.cpp      | 10 +++++
 lib/Target/PowerPC/PPCMachineFunctionInfo.h | 11 +++++
 test/CodeGen/PowerPC/pr33547.ll             | 70 +++++++++++++++++++++++++++++
 4 files changed, 93 insertions(+)
 create mode 100644 test/CodeGen/PowerPC/pr33547.ll
diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp
index bdda9d13ad8..55d740821d3 100644
--- a/lib/Target/PowerPC/PPCFrameLowering.cpp
+++ b/lib/Target/PowerPC/PPCFrameLowering.cpp
@@ -2159,6 +2159,8 @@ PPCFrameLowering::restoreCalleeSavedRegisters(MachineBasicBlock &MBB,
 }
 
 bool PPCFrameLowering::enableShrinkWrapping(const MachineFunction &MF) const {
+  if (MF.getInfo<PPCFunctionInfo>()->shrinkWrapDisabled())
+    return false;
   return (MF.getSubtarget<PPCSubtarget>().isSVR4ABI() &&
           MF.getSubtarget<PPCSubtarget>().isPPC64());
 }
diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
index d3a223fe03e..29fe0e03cdd 100644
--- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
+++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp
@@ -417,6 +417,16 @@ SDNode *PPCDAGToDAGISel::getGlobalBaseReg() {
         BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR), GlobalBaseReg);
       }
     } else {
+      // We must ensure that this sequence is dominated by the prologue.
+      // FIXME: This is a bit of a big hammer since we don't get the benefits
+      // of shrink-wrapping whenever we emit this instruction. Considering
+      // this is used in any function where we emit a jump table, this may be
+      // a significant limitation. We should consider inserting this in the
+      // block where it is used and then commoning this sequence up if it
+      // appears in multiple places.
+      // Note: on ISA 3.0 cores, we can use lnia (addpcis) instead of
+      // MovePCtoLR8.
+      MF->getInfo<PPCFunctionInfo>()->setShrinkWrapDisabled(true);
       GlobalBaseReg = RegInfo->createVirtualRegister(&PPC::G8RC_and_G8RC_NOX0RegClass);
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MovePCtoLR8));
       BuildMI(FirstMBB, MBBI, dl, TII.get(PPC::MFLR8), GlobalBaseReg);
diff --git a/lib/Target/PowerPC/PPCMachineFunctionInfo.h b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
index a9b6073106e..24426aff012 100644
--- a/lib/Target/PowerPC/PPCMachineFunctionInfo.h
+++ b/lib/Target/PowerPC/PPCMachineFunctionInfo.h
@@ -45,6 +45,11 @@ class PPCFunctionInfo : public MachineFunctionInfo {
   /// PEI.
   bool MustSaveLR;
 
+  /// Do we have to disable shrink-wrapping? This has to be set if we emit any
+  /// instructions that clobber LR in the entry block because discovering this
+  /// in PEI is too late (happens after shrink-wrapping).
+  bool ShrinkWrapDisabled = false;
+
   /// Does this function have any stack spills.
   bool HasSpills = false;
 
@@ -147,6 +152,12 @@ public:
   void setMustSaveLR(bool U) { MustSaveLR = U; }
   bool mustSaveLR() const    { return MustSaveLR; }
 
+  /// We certainly don't want to shrink wrap functions if we've emitted a
+  /// MovePCtoLR8, as that has to go into the entry block, so the prologue
+  /// definitely has to go into the entry block as well.
+  void setShrinkWrapDisabled(bool U) { ShrinkWrapDisabled = U; }
+  bool shrinkWrapDisabled() const { return ShrinkWrapDisabled; }
+
   void setHasSpills()      { HasSpills = true; }
   bool hasSpills() const   { return HasSpills; }
 
diff --git a/test/CodeGen/PowerPC/pr33547.ll b/test/CodeGen/PowerPC/pr33547.ll
new file mode 100644
index 00000000000..166b5c6327e
--- /dev/null
+++ b/test/CodeGen/PowerPC/pr33547.ll
@@ -0,0 +1,70 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-unknown-unknown \
+; RUN:   -mcpu=pwr8 -code-model=large < %s | FileCheck %s
+%struct.STATICS1 = type <{ [128 x i8] }>
+
+@.STATICS1 = internal global %struct.STATICS1 <{ [128 x i8] c"\09\00\00\00\03\00\00\00\05\00\00\00\04\00\00\00\0A\00\00\00\0A\00\00\00\0B\00\00\00\0A\08\AF/\B8\B6\87\04 \A1\07\00\08\9D\00\00\09\00\00\00\05\00\00\00\03\00\00\00\03\00\00\00\05\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00\00" }>, align 16
+@.C302_MAIN_ = internal constant i32 4
+
+; Function Attrs: noinline norecurse nounwind
+define void @main() {
+L.entry:
+  tail call void @testFunc(i64* bitcast (i8* getelementptr inbounds (%struct.STATICS1, %struct.STATICS1* @.STATICS1, i64 0, i32 0, i64 124) to i64*), i64* bitcast (i32* @.C302_MAIN_ to i64*))
+  ret void
+}
+
+; Function Attrs: noinline norecurse nounwind readonly
+define signext i32 @ifunc_(i64* nocapture readonly %i) {
+; CHECK-LABEL: ifunc_:
+; CHECK:       # %bb.0: # %L.entry
+; CHECK-NEXT:    lwa 3, 0(3)
+; CHECK-NEXT:    blr
+L.entry:
+  %0 = bitcast i64* %i to i32*
+  %1 = load i32, i32* %0, align 4
+  ret i32 %1
+}
+
+; Function Attrs: noinline norecurse nounwind
+define void @testFunc(i64* nocapture %r, i64* nocapture readonly %k) {
+; CHECK-LABEL: testFunc
+; CHECK: mflr 0
+; CHECK: std 0, 16(1)
+; CHECK: bl .[[BRANCHNEXT:[L0-9\$a-z]+]]
+; CHECK-NEXT: [[BRANCHNEXT]]
+L.entry:
+  %0 = bitcast i64* %k to i32*
+  %1 = load i32, i32* %0, align 4
+  switch i32 %1, label %L.LB3_307 [
+    i32 1, label %L.LB3_307.sink.split
+    i32 3, label %L.LB3_307.sink.split
+    i32 4, label %L.LB3_321.split
+    i32 5, label %L.LB3_307.sink.split
+    i32 6, label %infloop.preheader
+    i32 2, label %infloop11.preheader
+  ]
+
+infloop11.preheader:                              ; preds = %L.entry
+  br label %infloop11
+
+infloop.preheader:                                ; preds = %L.entry
+  br label %infloop
+
+L.LB3_321.split:                                  ; preds = %L.entry
+  br label %L.LB3_307.sink.split
+
+L.LB3_307.sink.split:                             ; preds = %L.LB3_321.split, %L.entry, %L.entry, %L.entry
+  %.sink = phi i32 [ 5, %L.LB3_321.split ], [ -3, %L.entry ], [ -3, %L.entry ], [ -3, %L.entry ]
+  %2 = bitcast i64* %r to i32*
+  store i32 %.sink, i32* %2, align 4
+  br label %L.LB3_307
+
+L.LB3_307:                                        ; preds = %L.LB3_307.sink.split, %L.entry
+  ret void
+
+infloop:                                          ; preds = %infloop.preheader, %infloop
+  br label %infloop
+
+infloop11:                                        ; preds = %infloop11.preheader, %infloop11
+  br label %infloop11
+}
-- 
2.11.4.GIT