From 815c23a1ce10135b3158451b56c3ff469febbef3 Mon Sep 17 00:00:00 2001 From: Evan Cheng Date: Fri, 7 Aug 2009 00:34:42 +0000 Subject: [PATCH] It turns out most of the thumb2 instructions are not allowed to touch SP. The semantics of such instructions are unpredictable. We have just been lucky that tests have been passing. This patch takes pain to ensure all the PEI lowering code does the right thing when lowering frame indices, insert code to manipulate stack pointers, etc. It's also custom lowering dynamic stack alloc into pseudo instructions so we can insert the right instructions at scheduling time. This fixes PR4659 and PR4682. git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@78361 91177308-0d34-0410-b5e6-96231b3b80d8 --- lib/Target/ARM/ARMBaseInstrInfo.cpp | 2 +- lib/Target/ARM/ARMBaseRegisterInfo.cpp | 11 +-- lib/Target/ARM/ARMISelDAGToDAG.cpp | 76 ++++++++++++----- lib/Target/ARM/ARMISelLowering.cpp | 130 ++++++++++++++++++++++++++++- lib/Target/ARM/ARMISelLowering.h | 7 +- lib/Target/ARM/ARMInstrThumb.td | 30 +++++-- lib/Target/ARM/ARMInstrThumb2.td | 38 ++++++++- lib/Target/ARM/ARMLoadStoreOptimizer.cpp | 20 ++++- lib/Target/ARM/README-Thumb2.txt | 4 + lib/Target/ARM/Thumb2InstrInfo.cpp | 98 +++++++++++++++------- test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll | 24 ++++++ test/CodeGen/Thumb2/large-stack.ll | 6 +- 12 files changed, 373 insertions(+), 73 deletions(-) create mode 100644 test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll diff --git a/lib/Target/ARM/ARMBaseInstrInfo.cpp b/lib/Target/ARM/ARMBaseInstrInfo.cpp index 9c5f3aab9b..911b84dc7d 100644 --- a/lib/Target/ARM/ARMBaseInstrInfo.cpp +++ b/lib/Target/ARM/ARMBaseInstrInfo.cpp @@ -688,7 +688,7 @@ foldMemoryOperandImpl(MachineFunction &MF, MachineInstr *MI, unsigned OpNum = Ops[0]; unsigned Opc = MI->getOpcode(); MachineInstr *NewMI = NULL; - if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { + if (Opc == ARM::MOVr || Opc == ARM::t2MOVr) { // FIXME: tMOVgpr2gpr etc.? // If it is updating CPSR, then it cannot be folded. if (MI->getOperand(4).getReg() != ARM::CPSR || MI->getOperand(4).isDead()) { unsigned Pred = MI->getOperand(2).getImm(); diff --git a/lib/Target/ARM/ARMBaseRegisterInfo.cpp b/lib/Target/ARM/ARMBaseRegisterInfo.cpp index 97b9ad1dc3..a72e9dd3bd 100644 --- a/lib/Target/ARM/ARMBaseRegisterInfo.cpp +++ b/lib/Target/ARM/ARMBaseRegisterInfo.cpp @@ -1289,11 +1289,12 @@ emitEpilogue(MachineFunction &MF, MachineBasicBlock &MBB) const { AFI->getDPRCalleeSavedAreaOffset()|| hasFP(MF)) { if (NumBytes) { - unsigned SUBriOpc = isARM ? ARM::SUBri : ARM::t2SUBri; - BuildMI(MBB, MBBI, dl, TII.get(SUBriOpc), ARM::SP) - .addReg(FramePtr) - .addImm(NumBytes) - .addImm((unsigned)ARMCC::AL).addReg(0).addReg(0); + if (isARM) + emitARMRegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); + else + emitT2RegPlusImmediate(MBB, MBBI, dl, ARM::SP, FramePtr, -NumBytes, + ARMCC::AL, 0, TII); } else { // Thumb2 or ARM. unsigned MOVrOpc = isARM ? ARM::MOVr : ARM::t2MOVr; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index d996b24ddf..04f4dd3ca4 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -122,6 +122,8 @@ private: SDNode *SelectARMIndexedLoad(SDValue Op); SDNode *SelectT2IndexedLoad(SDValue Op); + /// SelectDYN_ALLOC - Select dynamic alloc for Thumb. + SDNode *SelectDYN_ALLOC(SDValue Op); /// SelectInlineAsmMemoryOperand - Implement addressing mode selection for /// inline asm expressions. @@ -868,6 +870,59 @@ SDNode *ARMDAGToDAGISel::SelectT2IndexedLoad(SDValue Op) { return NULL; } +SDNode *ARMDAGToDAGISel::SelectDYN_ALLOC(SDValue Op) { + SDNode *N = Op.getNode(); + DebugLoc dl = N->getDebugLoc(); + MVT VT = Op.getValueType(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + SDValue SP = CurDAG->getRegister(ARM::SP, MVT::i32); + int32_t AlignVal = cast(Align)->getSExtValue(); + if (AlignVal < 0) + // We need to align the stack. Use Thumb1 tAND which is the only thumb + // instruction that can read and write SP. This matches to a pseudo + // instruction that has a chain to ensure the result is written back to + // the stack pointer. + SP = SDValue(CurDAG->getTargetNode(ARM::tANDsp, dl, VT, SP, Align), 0); + + bool isC = isa(Size); + uint32_t C = isC ? cast(Size)->getZExtValue() : ~0UL; + // Handle the most common case for both Thumb1 and Thumb2: + // tSUBspi - immediate is between 0 ... 508 inclusive. + if (C <= 508 && ((C & 3) == 0)) + // FIXME: tSUBspi encode scale 4 implicitly. + return CurDAG->SelectNodeTo(N, ARM::tSUBspi_, VT, MVT::Other, SP, + CurDAG->getTargetConstant(C/4, MVT::i32), + Chain); + + if (Subtarget->isThumb1Only()) { + // Use tADDrSPr since Thumb1 does not have a sub r, sp, r. ARMISelLowering + // should have negated the size operand already. FIXME: We can't insert + // new target independent node at this stage so we are forced to negate + // it earlier. Is there a better solution? + return CurDAG->SelectNodeTo(N, ARM::tADDspr_, VT, MVT::Other, SP, Size, + Chain); + } else if (Subtarget->isThumb2()) { + if (isC && Predicate_t2_so_imm(Size.getNode())) { + // t2SUBrSPi + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi_, VT, MVT::Other, Ops, 3); + } else if (isC && Predicate_imm0_4095(Size.getNode())) { + // t2SUBrSPi12 + SDValue Ops[] = { SP, CurDAG->getTargetConstant(C, MVT::i32), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPi12_, VT, MVT::Other, Ops, 3); + } else { + // t2SUBrSPs + SDValue Ops[] = { SP, Size, + getI32Imm(ARM_AM::getSORegOpc(ARM_AM::lsl,0)), Chain }; + return CurDAG->SelectNodeTo(N, ARM::t2SUBrSPs_, VT, MVT::Other, Ops, 4); + } + } + + // FIXME: Add ADD / SUB sp instructions for ARM. + return 0; +} SDNode *ARMDAGToDAGISel::Select(SDValue Op) { SDNode *N = Op.getNode(); @@ -941,25 +996,8 @@ SDNode *ARMDAGToDAGISel::Select(SDValue Op) { return CurDAG->SelectNodeTo(N, Opc, MVT::i32, Ops, 5); } } - case ISD::ADD: { - if (!Subtarget->isThumb1Only()) - break; - // Select add sp, c to tADDhirr. - SDValue N0 = Op.getOperand(0); - SDValue N1 = Op.getOperand(1); - RegisterSDNode *LHSR = dyn_cast(Op.getOperand(0)); - RegisterSDNode *RHSR = dyn_cast(Op.getOperand(1)); - if (LHSR && LHSR->getReg() == ARM::SP) { - std::swap(N0, N1); - std::swap(LHSR, RHSR); - } - if (RHSR && RHSR->getReg() == ARM::SP) { - SDValue Val = SDValue(CurDAG->getTargetNode(ARM::tMOVtgpr2gpr, dl, - Op.getValueType(), N0, N0),0); - return CurDAG->SelectNodeTo(N, ARM::tADDhirr, Op.getValueType(), Val, N1); - } - break; - } + case ARMISD::DYN_ALLOC: + return SelectDYN_ALLOC(Op); case ISD::MUL: if (Subtarget->isThumb1Only()) break; diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 0bfe213cf0..4922f7d12e 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -307,7 +307,10 @@ ARMTargetLowering::ARMTargetLowering(TargetMachine &TM) setOperationAction(ISD::VAEND, MVT::Other, Expand); setOperationAction(ISD::STACKSAVE, MVT::Other, Expand); setOperationAction(ISD::STACKRESTORE, MVT::Other, Expand); - setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); + if (Subtarget->isThumb()) + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Custom); + else + setOperationAction(ISD::DYNAMIC_STACKALLOC, MVT::i32, Expand); setOperationAction(ISD::MEMBARRIER, MVT::Other, Expand); if (!Subtarget->hasV6Ops() && !Subtarget->isThumb2()) { @@ -435,6 +438,8 @@ const char *ARMTargetLowering::getTargetNodeName(unsigned Opcode) const { case ARMISD::THREAD_POINTER:return "ARMISD::THREAD_POINTER"; + case ARMISD::DYN_ALLOC: return "ARMISD::DYN_ALLOC"; + case ARMISD::VCEQ: return "ARMISD::VCEQ"; case ARMISD::VCGE: return "ARMISD::VCGE"; case ARMISD::VCGEU: return "ARMISD::VCGEU"; @@ -1399,6 +1404,53 @@ static SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG, } SDValue +ARMTargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG) { + SDNode *Node = Op.getNode(); + DebugLoc dl = Node->getDebugLoc(); + MVT VT = Node->getValueType(0); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + SDValue Align = Op.getOperand(2); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, true)); + + unsigned AlignVal = cast(Align)->getZExtValue(); + unsigned StackAlign = getTargetMachine().getFrameInfo()->getStackAlignment(); + if (AlignVal > StackAlign) + // Do this now since selection pass cannot introduce new target + // independent node. + Align = DAG.getConstant(-(uint64_t)AlignVal, VT); + + // In Thumb1 mode, there isn't a "sub r, sp, r" instruction, we will end up + // using a "add r, sp, r" instead. Negate the size now so we don't have to + // do even more horrible hack later. + MachineFunction &MF = DAG.getMachineFunction(); + ARMFunctionInfo *AFI = MF.getInfo(); + if (AFI->isThumb1OnlyFunction()) { + bool Negate = true; + ConstantSDNode *C = dyn_cast(Size); + if (C) { + uint32_t Val = C->getZExtValue(); + if (Val <= 508 && ((Val & 3) == 0)) + Negate = false; + } + if (Negate) + Size = DAG.getNode(ISD::SUB, dl, VT, DAG.getConstant(0, VT), Size); + } + + SDVTList VTList = DAG.getVTList(VT, MVT::Other); + SDValue Ops1[] = { Chain, Size, Align }; + SDValue Res = DAG.getNode(ARMISD::DYN_ALLOC, dl, VTList, Ops1, 3); + Chain = Res.getValue(1); + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, true), + DAG.getIntPtrConstant(0, true), SDValue()); + SDValue Ops2[] = { Res, Chain }; + return DAG.getMergeValues(Ops2, 2, dl); +} + +SDValue ARMTargetLowering::GetF64FormalArgument(CCValAssign &VA, CCValAssign &NextVA, SDValue &Root, SelectionDAG &DAG, DebugLoc dl) { @@ -2396,6 +2448,7 @@ SDValue ARMTargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) { case ISD::SELECT_CC: return LowerSELECT_CC(Op, DAG, Subtarget); case ISD::BR_CC: return LowerBR_CC(Op, DAG, Subtarget); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::DYNAMIC_STACKALLOC: return LowerDYNAMIC_STACKALLOC(Op, DAG); case ISD::VASTART: return LowerVASTART(Op, DAG, VarArgsFrameIndex); case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: return LowerINT_TO_FP(Op, DAG); @@ -2454,7 +2507,8 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, const TargetInstrInfo *TII = getTargetMachine().getInstrInfo(); DebugLoc dl = MI->getDebugLoc(); switch (MI->getOpcode()) { - default: assert(false && "Unexpected instr type to insert"); + default: + llvm_unreachable("Unexpected instr type to insert"); case ARM::tMOVCCr: { // To "insert" a SELECT_CC instruction, we actually have to insert the // diamond control-flow pattern. The incoming instruction knows the @@ -2509,6 +2563,78 @@ ARMTargetLowering::EmitInstrWithCustomInserter(MachineInstr *MI, F->DeleteMachineInstr(MI); // The pseudo instruction is gone now. return BB; } + + case ARM::tANDsp: + case ARM::tADDspr_: + case ARM::tSUBspi_: + case ARM::t2SUBrSPi_: + case ARM::t2SUBrSPi12_: + case ARM::t2SUBrSPs_: { + MachineFunction *MF = BB->getParent(); + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + bool DstIsDead = MI->getOperand(0).isDead(); + bool SrcIsKill = MI->getOperand(1).isKill(); + + if (SrcReg != ARM::SP) { + // Copy the source to SP from virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(SrcReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVtgpr2gpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc), ARM::SP) + .addReg(SrcReg, getKillRegState(SrcIsKill)); + } + + unsigned OpOpc = 0; + bool NeedPred = false, NeedCC = false, NeedOp3 = false; + switch (MI->getOpcode()) { + default: + llvm_unreachable("Unexpected pseudo instruction!"); + case ARM::tANDsp: + OpOpc = ARM::tAND; + NeedPred = true; + break; + case ARM::tADDspr_: + OpOpc = ARM::tADDspr; + break; + case ARM::tSUBspi_: + OpOpc = ARM::tSUBspi; + break; + case ARM::t2SUBrSPi_: + OpOpc = ARM::t2SUBrSPi; + NeedPred = true; NeedCC = true; + break; + case ARM::t2SUBrSPi12_: + OpOpc = ARM::t2SUBrSPi12; + NeedPred = true; + break; + case ARM::t2SUBrSPs_: + OpOpc = ARM::t2SUBrSPs; + NeedPred = true; NeedCC = true; NeedOp3 = true; + break; + } + MachineInstrBuilder MIB = BuildMI(BB, dl, TII->get(OpOpc), ARM::SP); + if (OpOpc == ARM::tAND) + AddDefaultT1CC(MIB); + MIB.addReg(ARM::SP); + MIB.addOperand(MI->getOperand(2)); + if (NeedOp3) + MIB.addOperand(MI->getOperand(3)); + if (NeedPred) + AddDefaultPred(MIB); + if (NeedCC) + AddDefaultCC(MIB); + + // Copy the result from SP to virtual register. + const TargetRegisterClass *RC = MF->getRegInfo().getRegClass(DstReg); + unsigned CopyOpc = (RC == ARM::tGPRRegisterClass) + ? ARM::tMOVgpr2tgpr : ARM::tMOVgpr2gpr; + BuildMI(BB, dl, TII->get(CopyOpc)) + .addReg(DstReg, getDefRegState(true) | getDeadRegState(DstIsDead)) + .addReg(ARM::SP); + MF->DeleteMachineInstr(MI); // The pseudo instruction is gone now. + return BB; + } } } diff --git a/lib/Target/ARM/ARMISelLowering.h b/lib/Target/ARM/ARMISelLowering.h index 4fe4d8bf94..4649c18de8 100644 --- a/lib/Target/ARM/ARMISelLowering.h +++ b/lib/Target/ARM/ARMISelLowering.h @@ -65,11 +65,13 @@ namespace llvm { FMRRD, // double to two gprs. FMDRR, // Two gprs to double. - EH_SJLJ_SETJMP, // SjLj exception handling setjmp - EH_SJLJ_LONGJMP, // SjLj exception handling longjmp + EH_SJLJ_SETJMP, // SjLj exception handling setjmp. + EH_SJLJ_LONGJMP, // SjLj exception handling longjmp. THREAD_POINTER, + DYN_ALLOC, // Dynamic allocation on the stack. + VCEQ, // Vector compare equal. VCGE, // Vector compare greater than or equal. VCGEU, // Vector compare unsigned greater than or equal. @@ -255,6 +257,7 @@ namespace llvm { SDValue LowerGLOBAL_OFFSET_TABLE(SDValue Op, SelectionDAG &DAG); SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG); SDValue LowerFRAMEADDR(SDValue Op, SelectionDAG &DAG); + SDValue LowerDYNAMIC_STACKALLOC(SDValue Op, SelectionDAG &DAG); SDValue EmitTargetCodeForMemcpy(SelectionDAG &DAG, DebugLoc dl, SDValue Chain, diff --git a/lib/Target/ARM/ARMInstrThumb.td b/lib/Target/ARM/ARMInstrThumb.td index 5dfc4fc25a..9b54e67bed 100644 --- a/lib/Target/ARM/ARMInstrThumb.td +++ b/lib/Target/ARM/ARMInstrThumb.td @@ -138,18 +138,37 @@ def tADDrPCi : T1I<(outs tGPR:$dst), (ins i32imm:$rhs), IIC_iALU, "add $dst, pc, $rhs * 4", []>; // ADD rd, sp, #imm8 -// FIXME: hard code sp? def tADDrSPi : T1I<(outs tGPR:$dst), (ins GPR:$sp, i32imm:$rhs), IIC_iALU, "add $dst, $sp, $rhs * 4 @ addrspi", []>; // ADD sp, sp, #imm7 -// FIXME: hard code sp? -def tADDspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, +def tADDspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, "add $dst, $rhs * 4", []>; -// FIXME: Make use of the following? +// SUB sp, sp, #imm7 +def tSUBspi : TIt<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, + "sub $dst, $rhs * 4", []>; + // ADD rm, sp, rm +def tADDrSPr : TI<(outs GPR:$dst), (ins GPR:$sp, GPR:$rhs), IIC_iALU, + "add $dst, $sp, $rhs", []>; + // ADD sp, rm +def tADDspr : TIt<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), IIC_iALU, + "add $dst, $rhs", []>; + +// Pseudo instruction that will expand into a tSUBspi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +def tSUBspi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), + NoItinerary, "@ sub $dst, $rhs * 4", []>; + +def tADDspr_ : PseudoInst<(outs GPR:$dst), (ins GPR:$lhs, GPR:$rhs), + NoItinerary, "@ add $dst, $rhs", []>; + +let Defs = [CPSR] in +def tANDsp : PseudoInst<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), + NoItinerary, "@ and $dst, $rhs", []>; +} // usesCustomDAGSchedInserter //===----------------------------------------------------------------------===// // Control Flow Instructions. @@ -549,9 +568,6 @@ def tSUBrr : T1sI<(outs tGPR:$dst), (ins tGPR:$lhs, tGPR:$rhs), IIC_iALU, // TODO: A7-96: STMIA - store multiple. -def tSUBspi : T1It<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, - "sub $dst, $rhs * 4", []>; - // sign-extend byte def tSXTB : T1pI<(outs tGPR:$dst), (ins tGPR:$src), IIC_iALU, "sxtb", " $dst, $src", diff --git a/lib/Target/ARM/ARMInstrThumb2.td b/lib/Target/ARM/ARMInstrThumb2.td index 9305c8ad6b..11b0454802 100644 --- a/lib/Target/ARM/ARMInstrThumb2.td +++ b/lib/Target/ARM/ARMInstrThumb2.td @@ -75,7 +75,8 @@ def imm1_31 : PatLeaf<(i32 imm), [{ }]>; /// imm0_4095 predicate - True if the 32-bit immediate is in the range [0.4095]. -def imm0_4095 : PatLeaf<(i32 imm), [{ +def imm0_4095 : Operand, + PatLeaf<(i32 imm), [{ return (uint32_t)N->getZExtValue() < 4096; }]>; @@ -239,7 +240,7 @@ multiclass T2I_bin_ii12rs { opc, ".w $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, t2_so_imm:$rhs))]>; // 12-bit imm - def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, i32imm:$rhs), IIC_iALU, + def ri12 : T2sI<(outs GPR:$dst), (ins GPR:$lhs, imm0_4095:$rhs), IIC_iALU, !strconcat(opc, "w"), " $dst, $lhs, $rhs", [(set GPR:$dst, (opnode GPR:$lhs, imm0_4095:$rhs))]>; // register @@ -431,6 +432,39 @@ def t2LEApcrelJT : T2XI<(outs GPR:$dst), (ins i32imm:$label, i32imm:$id, pred:$p), IIC_iALU, "adr$p.w $dst, #${label}_${id:no_hash}", []>; + +// ADD r, sp, {so_imm|i12} +def t2ADDrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU, + "add", ".w $dst, $sp, $imm", []>; +def t2ADDrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU, + "addw", " $dst, $sp, $imm", []>; + +// ADD r, sp, so_reg +def t2ADDrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU, + "add", ".w $dst, $sp, $rhs", []>; + +// SUB r, sp, {so_imm|i12} +def t2SUBrSPi : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), IIC_iALU, + "sub", ".w $dst, $sp, $imm", []>; +def t2SUBrSPi12 : T2I<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), IIC_iALU, + "subw", " $dst, $sp, $imm", []>; + +// SUB r, sp, so_reg +def t2SUBrSPs : T2sI<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), IIC_iALU, + "sub", " $dst, $sp, $rhs", []>; + + +// Pseudo instruction that will expand into a t2SUBrSPi + a copy. +let usesCustomDAGSchedInserter = 1 in { // Expanded by the scheduler. +def t2SUBrSPi_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_imm:$imm), + NoItinerary, "@ sub.w $dst, $sp, $imm", []>; +def t2SUBrSPi12_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, imm0_4095:$imm), + NoItinerary, "@ subw $dst, $sp, $imm", []>; +def t2SUBrSPs_ : PseudoInst<(outs GPR:$dst), (ins GPR:$sp, t2_so_reg:$rhs), + NoItinerary, "@ sub $dst, $sp, $rhs", []>; +} // usesCustomDAGSchedInserter + + //===----------------------------------------------------------------------===// // Load / store Instructions. // diff --git a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp index a81b790f9d..ea80e47589 100644 --- a/lib/Target/ARM/ARMLoadStoreOptimizer.cpp +++ b/lib/Target/ARM/ARMLoadStoreOptimizer.cpp @@ -206,9 +206,13 @@ ARMLoadStoreOpt::MergeOps(MachineBasicBlock &MBB, if (NewBase == 0) return false; } - int BaseOpc = isThumb2 ? ARM::t2ADDri : ARM::ADDri; + int BaseOpc = !isThumb2 + ? ARM::ADDri + : ((Base == ARM::SP) ? ARM::t2ADDrSPi : ARM::t2ADDri); if (Offset < 0) { - BaseOpc = isThumb2 ? ARM::t2SUBri : ARM::SUBri; + BaseOpc = !isThumb2 + ? ARM::SUBri + : ((Base == ARM::SP) ? ARM::t2SUBrSPi : ARM::t2SUBri); Offset = - Offset; } int ImmedOffset = isThumb2 @@ -329,6 +333,9 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2SUBri && + MI->getOpcode() != ARM::t2SUBrSPi && + MI->getOpcode() != ARM::t2SUBrSPi12 && + MI->getOpcode() != ARM::tSUBspi && MI->getOpcode() != ARM::SUBri) return false; @@ -336,9 +343,10 @@ static inline bool isMatchingDecrement(MachineInstr *MI, unsigned Base, if (Bytes <= 0 || (Limit && Bytes >= Limit)) return false; + unsigned Scale = (MI->getOpcode() == ARM::tSUBspi) ? 4 : 1; // FIXME return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - MI->getOperand(2).getImm() == Bytes && + (MI->getOperand(2).getImm()*Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } @@ -350,6 +358,9 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, if (!MI) return false; if (MI->getOpcode() != ARM::t2ADDri && + MI->getOpcode() != ARM::t2ADDrSPi && + MI->getOpcode() != ARM::t2ADDrSPi12 && + MI->getOpcode() != ARM::tADDspi && MI->getOpcode() != ARM::ADDri) return false; @@ -357,9 +368,10 @@ static inline bool isMatchingIncrement(MachineInstr *MI, unsigned Base, // Make sure the offset fits in 8 bits. return false; + unsigned Scale = (MI->getOpcode() == ARM::tADDspi) ? 4 : 1; // FIXME return (MI->getOperand(0).getReg() == Base && MI->getOperand(1).getReg() == Base && - MI->getOperand(2).getImm() == Bytes && + (MI->getOperand(2).getImm()*Scale) == Bytes && getInstrPredicate(MI, MyPredReg) == Pred && MyPredReg == PredReg); } diff --git a/lib/Target/ARM/README-Thumb2.txt b/lib/Target/ARM/README-Thumb2.txt index 651d39311e..48b5278122 100644 --- a/lib/Target/ARM/README-Thumb2.txt +++ b/lib/Target/ARM/README-Thumb2.txt @@ -1,3 +1,7 @@ //===---------------------------------------------------------------------===// // Random ideas for the ARM backend (Thumb2 specific). //===---------------------------------------------------------------------===// + +We should be using ADD / SUB rd, sp, rm instructions. + +copyRegToReg should use tMOVgpr2gpr instead of t2MOVr? diff --git a/lib/Target/ARM/Thumb2InstrInfo.cpp b/lib/Target/ARM/Thumb2InstrInfo.cpp index cf2d09912d..2b329e03db 100644 --- a/lib/Target/ARM/Thumb2InstrInfo.cpp +++ b/lib/Target/ARM/Thumb2InstrInfo.cpp @@ -65,11 +65,15 @@ Thumb2InstrInfo::copyRegToReg(MachineBasicBlock &MBB, if (DestRC == ARM::GPRRegisterClass && SrcRC == ARM::GPRRegisterClass) { - AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr), - DestReg).addReg(SrcReg))); + // FIXME: Just use tMOVgpr2gpr since it's shorter? + if (SrcReg == ARM::SP || DestReg == ARM::SP) + BuildMI(MBB, I, DL, get(ARM::tMOVgpr2gpr), DestReg).addReg(SrcReg); + else + AddDefaultCC(AddDefaultPred(BuildMI(MBB, I, DL, get(ARM::t2MOVr), + DestReg).addReg(SrcReg))); return true; } else if (DestRC == ARM::GPRRegisterClass && - SrcRC == ARM::tGPRRegisterClass) { + SrcRC == ARM::tGPRRegisterClass) { BuildMI(MBB, I, DL, get(ARM::tMOVtgpr2gpr), DestReg).addReg(SrcReg); return true; } else if (DestRC == ARM::tGPRRegisterClass && @@ -162,26 +166,62 @@ void llvm::emitT2RegPlusImmediate(MachineBasicBlock &MBB, } while (NumBytes) { - unsigned Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; unsigned ThisVal = NumBytes; - if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { - NumBytes = 0; - } else if (ThisVal < 4096) { - Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; - NumBytes = 0; + unsigned Opc = 0; + if (DestReg == ARM::SP && BaseReg != ARM::SP) { + // mov sp, rn. Note t2MOVr cannot be used. + BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVgpr2gpr),DestReg).addReg(BaseReg); + BaseReg = ARM::SP; + continue; + } + + if (BaseReg == ARM::SP) { + // sub sp, sp, #imm7 + if (DestReg == ARM::SP && (ThisVal < ((1 << 7)-1) * 4)) { + assert((ThisVal & 3) == 0 && "Stack update is not multiple of 4?"); + Opc = isSub ? ARM::tSUBspi : ARM::tADDspi; + // FIXME: Fix Thumb1 immediate encoding. + BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg).addImm(ThisVal/4); + NumBytes = 0; + continue; + } + + // sub rd, sp, so_imm + Opc = isSub ? ARM::t2SUBrSPi : ARM::t2ADDrSPi; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } } else { - // FIXME: Move this to ARMAddressingModes.h? - unsigned RotAmt = CountLeadingZeros_32(ThisVal); - ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); - NumBytes &= ~ThisVal; - assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && - "Bit extraction didn't work?"); + assert(DestReg != ARM::SP && BaseReg != ARM::SP); + Opc = isSub ? ARM::t2SUBri : ARM::t2ADDri; + if (ARM_AM::getT2SOImmVal(NumBytes) != -1) { + NumBytes = 0; + } else if (ThisVal < 4096) { + Opc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; + NumBytes = 0; + } else { + // FIXME: Move this to ARMAddressingModes.h? + unsigned RotAmt = CountLeadingZeros_32(ThisVal); + ThisVal = ThisVal & ARM_AM::rotr32(0xff000000U, RotAmt); + NumBytes &= ~ThisVal; + assert(ARM_AM::getT2SOImmVal(ThisVal) != -1 && + "Bit extraction didn't work?"); + } } // Build the new ADD / SUB. - BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) - .addReg(BaseReg, RegState::Kill).addImm(ThisVal) - .addImm((unsigned)Pred).addReg(PredReg).addReg(0); + AddDefaultCC(AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(Opc), DestReg) + .addReg(BaseReg, RegState::Kill) + .addImm(ThisVal))); + BaseReg = DestReg; } } @@ -288,7 +328,6 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, unsigned FrameReg, int Offset, const ARMBaseInstrInfo &TII) { unsigned Opcode = MI.getOpcode(); - unsigned NewOpc = Opcode; const TargetInstrDesc &Desc = MI.getDesc(); unsigned AddrMode = (Desc.TSFlags & ARMII::AddrModeMask); bool isSub = false; @@ -299,9 +338,12 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Opcode == ARM::t2ADDri || Opcode == ARM::t2ADDri12) { Offset += MI.getOperand(FrameRegIdx+1).getImm(); + + bool isSP = FrameReg == ARM::SP; if (Offset == 0) { // Turn it into a move. - MI.setDesc(TII.get(ARM::t2MOVr)); + unsigned NewOpc = isSP ? ARM::tMOVgpr2gpr : ARM::t2MOVr; + MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.RemoveOperand(FrameRegIdx+1); return 0; @@ -310,23 +352,23 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, if (Offset < 0) { Offset = -Offset; isSub = true; - MI.setDesc(TII.get(ARM::t2SUBri)); + MI.setDesc(TII.get(isSP ? ARM::t2SUBrSPi : ARM::t2SUBri)); + } else { + MI.setDesc(TII.get(isSP ? ARM::t2ADDrSPi : ARM::t2ADDri)); } // Common case: small offset, fits into instruction. if (ARM_AM::getT2SOImmVal(Offset) != -1) { - NewOpc = isSub ? ARM::t2SUBri : ARM::t2ADDri; - if (NewOpc != Opcode) - MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); return 0; } // Another common case: imm12. if (Offset < 4096) { - NewOpc = isSub ? ARM::t2SUBri12 : ARM::t2ADDri12; - if (NewOpc != Opcode) - MI.setDesc(TII.get(NewOpc)); + unsigned NewOpc = isSP + ? (isSub ? ARM::t2SUBrSPi12 : ARM::t2ADDrSPi12) + : (isSub ? ARM::t2SUBri12 : ARM::t2ADDri12); + MI.setDesc(TII.get(NewOpc)); MI.getOperand(FrameRegIdx).ChangeToRegister(FrameReg, false); MI.getOperand(FrameRegIdx+1).ChangeToImmediate(Offset); return 0; @@ -346,7 +388,7 @@ int llvm::rewriteT2FrameIndex(MachineInstr &MI, unsigned FrameRegIdx, } else { // AddrModeT2_so cannot handle any offset. If there is no offset // register then we change to an immediate version. - NewOpc = Opcode; + unsigned NewOpc = Opcode; if (AddrMode == ARMII::AddrModeT2_so) { unsigned OffsetReg = MI.getOperand(FrameRegIdx+1).getReg(); if (OffsetReg != 0) { diff --git a/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll new file mode 100644 index 0000000000..bdc23bab7c --- /dev/null +++ b/test/CodeGen/Thumb2/2009-08-06-SpDecBug.ll @@ -0,0 +1,24 @@ +; RUN: llvm-as < %s | llc -mtriple=thumbv7-none-linux-gnueabi | FileCheck %s +; PR4659 +; PR4682 + +define hidden arm_aapcscc i32 @__gcov_execlp(i8* %path, i8* %arg, ...) nounwind { +entry: +; CHECK: __gcov_execlp: +; CHECK: mov sp, r7 +; CHECK: sub sp, #1 * 4 + call arm_aapcscc void @__gcov_flush() nounwind + br i1 undef, label %bb5, label %bb + +bb: ; preds = %bb, %entry + br i1 undef, label %bb5, label %bb + +bb5: ; preds = %bb, %entry + %0 = alloca i8*, i32 undef, align 4 ; [#uses=1] + %1 = call arm_aapcscc i32 @execvp(i8* %path, i8** %0) nounwind ; [#uses=1] + ret i32 %1 +} + +declare hidden arm_aapcscc void @__gcov_flush() + +declare arm_aapcscc i32 @execvp(i8*, i8**) nounwind diff --git a/test/CodeGen/Thumb2/large-stack.ll b/test/CodeGen/Thumb2/large-stack.ll index 60604f020f..d183da44c1 100644 --- a/test/CodeGen/Thumb2/large-stack.ll +++ b/test/CodeGen/Thumb2/large-stack.ll @@ -2,7 +2,7 @@ define void @test1() { ; CHECK: test1: -; CHECK: sub.w sp, sp, #256 +; CHECK: sub sp, #64 * 4 %tmp = alloca [ 64 x i32 ] , align 4 ret void } @@ -10,7 +10,7 @@ define void @test1() { define void @test2() { ; CHECK: test2: ; CHECK: sub.w sp, sp, #4160 -; CHECK: sub.w sp, sp, #8 +; CHECK: sub sp, #2 * 4 %tmp = alloca [ 4168 x i8 ] , align 4 ret void } @@ -18,7 +18,7 @@ define void @test2() { define i32 @test3() { ; CHECK: test3: ; CHECK: sub.w sp, sp, #805306368 -; CHECK: sub.w sp, sp, #16 +; CHECK: sub sp, #4 * 4 %retval = alloca i32, align 4 %tmp = alloca i32, align 4 %a = alloca [805306369 x i8], align 16 -- 2.11.4.GIT