From daf067da04c98ce666f67ba2b3beed4cf4be7549 Mon Sep 17 00:00:00 2001 From: wanglei Date: Fri, 21 Oct 2022 17:03:37 +0800 Subject: [PATCH] [LoongArch] Stack realignment support This patch adds support for stack realignment while adding support for variable sized objects. Differential Revision: https://reviews.llvm.org/D136074 --- .../Target/LoongArch/LoongArchFrameLowering.cpp | 46 +- ...tack-realignment-with-variable-sized-objects.ll | 73 +++ llvm/test/CodeGen/LoongArch/stack-realignment.ll | 627 +++++++++++++++++++++ 3 files changed, 743 insertions(+), 3 deletions(-) create mode 100644 llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll create mode 100644 llvm/test/CodeGen/LoongArch/stack-realignment.ll diff --git a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp index 5de473bfc4dd..45472157b482 100644 --- a/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp +++ b/llvm/lib/Target/LoongArch/LoongArchFrameLowering.cpp @@ -184,6 +184,36 @@ void LoongArchFrameLowering::emitPrologue(MachineFunction &MF, BuildMI(MBB, MBBI, DL, TII->get(TargetOpcode::CFI_INSTRUCTION)) .addCFIIndex(CFIIndex) .setMIFlag(MachineInstr::FrameSetup); + + // Realign stack. + if (RI->hasStackRealignment(MF)) { + unsigned ShiftAmount = Log2(MFI.getMaxAlign()); + Register VR = + MF.getRegInfo().createVirtualRegister(&LoongArch::GPRRegClass); + BuildMI(MBB, MBBI, DL, + TII->get(STI.is64Bit() ? LoongArch::SRLI_D : LoongArch::SRLI_W), + VR) + .addReg(SPReg) + .addImm(ShiftAmount) + .setMIFlag(MachineInstr::FrameSetup); + BuildMI(MBB, MBBI, DL, + TII->get(STI.is64Bit() ? LoongArch::SLLI_D : LoongArch::SLLI_W), + SPReg) + .addReg(VR) + .addImm(ShiftAmount) + .setMIFlag(MachineInstr::FrameSetup); + // FP will be used to restore the frame in the epilogue, so we need + // another base register BP to record SP after re-alignment. SP will + // track the current stack after allocating variable sized objects. + if (hasBP(MF)) { + // move BP, $sp + BuildMI(MBB, MBBI, DL, TII->get(LoongArch::OR), + LoongArchABI::getBPReg()) + .addReg(SPReg) + .addReg(LoongArch::R0) + .setMIFlag(MachineInstr::FrameSetup); + } + } } } @@ -276,6 +306,7 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference( const MachineFrameInfo &MFI = MF.getFrameInfo(); const TargetRegisterInfo *RI = MF.getSubtarget().getRegisterInfo(); auto *LoongArchFI = MF.getInfo(); + uint64_t StackSize = MFI.getStackSize(); // Callee-saved registers should be referenced relative to the stack // pointer (positive offset), otherwise use the frame pointer (negative @@ -292,12 +323,21 @@ StackOffset LoongArchFrameLowering::getFrameIndexReference( MaxCSFI = CSI[CSI.size() - 1].getFrameIdx(); } - if ((FI >= MinCSFI && FI <= MaxCSFI) || !hasFP(MF)) { + if (FI >= MinCSFI && FI <= MaxCSFI) { FrameReg = LoongArch::R3; - Offset += StackOffset::getFixed(MFI.getStackSize()); + Offset += StackOffset::getFixed(StackSize); + } else if (RI->hasStackRealignment(MF) && !MFI.isFixedObjectIndex(FI)) { + // If the stack was realigned, the frame pointer is set in order to allow + // SP to be restored, so we need another base register to record the stack + // after realignment. + FrameReg = hasBP(MF) ? 
LoongArchABI::getBPReg() : LoongArch::R3; + Offset += StackOffset::getFixed(StackSize); } else { FrameReg = RI->getFrameRegister(MF); - Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); + if (hasFP(MF)) + Offset += StackOffset::getFixed(LoongArchFI->getVarArgsSaveSize()); + else + Offset += StackOffset::getFixed(StackSize); } return Offset; diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll new file mode 100644 index 000000000000..7e2f8246bca4 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stack-realignment-with-variable-sized-objects.ll @@ -0,0 +1,73 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare void @callee(i8*, i32*) + +define void @caller(i32 %n) { +; LA32-LABEL: caller: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -64 +; LA32-NEXT: .cfi_def_cfa_offset 64 +; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: st.w $s8, $sp, 52 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: .cfi_offset 31, -12 +; LA32-NEXT: addi.w $fp, $sp, 64 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a1, $sp, 6 +; LA32-NEXT: slli.w $sp, $a1, 6 +; LA32-NEXT: move $s8, $sp +; LA32-NEXT: addi.w $a0, $a0, 15 +; LA32-NEXT: addi.w $a1, $zero, -16 +; LA32-NEXT: and $a0, $a0, $a1 +; LA32-NEXT: sub.w $a0, $sp, $a0 +; LA32-NEXT: move $sp, $a0 +; LA32-NEXT: addi.w $a1, $s8, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -64 +; LA32-NEXT: ld.w $s8, $sp, 52 # 4-byte Folded Reload +; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 64 +; LA32-NEXT: ret +; +; LA64-LABEL: caller: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -64 +; LA64-NEXT: .cfi_def_cfa_offset 64 +; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 48 # 8-byte Folded Spill +; LA64-NEXT: st.d $s8, $sp, 40 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: .cfi_offset 31, -24 +; LA64-NEXT: addi.d $fp, $sp, 64 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a1, $sp, 6 +; LA64-NEXT: slli.d $sp, $a1, 6 +; LA64-NEXT: move $s8, $sp +; LA64-NEXT: addi.w $a1, $zero, -16 +; LA64-NEXT: lu32i.d $a1, 1 +; LA64-NEXT: bstrpick.d $a0, $a0, 31, 0 +; LA64-NEXT: addi.d $a0, $a0, 15 +; LA64-NEXT: and $a0, $a0, $a1 +; LA64-NEXT: sub.d $a0, $sp, $a0 +; LA64-NEXT: move $sp, $a0 +; LA64-NEXT: addi.d $a1, $s8, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -64 +; LA64-NEXT: ld.d $s8, $sp, 40 # 8-byte Folded Reload +; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 64 +; LA64-NEXT: ret + %1 = alloca i8, i32 %n + %2 = alloca i32, align 64 + call void @callee(i8* %1, i32 *%2) + ret void +} diff --git a/llvm/test/CodeGen/LoongArch/stack-realignment.ll b/llvm/test/CodeGen/LoongArch/stack-realignment.ll new file mode 100644 index 000000000000..16c7bcd8b1c5 --- /dev/null +++ b/llvm/test/CodeGen/LoongArch/stack-realignment.ll @@ -0,0 +1,627 @@ +; NOTE: Assertions have been 
autogenerated by utils/update_llc_test_checks.py +; RUN: llc --mtriple=loongarch32 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA32 +; RUN: llc --mtriple=loongarch64 --verify-machineinstrs < %s \ +; RUN: | FileCheck %s --check-prefix=LA64 + +declare void @callee(i8*) + +define void @caller32() { +; LA32-LABEL: caller32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -32 +; LA32-NEXT: .cfi_def_cfa_offset 32 +; LA32-NEXT: st.w $ra, $sp, 28 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 24 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 32 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 5 +; LA32-NEXT: slli.w $sp, $a0, 5 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -32 +; LA32-NEXT: ld.w $fp, $sp, 24 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 28 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 32 +; LA32-NEXT: ret +; +; LA64-LABEL: caller32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -32 +; LA64-NEXT: .cfi_def_cfa_offset 32 +; LA64-NEXT: st.d $ra, $sp, 24 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 16 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 32 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 5 +; LA64-NEXT: slli.d $sp, $a0, 5 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -32 +; LA64-NEXT: ld.d $fp, $sp, 16 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 24 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 32 +; LA64-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign32() "no-realign-stack" { +; LA32-LABEL: caller_no_realign32: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign32: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 32 + call void @callee(i8* %1) + ret void +} + +define void @caller64() { +; LA32-LABEL: caller64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -64 +; LA32-NEXT: .cfi_def_cfa_offset 64 +; LA32-NEXT: st.w $ra, $sp, 60 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 56 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 64 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 6 +; LA32-NEXT: slli.w $sp, $a0, 6 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -64 +; LA32-NEXT: ld.w $fp, $sp, 56 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 60 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 64 +; LA32-NEXT: ret +; +; LA64-LABEL: caller64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -64 +; LA64-NEXT: .cfi_def_cfa_offset 64 +; LA64-NEXT: st.d $ra, $sp, 56 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, 
$sp, 48 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 64 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 6 +; LA64-NEXT: slli.d $sp, $a0, 6 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -64 +; LA64-NEXT: ld.d $fp, $sp, 48 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 56 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 64 +; LA64-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign64() "no-realign-stack" { +; LA32-LABEL: caller_no_realign64: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign64: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 64 + call void @callee(i8* %1) + ret void +} + +define void @caller128() { +; LA32-LABEL: caller128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -128 +; LA32-NEXT: .cfi_def_cfa_offset 128 +; LA32-NEXT: st.w $ra, $sp, 124 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 120 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 128 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 7 +; LA32-NEXT: slli.w $sp, $a0, 7 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -128 +; LA32-NEXT: ld.w $fp, $sp, 120 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 124 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 128 +; LA32-NEXT: ret +; +; LA64-LABEL: caller128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -128 +; LA64-NEXT: .cfi_def_cfa_offset 128 +; LA64-NEXT: st.d $ra, $sp, 120 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 112 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 128 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 7 +; LA64-NEXT: slli.d $sp, $a0, 7 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -128 +; LA64-NEXT: ld.d $fp, $sp, 112 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 120 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 128 +; LA64-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign128() "no-realign-stack" { +; LA32-LABEL: caller_no_realign128: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign128: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte 
Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 128 + call void @callee(i8* %1) + ret void +} + +define void @caller256() { +; LA32-LABEL: caller256: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -256 +; LA32-NEXT: .cfi_def_cfa_offset 256 +; LA32-NEXT: st.w $ra, $sp, 252 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 248 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 256 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 8 +; LA32-NEXT: slli.w $sp, $a0, 8 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -256 +; LA32-NEXT: ld.w $fp, $sp, 248 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 252 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 256 +; LA32-NEXT: ret +; +; LA64-LABEL: caller256: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -256 +; LA64-NEXT: .cfi_def_cfa_offset 256 +; LA64-NEXT: st.d $ra, $sp, 248 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 240 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 256 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 8 +; LA64-NEXT: slli.d $sp, $a0, 8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -256 +; LA64-NEXT: ld.d $fp, $sp, 240 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 248 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 256 +; LA64-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign256() "no-realign-stack" { +; LA32-LABEL: caller_no_realign256: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign256: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 256 + call void @callee(i8* %1) + ret void +} + +define void @caller512() { +; LA32-LABEL: caller512: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -512 +; LA32-NEXT: .cfi_def_cfa_offset 512 +; LA32-NEXT: st.w $ra, $sp, 508 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 504 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 512 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 9 +; LA32-NEXT: slli.w $sp, $a0, 9 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -512 +; LA32-NEXT: ld.w $fp, $sp, 504 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 508 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 512 +; LA32-NEXT: ret +; +; LA64-LABEL: caller512: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -512 +; LA64-NEXT: .cfi_def_cfa_offset 512 +; LA64-NEXT: st.d $ra, $sp, 504 # 8-byte 
Folded Spill +; LA64-NEXT: st.d $fp, $sp, 496 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 512 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 9 +; LA64-NEXT: slli.d $sp, $a0, 9 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -512 +; LA64-NEXT: ld.d $fp, $sp, 496 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 504 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 512 +; LA64-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign512() "no-realign-stack" { +; LA32-LABEL: caller_no_realign512: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign512: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 512 + call void @callee(i8* %1) + ret void +} + +define void @caller1024() { +; LA32-LABEL: caller1024: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -1024 +; LA32-NEXT: .cfi_def_cfa_offset 1024 +; LA32-NEXT: st.w $ra, $sp, 1020 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 1016 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 1024 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 10 +; LA32-NEXT: slli.w $sp, $a0, 10 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -1024 +; LA32-NEXT: ld.w $fp, $sp, 1016 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 1020 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 1024 +; LA32-NEXT: ret +; +; LA64-LABEL: caller1024: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -1024 +; LA64-NEXT: .cfi_def_cfa_offset 1024 +; LA64-NEXT: st.d $ra, $sp, 1016 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 1008 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 1024 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 10 +; LA64-NEXT: slli.d $sp, $a0, 10 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -1024 +; LA64-NEXT: ld.d $fp, $sp, 1008 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 1016 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 1024 +; LA64-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign1024() "no-realign-stack" { +; LA32-LABEL: caller_no_realign1024: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign1024: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; 
LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 1024 + call void @callee(i8* %1) + ret void +} + +define void @caller2048() { +; LA32-LABEL: caller2048: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -2048 +; LA32-NEXT: .cfi_def_cfa_offset 2048 +; LA32-NEXT: st.w $ra, $sp, 2044 # 4-byte Folded Spill +; LA32-NEXT: st.w $fp, $sp, 2040 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: addi.w $fp, $sp, 2032 +; LA32-NEXT: addi.w $fp, $fp, 16 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 11 +; LA32-NEXT: slli.w $sp, $a0, 11 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: addi.w $sp, $fp, -2048 +; LA32-NEXT: ld.w $fp, $sp, 2040 # 4-byte Folded Reload +; LA32-NEXT: ld.w $ra, $sp, 2044 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 2032 +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller2048: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -2048 +; LA64-NEXT: .cfi_def_cfa_offset 2048 +; LA64-NEXT: st.d $ra, $sp, 2040 # 8-byte Folded Spill +; LA64-NEXT: st.d $fp, $sp, 2032 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: addi.d $fp, $sp, 2032 +; LA64-NEXT: addi.d $fp, $fp, 16 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 11 +; LA64-NEXT: slli.d $sp, $a0, 11 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: addi.d $sp, $fp, -2048 +; LA64-NEXT: ld.d $fp, $sp, 2032 # 8-byte Folded Reload +; LA64-NEXT: ld.d $ra, $sp, 2040 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 2032 +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign2048() "no-realign-stack" { +; LA32-LABEL: caller_no_realign2048: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign2048: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 2048 + call void @callee(i8* %1) + ret void +} + +define void @caller4096() { +; LA32-LABEL: caller4096: +; LA32: # %bb.0: +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: sub.w $sp, $sp, $a0 +; LA32-NEXT: .cfi_def_cfa_offset 4096 +; LA32-NEXT: ori $a0, $zero, 4092 +; LA32-NEXT: add.w $a0, $sp, $a0 +; LA32-NEXT: st.w $ra, $a0, 0 # 4-byte Folded Spill +; LA32-NEXT: ori $a0, $zero, 4088 +; LA32-NEXT: add.w $a0, $sp, $a0 +; LA32-NEXT: st.w $fp, $a0, 0 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: .cfi_offset 22, -8 +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: add.w $fp, $sp, $a0 +; LA32-NEXT: .cfi_def_cfa 22, 0 +; LA32-NEXT: srli.w $a0, $sp, 12 +; LA32-NEXT: slli.w $sp, 
$a0, 12 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: sub.w $sp, $fp, $a0 +; LA32-NEXT: ori $a0, $zero, 4088 +; LA32-NEXT: add.w $a0, $sp, $a0 +; LA32-NEXT: ld.w $fp, $a0, 0 # 4-byte Folded Reload +; LA32-NEXT: ori $a0, $zero, 4092 +; LA32-NEXT: add.w $a0, $sp, $a0 +; LA32-NEXT: ld.w $ra, $a0, 0 # 4-byte Folded Reload +; LA32-NEXT: lu12i.w $a0, 1 +; LA32-NEXT: add.w $sp, $sp, $a0 +; LA32-NEXT: ret +; +; LA64-LABEL: caller4096: +; LA64: # %bb.0: +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: sub.d $sp, $sp, $a0 +; LA64-NEXT: .cfi_def_cfa_offset 4096 +; LA64-NEXT: ori $a0, $zero, 4088 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: st.d $ra, $a0, 0 # 8-byte Folded Spill +; LA64-NEXT: ori $a0, $zero, 4080 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: st.d $fp, $a0, 0 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: .cfi_offset 22, -16 +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: add.d $fp, $sp, $a0 +; LA64-NEXT: .cfi_def_cfa 22, 0 +; LA64-NEXT: srli.d $a0, $sp, 12 +; LA64-NEXT: slli.d $sp, $a0, 12 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: sub.d $sp, $fp, $a0 +; LA64-NEXT: ori $a0, $zero, 4080 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: ld.d $fp, $a0, 0 # 8-byte Folded Reload +; LA64-NEXT: ori $a0, $zero, 4088 +; LA64-NEXT: add.d $a0, $sp, $a0 +; LA64-NEXT: ld.d $ra, $a0, 0 # 8-byte Folded Reload +; LA64-NEXT: lu12i.w $a0, 1 +; LA64-NEXT: add.d $sp, $sp, $a0 +; LA64-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +} + +define void @caller_no_realign4096() "no-realign-stack" { +; LA32-LABEL: caller_no_realign4096: +; LA32: # %bb.0: +; LA32-NEXT: addi.w $sp, $sp, -16 +; LA32-NEXT: .cfi_def_cfa_offset 16 +; LA32-NEXT: st.w $ra, $sp, 12 # 4-byte Folded Spill +; LA32-NEXT: .cfi_offset 1, -4 +; LA32-NEXT: addi.w $a0, $sp, 0 +; LA32-NEXT: bl %plt(callee) +; LA32-NEXT: ld.w $ra, $sp, 12 # 4-byte Folded Reload +; LA32-NEXT: addi.w $sp, $sp, 16 +; LA32-NEXT: ret +; +; LA64-LABEL: caller_no_realign4096: +; LA64: # %bb.0: +; LA64-NEXT: addi.d $sp, $sp, -16 +; LA64-NEXT: .cfi_def_cfa_offset 16 +; LA64-NEXT: st.d $ra, $sp, 8 # 8-byte Folded Spill +; LA64-NEXT: .cfi_offset 1, -8 +; LA64-NEXT: addi.d $a0, $sp, 0 +; LA64-NEXT: bl %plt(callee) +; LA64-NEXT: ld.d $ra, $sp, 8 # 8-byte Folded Reload +; LA64-NEXT: addi.d $sp, $sp, 16 +; LA64-NEXT: ret + %1 = alloca i8, align 4096 + call void @callee(i8* %1) + ret void +} -- 2.11.4.GIT
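
A note on the prologue sequence exercised throughout the tests above: the srli/slli pair rounds $sp down to the requested alignment by clearing its low Log2(MaxAlign) bits, and it is emitted only after $fp has been established, so the epilogue can restore $sp from $fp (e.g. "addi.d $sp, $fp, -64"). Below is a minimal, self-contained C++ sketch of that arithmetic; the helper name, the sample address, and the main() driver are illustrative only and are not part of this patch or of the LLVM API.

#include <cassert>
#include <cstdint>

// Round an address down to a power-of-two boundary by clearing its low
// log2(Align) bits -- the same effect as the srli.d/slli.d pair (srli.w/slli.w
// on LA32) that emitPrologue emits when stack realignment is required.
uint64_t alignDown(uint64_t SP, uint64_t Align) {
  assert(Align != 0 && (Align & (Align - 1)) == 0 &&
         "alignment must be a power of two");
  unsigned ShiftAmount = 0;
  while ((Align >> ShiftAmount) > 1)
    ++ShiftAmount;                            // ShiftAmount == log2(Align)
  return (SP >> ShiftAmount) << ShiftAmount;  // shift right, then left
}

int main() {
  // caller64 in stack-realignment.ll requests a 64-byte-aligned alloca, so
  // ShiftAmount is 6. The input address here is arbitrary.
  uint64_t SP = 0x7ffffff8c8;
  uint64_t Aligned = alignDown(SP, 64);
  assert(Aligned % 64 == 0 && Aligned <= SP);
  return 0;
}

Because the rounding only ever moves $sp to a lower address, realignment extends the frame downward rather than overlapping it; the callee-saved spills stay addressable through $fp, and when variable-sized objects are also present (stack-realignment-with-variable-sized-objects.ll), the base pointer $s8 records the realigned $sp so that fixed frame objects can still be addressed after $sp has moved again.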