1 //===- StackColoring.cpp --------------------------------------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass implements the stack-coloring optimization that looks for
10 // lifetime markers machine instructions (LIFESTART_BEGIN and LIFESTART_END),
11 // which represent the possible lifetime of stack slots. It attempts to
12 // merge disjoint stack slots and reduce the used stack space.
13 // NOTE: This pass is not StackSlotColoring, which optimizes spill slots.
15 // TODO: In the future we plan to improve stack coloring in the following ways:
16 // 1. Allow merging multiple small slots into a single larger slot at different
18 // 2. Merge this pass with StackSlotColoring and allow merging of allocas with
21 //===----------------------------------------------------------------------===//
23 #include "llvm/ADT/BitVector.h"
24 #include "llvm/ADT/DenseMap.h"
25 #include "llvm/ADT/DepthFirstIterator.h"
26 #include "llvm/ADT/SmallPtrSet.h"
27 #include "llvm/ADT/SmallVector.h"
28 #include "llvm/ADT/Statistic.h"
29 #include "llvm/Analysis/ValueTracking.h"
30 #include "llvm/CodeGen/LiveInterval.h"
31 #include "llvm/CodeGen/MachineBasicBlock.h"
32 #include "llvm/CodeGen/MachineFrameInfo.h"
33 #include "llvm/CodeGen/MachineFunction.h"
34 #include "llvm/CodeGen/MachineFunctionPass.h"
35 #include "llvm/CodeGen/MachineInstr.h"
36 #include "llvm/CodeGen/MachineMemOperand.h"
37 #include "llvm/CodeGen/MachineOperand.h"
38 #include "llvm/CodeGen/Passes.h"
39 #include "llvm/CodeGen/SelectionDAGNodes.h"
40 #include "llvm/CodeGen/SlotIndexes.h"
41 #include "llvm/CodeGen/TargetOpcodes.h"
42 #include "llvm/CodeGen/WinEHFuncInfo.h"
43 #include "llvm/Config/llvm-config.h"
44 #include "llvm/IR/Constants.h"
45 #include "llvm/IR/DebugInfoMetadata.h"
46 #include "llvm/IR/Function.h"
47 #include "llvm/IR/Instructions.h"
48 #include "llvm/IR/Metadata.h"
49 #include "llvm/IR/Use.h"
50 #include "llvm/IR/Value.h"
51 #include "llvm/InitializePasses.h"
52 #include "llvm/Pass.h"
53 #include "llvm/Support/Casting.h"
54 #include "llvm/Support/CommandLine.h"
55 #include "llvm/Support/Compiler.h"
56 #include "llvm/Support/Debug.h"
57 #include "llvm/Support/raw_ostream.h"
66 #define DEBUG_TYPE "stack-coloring"
69 DisableColoring("no-stack-coloring",
70 cl::init(false), cl::Hidden
,
71 cl::desc("Disable stack coloring"));
73 /// The user may write code that uses allocas outside of the declared lifetime
74 /// zone. This can happen when the user returns a reference to a local
75 /// data-structure. We can detect these cases and decide not to optimize the
76 /// code. If this flag is enabled, we try to save the user. This option
77 /// is treated as overriding LifetimeStartOnFirstUse below.
79 ProtectFromEscapedAllocas("protect-from-escaped-allocas",
80 cl::init(false), cl::Hidden
,
81 cl::desc("Do not optimize lifetime zones that "
84 /// Enable enhanced dataflow scheme for lifetime analysis (treat first
85 /// use of stack slot as start of slot lifetime, as opposed to looking
86 /// for LIFETIME_START marker). See "Implementation notes" below for
89 LifetimeStartOnFirstUse("stackcoloring-lifetime-start-on-first-use",
90 cl::init(true), cl::Hidden
,
91 cl::desc("Treat stack lifetimes as starting on first use, not on START marker."));
94 STATISTIC(NumMarkerSeen
, "Number of lifetime markers found.");
95 STATISTIC(StackSpaceSaved
, "Number of bytes saved due to merging slots.");
96 STATISTIC(StackSlotMerged
, "Number of stack slot merged.");
97 STATISTIC(EscapedAllocas
, "Number of allocas that escaped the lifetime region");
99 //===----------------------------------------------------------------------===//
100 // StackColoring Pass
101 //===----------------------------------------------------------------------===//
103 // Stack Coloring reduces stack usage by merging stack slots when they
104 // can't be used together. For example, consider the following C program:
106 // void bar(char *, int);
107 // void foo(bool var) {
126 // Naively-compiled, this program would use 12k of stack space. However, the
127 // stack slot corresponding to `z` is always destroyed before either of the
128 // stack slots for `x` or `y` are used, and then `x` is only used if `var`
129 // is true, while `y` is only used if `var` is false. So in no time are 2
130 // of the stack slots used together, and therefore we can merge them,
131 // compiling the function using only a single 4k alloca:
133 // void foo(bool var) { // equivalent
146 // This is an important optimization if we want stack space to be under
147 // control in large functions, both open-coded ones and ones created by
150 // Implementation Notes:
151 // ---------------------
153 // An important part of the above reasoning is that `z` can't be accessed
154 // while the latter 2 calls to `bar` are running. This is justified because
155 // `z`'s lifetime is over after we exit from block `A:`, so any further
156 // accesses to it would be UB. The way we represent this information
157 // in LLVM is by having frontends delimit blocks with `lifetime.start`
158 // and `lifetime.end` intrinsics.
160 // The effect of these intrinsics seems to be as follows (maybe I should
161 // specify this in the reference?):
163 // L1) at start, each stack-slot is marked as *out-of-scope*, unless no
164 // lifetime intrinsic refers to that stack slot, in which case
165 // it is marked as *in-scope*.
166 // L2) on a `lifetime.start`, a stack slot is marked as *in-scope* and
167 // the stack slot is overwritten with `undef`.
168 // L3) on a `lifetime.end`, a stack slot is marked as *out-of-scope*.
169 // L4) on function exit, all stack slots are marked as *out-of-scope*.
170 // L5) `lifetime.end` is a no-op when called on a slot that is already
172 // L6) memory accesses to *out-of-scope* stack slots are UB.
173 // L7) when a stack-slot is marked as *out-of-scope*, all pointers to it
174 // are invalidated, unless the slot is "degenerate". This is used to
175 // justify not marking slots as in-use until the pointer to them is
176 // used, but feels a bit hacky in the presence of things like LICM. See
177 // the "Degenerate Slots" section for more details.
179 // Now, let's ground stack coloring on these rules. We'll define a slot
180 // as *in-use* at a (dynamic) point in execution if it either can be
181 // written to at that point, or if it has a live and non-undef content
184 // Obviously, slots that are never *in-use* together can be merged, and
185 // in our example `foo`, the slots for `x`, `y` and `z` are never
186 // in-use together (of course, sometimes slots that *are* in-use together
187 // might still be mergable, but we don't care about that here).
189 // In this implementation, we successively merge pairs of slots that are
190 // not *in-use* together. We could be smarter - for example, we could merge
191 // a single large slot with 2 small slots, or we could construct the
192 // interference graph and run a "smart" graph coloring algorithm, but with
193 // that aside, how do we find out whether a pair of slots might be *in-use*
196 // From our rules, we see that *out-of-scope* slots are never *in-use*,
197 // and from (L7) we see that "non-degenerate" slots remain non-*in-use*
198 // until their address is taken. Therefore, we can approximate slot activity
201 // A subtle point: naively, we might try to figure out which pairs of
202 // stack-slots interfere by propagating `S in-use` through the CFG for every
203 // stack-slot `S`, and having `S` and `T` interfere if there is a CFG point in
204 // which they are both *in-use*.
206 // That is sound, but overly conservative in some cases: in our (artificial)
207 // example `foo`, either `x` or `y` might be in use at the label `B:`, but
208 // as `x` is only in use if we came in from the `var` edge and `y` only
209 // if we came from the `!var` edge, they still can't be in use together.
210 // See PR32488 for an important real-life case.
212 // If we wanted to find all points of interference precisely, we could
213 // propagate `S in-use` and `S&T in-use` predicates through the CFG. That
214 // would be precise, but requires propagating `O(n^2)` dataflow facts.
216 // However, we aren't interested in the *set* of points of interference
217 // between 2 stack slots, only *whether* there *is* such a point. So we
218 // can rely on a little trick: for `S` and `T` to be in-use together,
219 // one of them needs to become in-use while the other is in-use (or
220 // they might both become in use simultaneously). We can check this
221 // by also keeping track of the points at which a stack slot might *start*
227 // Consider the following motivating example:
230 // char b1[1024], b2[1024];
236 // char b4[1024], b5[1024];
237 // <uses of b2, b4, b5>;
242 // In the code above, "b3" and "b4" are declared in distinct lexical
243 // scopes, meaning that it is easy to prove that they can share the
244 // same stack slot. Variables "b1" and "b2" are declared in the same
245 // scope, meaning that from a lexical point of view, their lifetimes
246 // overlap. From a control flow pointer of view, however, the two
247 // variables are accessed in disjoint regions of the CFG, thus it
248 // should be possible for them to share the same stack slot. An ideal
249 // stack allocation for the function above would look like:
255 // Achieving this allocation is tricky, however, due to the way
256 // lifetime markers are inserted. Here is a simplified view of the
257 // control flow graph for the code above:
259 // +------ block 0 -------+
260 // 0| LIFETIME_START b1, b2 |
261 // 1| <test 'if' condition> |
262 // +-----------------------+
264 // +------ block 1 -------+ +------ block 2 -------+
265 // 2| LIFETIME_START b3 | 5| LIFETIME_START b4, b5 |
266 // 3| <uses of b1, b3> | 6| <uses of b2, b4, b5> |
267 // 4| LIFETIME_END b3 | 7| LIFETIME_END b4, b5 |
268 // +-----------------------+ +-----------------------+
270 // +------ block 3 -------+
271 // 8| <cleanupcode> |
272 // 9| LIFETIME_END b1, b2 |
274 // +-----------------------+
276 // If we create live intervals for the variables above strictly based
277 // on the lifetime markers, we'll get the set of intervals on the
278 // left. If we ignore the lifetime start markers and instead treat a
279 // variable's lifetime as beginning with the first reference to the
280 // var, then we get the intervals on the right.
282 // LIFETIME_START First Use
283 // b1: [0,9] [3,4] [8,9]
289 // For the intervals on the left, the best we can do is overlap two
290 // variables (b3 and b4, for example); this gives us a stack size of
291 // 4*1024 bytes, not ideal. When treating first-use as the start of a
292 // lifetime, we can additionally overlap b1 and b5, giving us a 3*1024
293 // byte stack (better).
298 // Relying entirely on first-use of stack slots is problematic,
299 // however, due to the fact that optimizations can sometimes migrate
300 // uses of a variable outside of its lifetime start/end region. Here
304 // char b1[1024], b2[1024];
317 // Before optimization, the control flow graph for the code above
318 // might look like the following:
320 // +------ block 0 -------+
321 // 0| LIFETIME_START b1, b2 |
322 // 1| <test 'if' condition> |
323 // +-----------------------+
325 // +------ block 1 -------+ +------- block 2 -------+
326 // 2| <uses of b2> | 3| <uses of b1> |
327 // +-----------------------+ +-----------------------+
329 // | +------- block 3 -------+ <-\.
330 // | 4| <while condition> | |
331 // | +-----------------------+ |
333 // | / +------- block 4 -------+
334 // \ / 5| LIFETIME_START b3 | |
335 // \ / 6| <uses of b3> | |
336 // \ / 7| LIFETIME_END b3 | |
337 // \ | +------------------------+ |
339 // +------ block 5 -----+ \---------------
340 // 8| <cleanupcode> |
341 // 9| LIFETIME_END b1, b2 |
343 // +---------------------+
345 // During optimization, however, it can happen that an instruction
346 // computing an address in "b3" (for example, a loop-invariant GEP) is
347 // hoisted up out of the loop from block 4 to block 2. [Note that
348 // this is not an actual load from the stack, only an instruction that
349 // computes the address to be loaded]. If this happens, there is now a
350 // path leading from the first use of b3 to the return instruction
351 // that does not encounter the b3 LIFETIME_END, hence b3's lifetime is
352 // now larger than if we were computing live intervals strictly based
353 // on lifetime markers. In the example above, this lengthened lifetime
354 // would mean that it would appear illegal to overlap b3 with b2.
356 // To deal with this such cases, the code in ::collectMarkers() below
357 // tries to identify "degenerate" slots -- those slots where on a single
358 // forward pass through the CFG we encounter a first reference to slot
359 // K before we hit the slot K lifetime start marker. For such slots,
360 // we fall back on using the lifetime start marker as the beginning of
361 // the variable's lifetime. NB: with this implementation, slots can
362 // appear degenerate in cases where there is unstructured control flow:
367 // memcpy(&b[0], ...);
372 // If in RPO ordering chosen to walk the CFG we happen to visit the b[k]
373 // before visiting the memcpy block (which will contain the lifetime start
374 // for "b" then it will appear that 'b' has a degenerate lifetime.
376 // Handle Windows Exception with LifetimeStartOnFirstUse:
379 // There was a bug for using LifetimeStartOnFirstUse in win32.
382 // ~Type1(){ write memory;}
388 // } catch (Type2 X){
391 // For variable X in catch(X), we put point pX=&(&X) into ConservativeSlots
392 // to prevent using LifetimeStartOnFirstUse. Because pX may merged with
393 // object V which may call destructor after implicitly writing pX. All these
394 // are done in C++ EH runtime libs (through CxxThrowException), and can't
395 // obviously check it in IR level.
397 // The loader of pX, without obvious writing IR, is usually the first LOAD MI
398 // in EHPad, Some like:
399 // bb.x.catch.i (landing-pad, ehfunclet-entry):
400 // ; predecessors: %bb...
401 // successors: %bb...
402 // %n:gr32 = MOV32rm %stack.pX ...
404 // The Type2** %stack.pX will only be written in EH runtime libs, so we
405 // check the StoreSlots to screen it out.
409 /// StackColoring - A machine pass for merging disjoint stack allocations,
410 /// marked by the LIFETIME_START and LIFETIME_END pseudo instructions.
411 class StackColoring
: public MachineFunctionPass
{
412 MachineFrameInfo
*MFI
;
415 /// A class representing liveness information for a single basic block.
416 /// Each bit in the BitVector represents the liveness property
417 /// for a different stack slot.
418 struct BlockLifetimeInfo
{
419 /// Which slots BEGINs in each basic block.
422 /// Which slots ENDs in each basic block.
425 /// Which slots are marked as LIVE_IN, coming into each basic block.
428 /// Which slots are marked as LIVE_OUT, coming out of each basic block.
432 /// Maps active slots (per bit) for each basic block.
433 using LivenessMap
= DenseMap
<const MachineBasicBlock
*, BlockLifetimeInfo
>;
434 LivenessMap BlockLiveness
;
436 /// Maps serial numbers to basic blocks.
437 DenseMap
<const MachineBasicBlock
*, int> BasicBlocks
;
439 /// Maps basic blocks to a serial number.
440 SmallVector
<const MachineBasicBlock
*, 8> BasicBlockNumbering
;
442 /// Maps slots to their use interval. Outside of this interval, slots
443 /// values are either dead or `undef` and they will not be written to.
444 SmallVector
<std::unique_ptr
<LiveInterval
>, 16> Intervals
;
446 /// Maps slots to the points where they can become in-use.
447 SmallVector
<SmallVector
<SlotIndex
, 4>, 16> LiveStarts
;
449 /// VNInfo is used for the construction of LiveIntervals.
450 VNInfo::Allocator VNInfoAllocator
;
452 /// SlotIndex analysis object.
453 SlotIndexes
*Indexes
;
455 /// The list of lifetime markers found. These markers are to be removed
456 /// once the coloring is done.
457 SmallVector
<MachineInstr
*, 8> Markers
;
459 /// Record the FI slots for which we have seen some sort of
460 /// lifetime marker (either start or end).
461 BitVector InterestingSlots
;
463 /// FI slots that need to be handled conservatively (for these
464 /// slots lifetime-start-on-first-use is disabled).
465 BitVector ConservativeSlots
;
467 /// Record the FI slots referenced by a 'may write to memory'.
468 BitVector StoreSlots
;
470 /// Number of iterations taken during data flow analysis.
471 unsigned NumIterations
;
476 StackColoring() : MachineFunctionPass(ID
) {
477 initializeStackColoringPass(*PassRegistry::getPassRegistry());
480 void getAnalysisUsage(AnalysisUsage
&AU
) const override
;
481 bool runOnMachineFunction(MachineFunction
&Func
) override
;
484 /// Used in collectMarkers
485 using BlockBitVecMap
= DenseMap
<const MachineBasicBlock
*, BitVector
>;
489 void dumpIntervals() const;
490 void dumpBB(MachineBasicBlock
*MBB
) const;
491 void dumpBV(const char *tag
, const BitVector
&BV
) const;
493 /// Removes all of the lifetime marker instructions from the function.
494 /// \returns true if any markers were removed.
495 bool removeAllMarkers();
497 /// Scan the machine function and find all of the lifetime markers.
498 /// Record the findings in the BEGIN and END vectors.
499 /// \returns the number of markers found.
500 unsigned collectMarkers(unsigned NumSlot
);
502 /// Perform the dataflow calculation and calculate the lifetime for each of
503 /// the slots, based on the BEGIN/END vectors. Set the LifetimeLIVE_IN and
504 /// LifetimeLIVE_OUT maps that represent which stack slots are live coming
505 /// in and out blocks.
506 void calculateLocalLiveness();
508 /// Returns TRUE if we're using the first-use-begins-lifetime method for
509 /// this slot (if FALSE, then the start marker is treated as start of lifetime).
510 bool applyFirstUse(int Slot
) {
511 if (!LifetimeStartOnFirstUse
|| ProtectFromEscapedAllocas
)
513 if (ConservativeSlots
.test(Slot
))
518 /// Examines the specified instruction and returns TRUE if the instruction
519 /// represents the start or end of an interesting lifetime. The slot or slots
520 /// starting or ending are added to the vector "slots" and "isStart" is set
522 /// \returns True if inst contains a lifetime start or end
523 bool isLifetimeStartOrEnd(const MachineInstr
&MI
,
524 SmallVector
<int, 4> &slots
,
527 /// Construct the LiveIntervals for the slots.
528 void calculateLiveIntervals(unsigned NumSlots
);
530 /// Go over the machine function and change instructions which use stack
531 /// slots to use the joint slots.
532 void remapInstructions(DenseMap
<int, int> &SlotRemap
);
534 /// The input program may contain instructions which are not inside lifetime
535 /// markers. This can happen due to a bug in the compiler or due to a bug in
536 /// user code (for example, returning a reference to a local variable).
537 /// This procedure checks all of the instructions in the function and
538 /// invalidates lifetime ranges which do not contain all of the instructions
539 /// which access that frame slot.
540 void removeInvalidSlotRanges();
542 /// Map entries which point to other entries to their destination.
543 /// A->B->C becomes A->C.
544 void expungeSlotMap(DenseMap
<int, int> &SlotRemap
, unsigned NumSlots
);
547 } // end anonymous namespace
549 char StackColoring::ID
= 0;
551 char &llvm::StackColoringID
= StackColoring::ID
;
553 INITIALIZE_PASS_BEGIN(StackColoring
, DEBUG_TYPE
,
554 "Merge disjoint stack slots", false, false)
555 INITIALIZE_PASS_DEPENDENCY(SlotIndexes
)
556 INITIALIZE_PASS_END(StackColoring
, DEBUG_TYPE
,
557 "Merge disjoint stack slots", false, false)
559 void StackColoring::getAnalysisUsage(AnalysisUsage
&AU
) const {
560 AU
.addRequired
<SlotIndexes
>();
561 MachineFunctionPass::getAnalysisUsage(AU
);
564 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
565 LLVM_DUMP_METHOD
void StackColoring::dumpBV(const char *tag
,
566 const BitVector
&BV
) const {
567 dbgs() << tag
<< " : { ";
568 for (unsigned I
= 0, E
= BV
.size(); I
!= E
; ++I
)
569 dbgs() << BV
.test(I
) << " ";
573 LLVM_DUMP_METHOD
void StackColoring::dumpBB(MachineBasicBlock
*MBB
) const {
574 LivenessMap::const_iterator BI
= BlockLiveness
.find(MBB
);
575 assert(BI
!= BlockLiveness
.end() && "Block not found");
576 const BlockLifetimeInfo
&BlockInfo
= BI
->second
;
578 dumpBV("BEGIN", BlockInfo
.Begin
);
579 dumpBV("END", BlockInfo
.End
);
580 dumpBV("LIVE_IN", BlockInfo
.LiveIn
);
581 dumpBV("LIVE_OUT", BlockInfo
.LiveOut
);
584 LLVM_DUMP_METHOD
void StackColoring::dump() const {
585 for (MachineBasicBlock
*MBB
: depth_first(MF
)) {
586 dbgs() << "Inspecting block #" << MBB
->getNumber() << " ["
587 << MBB
->getName() << "]\n";
592 LLVM_DUMP_METHOD
void StackColoring::dumpIntervals() const {
593 for (unsigned I
= 0, E
= Intervals
.size(); I
!= E
; ++I
) {
594 dbgs() << "Interval[" << I
<< "]:\n";
595 Intervals
[I
]->dump();
600 static inline int getStartOrEndSlot(const MachineInstr
&MI
)
602 assert((MI
.getOpcode() == TargetOpcode::LIFETIME_START
||
603 MI
.getOpcode() == TargetOpcode::LIFETIME_END
) &&
604 "Expected LIFETIME_START or LIFETIME_END op");
605 const MachineOperand
&MO
= MI
.getOperand(0);
606 int Slot
= MO
.getIndex();
612 // At the moment the only way to end a variable lifetime is with
613 // a VARIABLE_LIFETIME op (which can't contain a start). If things
614 // change and the IR allows for a single inst that both begins
615 // and ends lifetime(s), this interface will need to be reworked.
616 bool StackColoring::isLifetimeStartOrEnd(const MachineInstr
&MI
,
617 SmallVector
<int, 4> &slots
,
619 if (MI
.getOpcode() == TargetOpcode::LIFETIME_START
||
620 MI
.getOpcode() == TargetOpcode::LIFETIME_END
) {
621 int Slot
= getStartOrEndSlot(MI
);
624 if (!InterestingSlots
.test(Slot
))
626 slots
.push_back(Slot
);
627 if (MI
.getOpcode() == TargetOpcode::LIFETIME_END
) {
631 if (!applyFirstUse(Slot
)) {
635 } else if (LifetimeStartOnFirstUse
&& !ProtectFromEscapedAllocas
) {
636 if (!MI
.isDebugInstr()) {
638 for (const MachineOperand
&MO
: MI
.operands()) {
641 int Slot
= MO
.getIndex();
644 if (InterestingSlots
.test(Slot
) && applyFirstUse(Slot
)) {
645 slots
.push_back(Slot
);
658 unsigned StackColoring::collectMarkers(unsigned NumSlot
) {
659 unsigned MarkersFound
= 0;
660 BlockBitVecMap SeenStartMap
;
661 InterestingSlots
.clear();
662 InterestingSlots
.resize(NumSlot
);
663 ConservativeSlots
.clear();
664 ConservativeSlots
.resize(NumSlot
);
666 StoreSlots
.resize(NumSlot
);
668 // number of start and end lifetime ops for each slot
669 SmallVector
<int, 8> NumStartLifetimes(NumSlot
, 0);
670 SmallVector
<int, 8> NumEndLifetimes(NumSlot
, 0);
671 SmallVector
<int, 8> NumLoadInCatchPad(NumSlot
, 0);
673 // Step 1: collect markers and populate the "InterestingSlots"
674 // and "ConservativeSlots" sets.
675 for (MachineBasicBlock
*MBB
: depth_first(MF
)) {
676 // Compute the set of slots for which we've seen a START marker but have
677 // not yet seen an END marker at this point in the walk (e.g. on entry
679 BitVector BetweenStartEnd
;
680 BetweenStartEnd
.resize(NumSlot
);
681 for (const MachineBasicBlock
*Pred
: MBB
->predecessors()) {
682 BlockBitVecMap::const_iterator I
= SeenStartMap
.find(Pred
);
683 if (I
!= SeenStartMap
.end()) {
684 BetweenStartEnd
|= I
->second
;
688 // Walk the instructions in the block to look for start/end ops.
689 for (MachineInstr
&MI
: *MBB
) {
690 if (MI
.isDebugInstr())
692 if (MI
.getOpcode() == TargetOpcode::LIFETIME_START
||
693 MI
.getOpcode() == TargetOpcode::LIFETIME_END
) {
694 int Slot
= getStartOrEndSlot(MI
);
697 InterestingSlots
.set(Slot
);
698 if (MI
.getOpcode() == TargetOpcode::LIFETIME_START
) {
699 BetweenStartEnd
.set(Slot
);
700 NumStartLifetimes
[Slot
] += 1;
702 BetweenStartEnd
.reset(Slot
);
703 NumEndLifetimes
[Slot
] += 1;
705 const AllocaInst
*Allocation
= MFI
->getObjectAllocation(Slot
);
707 LLVM_DEBUG(dbgs() << "Found a lifetime ");
708 LLVM_DEBUG(dbgs() << (MI
.getOpcode() == TargetOpcode::LIFETIME_START
711 LLVM_DEBUG(dbgs() << " marker for slot #" << Slot
);
713 << " with allocation: " << Allocation
->getName() << "\n");
715 Markers
.push_back(&MI
);
718 for (const MachineOperand
&MO
: MI
.operands()) {
721 int Slot
= MO
.getIndex();
724 if (! BetweenStartEnd
.test(Slot
)) {
725 ConservativeSlots
.set(Slot
);
727 // Here we check the StoreSlots to screen catch point out. For more
728 // information, please refer "Handle Windows Exception with
729 // LifetimeStartOnFirstUse" at the head of this file.
731 StoreSlots
.set(Slot
);
732 if (MF
->getWinEHFuncInfo() && MBB
->isEHPad() && MI
.mayLoad())
733 NumLoadInCatchPad
[Slot
] += 1;
737 BitVector
&SeenStart
= SeenStartMap
[MBB
];
738 SeenStart
|= BetweenStartEnd
;
744 // 1) PR27903: slots with multiple start or end lifetime ops are not
745 // safe to enable for "lifetime-start-on-first-use".
746 // 2) And also not safe for variable X in catch(X) in windows.
747 for (unsigned slot
= 0; slot
< NumSlot
; ++slot
) {
748 if (NumStartLifetimes
[slot
] > 1 || NumEndLifetimes
[slot
] > 1 ||
749 (NumLoadInCatchPad
[slot
] > 1 && !StoreSlots
.test(slot
)))
750 ConservativeSlots
.set(slot
);
752 LLVM_DEBUG(dumpBV("Conservative slots", ConservativeSlots
));
754 // Step 2: compute begin/end sets for each block
756 // NOTE: We use a depth-first iteration to ensure that we obtain a
757 // deterministic numbering.
758 for (MachineBasicBlock
*MBB
: depth_first(MF
)) {
759 // Assign a serial number to this basic block.
760 BasicBlocks
[MBB
] = BasicBlockNumbering
.size();
761 BasicBlockNumbering
.push_back(MBB
);
763 // Keep a reference to avoid repeated lookups.
764 BlockLifetimeInfo
&BlockInfo
= BlockLiveness
[MBB
];
766 BlockInfo
.Begin
.resize(NumSlot
);
767 BlockInfo
.End
.resize(NumSlot
);
769 SmallVector
<int, 4> slots
;
770 for (MachineInstr
&MI
: *MBB
) {
771 bool isStart
= false;
773 if (isLifetimeStartOrEnd(MI
, slots
, isStart
)) {
775 assert(slots
.size() == 1 && "unexpected: MI ends multiple slots");
777 if (BlockInfo
.Begin
.test(Slot
)) {
778 BlockInfo
.Begin
.reset(Slot
);
780 BlockInfo
.End
.set(Slot
);
782 for (auto Slot
: slots
) {
783 LLVM_DEBUG(dbgs() << "Found a use of slot #" << Slot
);
785 << " at " << printMBBReference(*MBB
) << " index ");
786 LLVM_DEBUG(Indexes
->getInstructionIndex(MI
).print(dbgs()));
787 const AllocaInst
*Allocation
= MFI
->getObjectAllocation(Slot
);
790 << " with allocation: " << Allocation
->getName());
792 LLVM_DEBUG(dbgs() << "\n");
793 if (BlockInfo
.End
.test(Slot
)) {
794 BlockInfo
.End
.reset(Slot
);
796 BlockInfo
.Begin
.set(Slot
);
803 // Update statistics.
804 NumMarkerSeen
+= MarkersFound
;
808 void StackColoring::calculateLocalLiveness() {
809 unsigned NumIters
= 0;
815 for (const MachineBasicBlock
*BB
: BasicBlockNumbering
) {
816 // Use an iterator to avoid repeated lookups.
817 LivenessMap::iterator BI
= BlockLiveness
.find(BB
);
818 assert(BI
!= BlockLiveness
.end() && "Block not found");
819 BlockLifetimeInfo
&BlockInfo
= BI
->second
;
821 // Compute LiveIn by unioning together the LiveOut sets of all preds.
822 BitVector LocalLiveIn
;
823 for (MachineBasicBlock
*Pred
: BB
->predecessors()) {
824 LivenessMap::const_iterator I
= BlockLiveness
.find(Pred
);
825 // PR37130: transformations prior to stack coloring can
826 // sometimes leave behind statically unreachable blocks; these
827 // can be safely skipped here.
828 if (I
!= BlockLiveness
.end())
829 LocalLiveIn
|= I
->second
.LiveOut
;
832 // Compute LiveOut by subtracting out lifetimes that end in this
833 // block, then adding in lifetimes that begin in this block. If
834 // we have both BEGIN and END markers in the same basic block
835 // then we know that the BEGIN marker comes after the END,
836 // because we already handle the case where the BEGIN comes
837 // before the END when collecting the markers (and building the
838 // BEGIN/END vectors).
839 BitVector LocalLiveOut
= LocalLiveIn
;
840 LocalLiveOut
.reset(BlockInfo
.End
);
841 LocalLiveOut
|= BlockInfo
.Begin
;
843 // Update block LiveIn set, noting whether it has changed.
844 if (LocalLiveIn
.test(BlockInfo
.LiveIn
)) {
846 BlockInfo
.LiveIn
|= LocalLiveIn
;
849 // Update block LiveOut set, noting whether it has changed.
850 if (LocalLiveOut
.test(BlockInfo
.LiveOut
)) {
852 BlockInfo
.LiveOut
|= LocalLiveOut
;
857 NumIterations
= NumIters
;
860 void StackColoring::calculateLiveIntervals(unsigned NumSlots
) {
861 SmallVector
<SlotIndex
, 16> Starts
;
862 SmallVector
<bool, 16> DefinitelyInUse
;
864 // For each block, find which slots are active within this block
865 // and update the live intervals.
866 for (const MachineBasicBlock
&MBB
: *MF
) {
868 Starts
.resize(NumSlots
);
869 DefinitelyInUse
.clear();
870 DefinitelyInUse
.resize(NumSlots
);
872 // Start the interval of the slots that we previously found to be 'in-use'.
873 BlockLifetimeInfo
&MBBLiveness
= BlockLiveness
[&MBB
];
874 for (int pos
= MBBLiveness
.LiveIn
.find_first(); pos
!= -1;
875 pos
= MBBLiveness
.LiveIn
.find_next(pos
)) {
876 Starts
[pos
] = Indexes
->getMBBStartIdx(&MBB
);
879 // Create the interval for the basic blocks containing lifetime begin/end.
880 for (const MachineInstr
&MI
: MBB
) {
881 SmallVector
<int, 4> slots
;
882 bool IsStart
= false;
883 if (!isLifetimeStartOrEnd(MI
, slots
, IsStart
))
885 SlotIndex ThisIndex
= Indexes
->getInstructionIndex(MI
);
886 for (auto Slot
: slots
) {
888 // If a slot is already definitely in use, we don't have to emit
889 // a new start marker because there is already a pre-existing
891 if (!DefinitelyInUse
[Slot
]) {
892 LiveStarts
[Slot
].push_back(ThisIndex
);
893 DefinitelyInUse
[Slot
] = true;
895 if (!Starts
[Slot
].isValid())
896 Starts
[Slot
] = ThisIndex
;
898 if (Starts
[Slot
].isValid()) {
899 VNInfo
*VNI
= Intervals
[Slot
]->getValNumInfo(0);
900 Intervals
[Slot
]->addSegment(
901 LiveInterval::Segment(Starts
[Slot
], ThisIndex
, VNI
));
902 Starts
[Slot
] = SlotIndex(); // Invalidate the start index
903 DefinitelyInUse
[Slot
] = false;
909 // Finish up started segments
910 for (unsigned i
= 0; i
< NumSlots
; ++i
) {
911 if (!Starts
[i
].isValid())
914 SlotIndex EndIdx
= Indexes
->getMBBEndIdx(&MBB
);
915 VNInfo
*VNI
= Intervals
[i
]->getValNumInfo(0);
916 Intervals
[i
]->addSegment(LiveInterval::Segment(Starts
[i
], EndIdx
, VNI
));
921 bool StackColoring::removeAllMarkers() {
923 for (MachineInstr
*MI
: Markers
) {
924 MI
->eraseFromParent();
929 LLVM_DEBUG(dbgs() << "Removed " << Count
<< " markers.\n");
933 void StackColoring::remapInstructions(DenseMap
<int, int> &SlotRemap
) {
934 unsigned FixedInstr
= 0;
935 unsigned FixedMemOp
= 0;
936 unsigned FixedDbg
= 0;
938 // Remap debug information that refers to stack slots.
939 for (auto &VI
: MF
->getVariableDbgInfo()) {
942 if (SlotRemap
.count(VI
.Slot
)) {
943 LLVM_DEBUG(dbgs() << "Remapping debug info for ["
944 << cast
<DILocalVariable
>(VI
.Var
)->getName() << "].\n");
945 VI
.Slot
= SlotRemap
[VI
.Slot
];
950 // Keep a list of *allocas* which need to be remapped.
951 DenseMap
<const AllocaInst
*, const AllocaInst
*> Allocas
;
953 // Keep a list of allocas which has been affected by the remap.
954 SmallPtrSet
<const AllocaInst
*, 32> MergedAllocas
;
956 for (const std::pair
<int, int> &SI
: SlotRemap
) {
957 const AllocaInst
*From
= MFI
->getObjectAllocation(SI
.first
);
958 const AllocaInst
*To
= MFI
->getObjectAllocation(SI
.second
);
959 assert(To
&& From
&& "Invalid allocation object");
962 // If From is before wo, its possible that there is a use of From between
964 if (From
->comesBefore(To
))
965 const_cast<AllocaInst
*>(To
)->moveBefore(const_cast<AllocaInst
*>(From
));
967 // AA might be used later for instruction scheduling, and we need it to be
968 // able to deduce the correct aliasing releationships between pointers
969 // derived from the alloca being remapped and the target of that remapping.
970 // The only safe way, without directly informing AA about the remapping
971 // somehow, is to directly update the IR to reflect the change being made
973 Instruction
*Inst
= const_cast<AllocaInst
*>(To
);
974 if (From
->getType() != To
->getType()) {
975 BitCastInst
*Cast
= new BitCastInst(Inst
, From
->getType());
976 Cast
->insertAfter(Inst
);
980 // We keep both slots to maintain AliasAnalysis metadata later.
981 MergedAllocas
.insert(From
);
982 MergedAllocas
.insert(To
);
984 // Transfer the stack protector layout tag, but make sure that SSPLK_AddrOf
985 // does not overwrite SSPLK_SmallArray or SSPLK_LargeArray, and make sure
986 // that SSPLK_SmallArray does not overwrite SSPLK_LargeArray.
987 MachineFrameInfo::SSPLayoutKind FromKind
988 = MFI
->getObjectSSPLayout(SI
.first
);
989 MachineFrameInfo::SSPLayoutKind ToKind
= MFI
->getObjectSSPLayout(SI
.second
);
990 if (FromKind
!= MachineFrameInfo::SSPLK_None
&&
991 (ToKind
== MachineFrameInfo::SSPLK_None
||
992 (ToKind
!= MachineFrameInfo::SSPLK_LargeArray
&&
993 FromKind
!= MachineFrameInfo::SSPLK_AddrOf
)))
994 MFI
->setObjectSSPLayout(SI
.second
, FromKind
);
996 // The new alloca might not be valid in a llvm.dbg.declare for this
997 // variable, so undef out the use to make the verifier happy.
998 AllocaInst
*FromAI
= const_cast<AllocaInst
*>(From
);
999 if (FromAI
->isUsedByMetadata())
1000 ValueAsMetadata::handleRAUW(FromAI
, UndefValue::get(FromAI
->getType()));
1001 for (auto &Use
: FromAI
->uses()) {
1002 if (BitCastInst
*BCI
= dyn_cast
<BitCastInst
>(Use
.get()))
1003 if (BCI
->isUsedByMetadata())
1004 ValueAsMetadata::handleRAUW(BCI
, UndefValue::get(BCI
->getType()));
1007 // Note that this will not replace uses in MMOs (which we'll update below),
1008 // or anywhere else (which is why we won't delete the original
1010 FromAI
->replaceAllUsesWith(Inst
);
1013 // Remap all instructions to the new stack slots.
1014 std::vector
<std::vector
<MachineMemOperand
*>> SSRefs(
1015 MFI
->getObjectIndexEnd());
1016 for (MachineBasicBlock
&BB
: *MF
)
1017 for (MachineInstr
&I
: BB
) {
1018 // Skip lifetime markers. We'll remove them soon.
1019 if (I
.getOpcode() == TargetOpcode::LIFETIME_START
||
1020 I
.getOpcode() == TargetOpcode::LIFETIME_END
)
1023 // Update the MachineMemOperand to use the new alloca.
1024 for (MachineMemOperand
*MMO
: I
.memoperands()) {
1025 // We've replaced IR-level uses of the remapped allocas, so we only
1026 // need to replace direct uses here.
1027 const AllocaInst
*AI
= dyn_cast_or_null
<AllocaInst
>(MMO
->getValue());
1031 if (!Allocas
.count(AI
))
1034 MMO
->setValue(Allocas
[AI
]);
1038 // Update all of the machine instruction operands.
1039 for (MachineOperand
&MO
: I
.operands()) {
1042 int FromSlot
= MO
.getIndex();
1044 // Don't touch arguments.
1048 // Only look at mapped slots.
1049 if (!SlotRemap
.count(FromSlot
))
1052 // In a debug build, check that the instruction that we are modifying is
1053 // inside the expected live range. If the instruction is not inside
1054 // the calculated range then it means that the alloca usage moved
1055 // outside of the lifetime markers, or that the user has a bug.
1056 // NOTE: Alloca address calculations which happen outside the lifetime
1057 // zone are okay, despite the fact that we don't have a good way
1058 // for validating all of the usages of the calculation.
1060 bool TouchesMemory
= I
.mayLoadOrStore();
1061 // If we *don't* protect the user from escaped allocas, don't bother
1062 // validating the instructions.
1063 if (!I
.isDebugInstr() && TouchesMemory
&& ProtectFromEscapedAllocas
) {
1064 SlotIndex Index
= Indexes
->getInstructionIndex(I
);
1065 const LiveInterval
*Interval
= &*Intervals
[FromSlot
];
1066 assert(Interval
->find(Index
) != Interval
->end() &&
1067 "Found instruction usage outside of live range.");
1071 // Fix the machine instructions.
1072 int ToSlot
= SlotRemap
[FromSlot
];
1073 MO
.setIndex(ToSlot
);
1077 // We adjust AliasAnalysis information for merged stack slots.
1078 SmallVector
<MachineMemOperand
*, 2> NewMMOs
;
1079 bool ReplaceMemOps
= false;
1080 for (MachineMemOperand
*MMO
: I
.memoperands()) {
1081 // Collect MachineMemOperands which reference
1082 // FixedStackPseudoSourceValues with old frame indices.
1083 if (const auto *FSV
= dyn_cast_or_null
<FixedStackPseudoSourceValue
>(
1084 MMO
->getPseudoValue())) {
1085 int FI
= FSV
->getFrameIndex();
1086 auto To
= SlotRemap
.find(FI
);
1087 if (To
!= SlotRemap
.end())
1088 SSRefs
[FI
].push_back(MMO
);
1091 // If this memory location can be a slot remapped here,
1092 // we remove AA information.
1093 bool MayHaveConflictingAAMD
= false;
1094 if (MMO
->getAAInfo()) {
1095 if (const Value
*MMOV
= MMO
->getValue()) {
1096 SmallVector
<Value
*, 4> Objs
;
1097 getUnderlyingObjectsForCodeGen(MMOV
, Objs
);
1100 MayHaveConflictingAAMD
= true;
1102 for (Value
*V
: Objs
) {
1103 // If this memory location comes from a known stack slot
1104 // that is not remapped, we continue checking.
1105 // Otherwise, we need to invalidate AA infomation.
1106 const AllocaInst
*AI
= dyn_cast_or_null
<AllocaInst
>(V
);
1107 if (AI
&& MergedAllocas
.count(AI
)) {
1108 MayHaveConflictingAAMD
= true;
1114 if (MayHaveConflictingAAMD
) {
1115 NewMMOs
.push_back(MF
->getMachineMemOperand(MMO
, AAMDNodes()));
1116 ReplaceMemOps
= true;
1118 NewMMOs
.push_back(MMO
);
1122 // If any memory operand is updated, set memory references of
1123 // this instruction.
1125 I
.setMemRefs(*MF
, NewMMOs
);
1128 // Rewrite MachineMemOperands that reference old frame indices.
1129 for (auto E
: enumerate(SSRefs
))
1130 if (!E
.value().empty()) {
1131 const PseudoSourceValue
*NewSV
=
1132 MF
->getPSVManager().getFixedStack(SlotRemap
.find(E
.index())->second
);
1133 for (MachineMemOperand
*Ref
: E
.value())
1134 Ref
->setValue(NewSV
);
1137 // Update the location of C++ catch objects for the MSVC personality routine.
1138 if (WinEHFuncInfo
*EHInfo
= MF
->getWinEHFuncInfo())
1139 for (WinEHTryBlockMapEntry
&TBME
: EHInfo
->TryBlockMap
)
1140 for (WinEHHandlerType
&H
: TBME
.HandlerArray
)
1141 if (H
.CatchObj
.FrameIndex
!= std::numeric_limits
<int>::max() &&
1142 SlotRemap
.count(H
.CatchObj
.FrameIndex
))
1143 H
.CatchObj
.FrameIndex
= SlotRemap
[H
.CatchObj
.FrameIndex
];
1145 LLVM_DEBUG(dbgs() << "Fixed " << FixedMemOp
<< " machine memory operands.\n");
1146 LLVM_DEBUG(dbgs() << "Fixed " << FixedDbg
<< " debug locations.\n");
1147 LLVM_DEBUG(dbgs() << "Fixed " << FixedInstr
<< " machine instructions.\n");
1150 void StackColoring::removeInvalidSlotRanges() {
1151 for (MachineBasicBlock
&BB
: *MF
)
1152 for (MachineInstr
&I
: BB
) {
1153 if (I
.getOpcode() == TargetOpcode::LIFETIME_START
||
1154 I
.getOpcode() == TargetOpcode::LIFETIME_END
|| I
.isDebugInstr())
1157 // Some intervals are suspicious! In some cases we find address
1158 // calculations outside of the lifetime zone, but not actual memory
1159 // read or write. Memory accesses outside of the lifetime zone are a clear
1160 // violation, but address calculations are okay. This can happen when
1161 // GEPs are hoisted outside of the lifetime zone.
1162 // So, in here we only check instructions which can read or write memory.
1163 if (!I
.mayLoad() && !I
.mayStore())
1166 // Check all of the machine operands.
1167 for (const MachineOperand
&MO
: I
.operands()) {
1171 int Slot
= MO
.getIndex();
1176 if (Intervals
[Slot
]->empty())
1179 // Check that the used slot is inside the calculated lifetime range.
1180 // If it is not, warn about it and invalidate the range.
1181 LiveInterval
*Interval
= &*Intervals
[Slot
];
1182 SlotIndex Index
= Indexes
->getInstructionIndex(I
);
1183 if (Interval
->find(Index
) == Interval
->end()) {
1185 LLVM_DEBUG(dbgs() << "Invalidating range #" << Slot
<< "\n");
1192 void StackColoring::expungeSlotMap(DenseMap
<int, int> &SlotRemap
,
1193 unsigned NumSlots
) {
1194 // Expunge slot remap map.
1195 for (unsigned i
=0; i
< NumSlots
; ++i
) {
1196 // If we are remapping i
1197 if (SlotRemap
.count(i
)) {
1198 int Target
= SlotRemap
[i
];
1199 // As long as our target is mapped to something else, follow it.
1200 while (SlotRemap
.count(Target
)) {
1201 Target
= SlotRemap
[Target
];
1202 SlotRemap
[i
] = Target
;
1208 bool StackColoring::runOnMachineFunction(MachineFunction
&Func
) {
1209 LLVM_DEBUG(dbgs() << "********** Stack Coloring **********\n"
1210 << "********** Function: " << Func
.getName() << '\n');
1212 MFI
= &MF
->getFrameInfo();
1213 Indexes
= &getAnalysis
<SlotIndexes
>();
1214 BlockLiveness
.clear();
1215 BasicBlocks
.clear();
1216 BasicBlockNumbering
.clear();
1220 VNInfoAllocator
.Reset();
1222 unsigned NumSlots
= MFI
->getObjectIndexEnd();
1224 // If there are no stack slots then there are no markers to remove.
1228 SmallVector
<int, 8> SortedSlots
;
1229 SortedSlots
.reserve(NumSlots
);
1230 Intervals
.reserve(NumSlots
);
1231 LiveStarts
.resize(NumSlots
);
1233 unsigned NumMarkers
= collectMarkers(NumSlots
);
1235 unsigned TotalSize
= 0;
1236 LLVM_DEBUG(dbgs() << "Found " << NumMarkers
<< " markers and " << NumSlots
1238 LLVM_DEBUG(dbgs() << "Slot structure:\n");
1240 for (int i
=0; i
< MFI
->getObjectIndexEnd(); ++i
) {
1241 LLVM_DEBUG(dbgs() << "Slot #" << i
<< " - " << MFI
->getObjectSize(i
)
1243 TotalSize
+= MFI
->getObjectSize(i
);
1246 LLVM_DEBUG(dbgs() << "Total Stack size: " << TotalSize
<< " bytes\n\n");
1248 // Don't continue because there are not enough lifetime markers, or the
1249 // stack is too small, or we are told not to optimize the slots.
1250 if (NumMarkers
< 2 || TotalSize
< 16 || DisableColoring
||
1251 skipFunction(Func
.getFunction())) {
1252 LLVM_DEBUG(dbgs() << "Will not try to merge slots.\n");
1253 return removeAllMarkers();
1256 for (unsigned i
=0; i
< NumSlots
; ++i
) {
1257 std::unique_ptr
<LiveInterval
> LI(new LiveInterval(i
, 0));
1258 LI
->getNextValue(Indexes
->getZeroIndex(), VNInfoAllocator
);
1259 Intervals
.push_back(std::move(LI
));
1260 SortedSlots
.push_back(i
);
1263 // Calculate the liveness of each block.
1264 calculateLocalLiveness();
1265 LLVM_DEBUG(dbgs() << "Dataflow iterations: " << NumIterations
<< "\n");
1268 // Propagate the liveness information.
1269 calculateLiveIntervals(NumSlots
);
1270 LLVM_DEBUG(dumpIntervals());
1272 // Search for allocas which are used outside of the declared lifetime
1274 if (ProtectFromEscapedAllocas
)
1275 removeInvalidSlotRanges();
1277 // Maps old slots to new slots.
1278 DenseMap
<int, int> SlotRemap
;
1279 unsigned RemovedSlots
= 0;
1280 unsigned ReducedSize
= 0;
1282 // Do not bother looking at empty intervals.
1283 for (unsigned I
= 0; I
< NumSlots
; ++I
) {
1284 if (Intervals
[SortedSlots
[I
]]->empty())
1285 SortedSlots
[I
] = -1;
1288 // This is a simple greedy algorithm for merging allocas. First, sort the
1289 // slots, placing the largest slots first. Next, perform an n^2 scan and look
1290 // for disjoint slots. When you find disjoint slots, merge the smaller one
1291 // into the bigger one and update the live interval. Remove the small alloca
1294 // Sort the slots according to their size. Place unused slots at the end.
1295 // Use stable sort to guarantee deterministic code generation.
1296 llvm::stable_sort(SortedSlots
, [this](int LHS
, int RHS
) {
1297 // We use -1 to denote a uninteresting slot. Place these slots at the end.
1302 // Sort according to size.
1303 return MFI
->getObjectSize(LHS
) > MFI
->getObjectSize(RHS
);
1306 for (auto &s
: LiveStarts
)
1309 bool Changed
= true;
1312 for (unsigned I
= 0; I
< NumSlots
; ++I
) {
1313 if (SortedSlots
[I
] == -1)
1316 for (unsigned J
=I
+1; J
< NumSlots
; ++J
) {
1317 if (SortedSlots
[J
] == -1)
1320 int FirstSlot
= SortedSlots
[I
];
1321 int SecondSlot
= SortedSlots
[J
];
1322 LiveInterval
*First
= &*Intervals
[FirstSlot
];
1323 LiveInterval
*Second
= &*Intervals
[SecondSlot
];
1324 auto &FirstS
= LiveStarts
[FirstSlot
];
1325 auto &SecondS
= LiveStarts
[SecondSlot
];
1326 assert(!First
->empty() && !Second
->empty() && "Found an empty range");
1328 // Merge disjoint slots. This is a little bit tricky - see the
1329 // Implementation Notes section for an explanation.
1330 if (!First
->isLiveAtIndexes(SecondS
) &&
1331 !Second
->isLiveAtIndexes(FirstS
)) {
1333 First
->MergeSegmentsInAsValue(*Second
, First
->getValNumInfo(0));
1335 int OldSize
= FirstS
.size();
1336 FirstS
.append(SecondS
.begin(), SecondS
.end());
1337 auto Mid
= FirstS
.begin() + OldSize
;
1338 std::inplace_merge(FirstS
.begin(), Mid
, FirstS
.end());
1340 SlotRemap
[SecondSlot
] = FirstSlot
;
1341 SortedSlots
[J
] = -1;
1342 LLVM_DEBUG(dbgs() << "Merging #" << FirstSlot
<< " and slots #"
1343 << SecondSlot
<< " together.\n");
1344 Align MaxAlignment
= std::max(MFI
->getObjectAlign(FirstSlot
),
1345 MFI
->getObjectAlign(SecondSlot
));
1347 assert(MFI
->getObjectSize(FirstSlot
) >=
1348 MFI
->getObjectSize(SecondSlot
) &&
1349 "Merging a small object into a larger one");
1352 ReducedSize
+= MFI
->getObjectSize(SecondSlot
);
1353 MFI
->setObjectAlignment(FirstSlot
, MaxAlignment
);
1354 MFI
->RemoveStackObject(SecondSlot
);
1360 // Record statistics.
1361 StackSpaceSaved
+= ReducedSize
;
1362 StackSlotMerged
+= RemovedSlots
;
1363 LLVM_DEBUG(dbgs() << "Merge " << RemovedSlots
<< " slots. Saved "
1364 << ReducedSize
<< " bytes\n");
1366 // Scan the entire function and update all machine operands that use frame
1367 // indices to use the remapped frame index.
1368 expungeSlotMap(SlotRemap
, NumSlots
);
1369 remapInstructions(SlotRemap
);
1371 return removeAllMarkers();