1 //===- ValueEnumerator.cpp - Number values and types for bitcode writer ---===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This file implements the ValueEnumerator class.
11 //===----------------------------------------------------------------------===//
13 #include "ValueEnumerator.h"
14 #include "llvm/ADT/SmallVector.h"
15 #include "llvm/Config/llvm-config.h"
16 #include "llvm/IR/Argument.h"
17 #include "llvm/IR/BasicBlock.h"
18 #include "llvm/IR/Constant.h"
19 #include "llvm/IR/DebugInfoMetadata.h"
20 #include "llvm/IR/DerivedTypes.h"
21 #include "llvm/IR/Function.h"
22 #include "llvm/IR/GlobalAlias.h"
23 #include "llvm/IR/GlobalIFunc.h"
24 #include "llvm/IR/GlobalObject.h"
25 #include "llvm/IR/GlobalValue.h"
26 #include "llvm/IR/GlobalVariable.h"
27 #include "llvm/IR/Instruction.h"
28 #include "llvm/IR/Instructions.h"
29 #include "llvm/IR/Metadata.h"
30 #include "llvm/IR/Module.h"
31 #include "llvm/IR/Operator.h"
32 #include "llvm/IR/Type.h"
33 #include "llvm/IR/Use.h"
34 #include "llvm/IR/User.h"
35 #include "llvm/IR/Value.h"
36 #include "llvm/IR/ValueSymbolTable.h"
37 #include "llvm/Support/Casting.h"
38 #include "llvm/Support/Compiler.h"
39 #include "llvm/Support/Debug.h"
40 #include "llvm/Support/MathExtras.h"
41 #include "llvm/Support/raw_ostream.h"
52 DenseMap
<const Value
*, std::pair
<unsigned, bool>> IDs
;
53 unsigned LastGlobalConstantID
= 0;
54 unsigned LastGlobalValueID
= 0;
58 bool isGlobalConstant(unsigned ID
) const {
59 return ID
<= LastGlobalConstantID
;
62 bool isGlobalValue(unsigned ID
) const {
63 return ID
<= LastGlobalValueID
&& !isGlobalConstant(ID
);
66 unsigned size() const { return IDs
.size(); }
67 std::pair
<unsigned, bool> &operator[](const Value
*V
) { return IDs
[V
]; }
69 std::pair
<unsigned, bool> lookup(const Value
*V
) const {
73 void index(const Value
*V
) {
74 // Explicitly sequence get-size and insert-value operations to avoid UB.
75 unsigned ID
= IDs
.size() + 1;
80 } // end anonymous namespace
82 static void orderValue(const Value
*V
, OrderMap
&OM
) {
83 if (OM
.lookup(V
).first
)
86 if (const Constant
*C
= dyn_cast
<Constant
>(V
)) {
87 if (C
->getNumOperands() && !isa
<GlobalValue
>(C
)) {
88 for (const Value
*Op
: C
->operands())
89 if (!isa
<BasicBlock
>(Op
) && !isa
<GlobalValue
>(Op
))
91 if (auto *CE
= dyn_cast
<ConstantExpr
>(C
))
92 if (CE
->getOpcode() == Instruction::ShuffleVector
)
93 orderValue(CE
->getShuffleMaskForBitcode(), OM
);
97 // Note: we cannot cache this lookup above, since inserting into the map
98 // changes the map's size, and thus affects the other IDs.
102 static OrderMap
orderModule(const Module
&M
) {
103 // This needs to match the order used by ValueEnumerator::ValueEnumerator()
104 // and ValueEnumerator::incorporateFunction().
107 // In the reader, initializers of GlobalValues are set *after* all the
108 // globals have been read. Rather than awkwardly modeling this behaviour
109 // directly in predictValueUseListOrderImpl(), just assign IDs to
110 // initializers of GlobalValues before GlobalValues themselves to model this
112 for (const GlobalVariable
&G
: M
.globals())
113 if (G
.hasInitializer())
114 if (!isa
<GlobalValue
>(G
.getInitializer()))
115 orderValue(G
.getInitializer(), OM
);
116 for (const GlobalAlias
&A
: M
.aliases())
117 if (!isa
<GlobalValue
>(A
.getAliasee()))
118 orderValue(A
.getAliasee(), OM
);
119 for (const GlobalIFunc
&I
: M
.ifuncs())
120 if (!isa
<GlobalValue
>(I
.getResolver()))
121 orderValue(I
.getResolver(), OM
);
122 for (const Function
&F
: M
) {
123 for (const Use
&U
: F
.operands())
124 if (!isa
<GlobalValue
>(U
.get()))
125 orderValue(U
.get(), OM
);
128 // As constants used in metadata operands are emitted as module-level
129 // constants, we must order them before other operands. Also, we must order
130 // these before global values, as these will be read before setting the
131 // global values' initializers. The latter matters for constants which have
132 // uses towards other constants that are used as initializers.
133 auto orderConstantValue
= [&OM
](const Value
*V
) {
134 if ((isa
<Constant
>(V
) && !isa
<GlobalValue
>(V
)) || isa
<InlineAsm
>(V
))
137 for (const Function
&F
: M
) {
138 if (F
.isDeclaration())
140 for (const BasicBlock
&BB
: F
)
141 for (const Instruction
&I
: BB
)
142 for (const Value
*V
: I
.operands()) {
143 if (const auto *MAV
= dyn_cast
<MetadataAsValue
>(V
)) {
144 if (const auto *VAM
=
145 dyn_cast
<ValueAsMetadata
>(MAV
->getMetadata())) {
146 orderConstantValue(VAM
->getValue());
147 } else if (const auto *AL
=
148 dyn_cast
<DIArgList
>(MAV
->getMetadata())) {
149 for (const auto *VAM
: AL
->getArgs())
150 orderConstantValue(VAM
->getValue());
155 OM
.LastGlobalConstantID
= OM
.size();
157 // Initializers of GlobalValues are processed in
158 // BitcodeReader::ResolveGlobalAndAliasInits(). Match the order there rather
159 // than ValueEnumerator, and match the code in predictValueUseListOrderImpl()
160 // by giving IDs in reverse order.
162 // Since GlobalValues never reference each other directly (just through
163 // initializers), their relative IDs only matter for determining order of
164 // uses in their initializers.
165 for (const Function
&F
: M
)
167 for (const GlobalAlias
&A
: M
.aliases())
169 for (const GlobalIFunc
&I
: M
.ifuncs())
171 for (const GlobalVariable
&G
: M
.globals())
173 OM
.LastGlobalValueID
= OM
.size();
175 for (const Function
&F
: M
) {
176 if (F
.isDeclaration())
178 // Here we need to match the union of ValueEnumerator::incorporateFunction()
179 // and WriteFunction(). Basic blocks are implicitly declared before
180 // anything else (by declaring their size).
181 for (const BasicBlock
&BB
: F
)
183 for (const Argument
&A
: F
.args())
185 for (const BasicBlock
&BB
: F
)
186 for (const Instruction
&I
: BB
) {
187 for (const Value
*Op
: I
.operands())
188 if ((isa
<Constant
>(*Op
) && !isa
<GlobalValue
>(*Op
)) ||
191 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&I
))
192 orderValue(SVI
->getShuffleMaskForBitcode(), OM
);
194 for (const BasicBlock
&BB
: F
)
195 for (const Instruction
&I
: BB
)
201 static void predictValueUseListOrderImpl(const Value
*V
, const Function
*F
,
202 unsigned ID
, const OrderMap
&OM
,
203 UseListOrderStack
&Stack
) {
204 // Predict use-list order for this one.
205 using Entry
= std::pair
<const Use
*, unsigned>;
206 SmallVector
<Entry
, 64> List
;
207 for (const Use
&U
: V
->uses())
208 // Check if this user will be serialized.
209 if (OM
.lookup(U
.getUser()).first
)
210 List
.push_back(std::make_pair(&U
, List
.size()));
213 // We may have lost some users.
216 bool IsGlobalValue
= OM
.isGlobalValue(ID
);
217 llvm::sort(List
, [&](const Entry
&L
, const Entry
&R
) {
218 const Use
*LU
= L
.first
;
219 const Use
*RU
= R
.first
;
223 auto LID
= OM
.lookup(LU
->getUser()).first
;
224 auto RID
= OM
.lookup(RU
->getUser()).first
;
226 // Global values are processed in reverse order.
228 // Moreover, initializers of GlobalValues are set *after* all the globals
229 // have been read (despite having earlier IDs). Rather than awkwardly
230 // modeling this behaviour here, orderModule() has assigned IDs to
231 // initializers of GlobalValues before GlobalValues themselves.
232 if (OM
.isGlobalValue(LID
) && OM
.isGlobalValue(RID
))
235 // If ID is 4, then expect: 7 6 5 1 2 3.
238 if (!IsGlobalValue
) // GlobalValue uses don't get reversed.
244 if (!IsGlobalValue
) // GlobalValue uses don't get reversed.
249 // LID and RID are equal, so we have different operands of the same user.
250 // Assume operands are added in order for all instructions.
252 if (!IsGlobalValue
) // GlobalValue uses don't get reversed.
253 return LU
->getOperandNo() < RU
->getOperandNo();
254 return LU
->getOperandNo() > RU
->getOperandNo();
257 if (llvm::is_sorted(List
, [](const Entry
&L
, const Entry
&R
) {
258 return L
.second
< R
.second
;
260 // Order is already correct.
263 // Store the shuffle.
264 Stack
.emplace_back(V
, F
, List
.size());
265 assert(List
.size() == Stack
.back().Shuffle
.size() && "Wrong size");
266 for (size_t I
= 0, E
= List
.size(); I
!= E
; ++I
)
267 Stack
.back().Shuffle
[I
] = List
[I
].second
;
270 static void predictValueUseListOrder(const Value
*V
, const Function
*F
,
271 OrderMap
&OM
, UseListOrderStack
&Stack
) {
272 auto &IDPair
= OM
[V
];
273 assert(IDPair
.first
&& "Unmapped value");
275 // Already predicted.
278 // Do the actual prediction.
279 IDPair
.second
= true;
280 if (!V
->use_empty() && std::next(V
->use_begin()) != V
->use_end())
281 predictValueUseListOrderImpl(V
, F
, IDPair
.first
, OM
, Stack
);
283 // Recursive descent into constants.
284 if (const Constant
*C
= dyn_cast
<Constant
>(V
)) {
285 if (C
->getNumOperands()) { // Visit GlobalValues.
286 for (const Value
*Op
: C
->operands())
287 if (isa
<Constant
>(Op
)) // Visit GlobalValues.
288 predictValueUseListOrder(Op
, F
, OM
, Stack
);
289 if (auto *CE
= dyn_cast
<ConstantExpr
>(C
))
290 if (CE
->getOpcode() == Instruction::ShuffleVector
)
291 predictValueUseListOrder(CE
->getShuffleMaskForBitcode(), F
, OM
,
297 static UseListOrderStack
predictUseListOrder(const Module
&M
) {
298 OrderMap OM
= orderModule(M
);
300 // Use-list orders need to be serialized after all the users have been added
301 // to a value, or else the shuffles will be incomplete. Store them per
302 // function in a stack.
304 // Aside from function order, the order of values doesn't matter much here.
305 UseListOrderStack Stack
;
307 // We want to visit the functions backward now so we can list function-local
308 // constants in the last Function they're used in. Module-level constants
309 // have already been visited above.
310 for (auto I
= M
.rbegin(), E
= M
.rend(); I
!= E
; ++I
) {
311 const Function
&F
= *I
;
312 if (F
.isDeclaration())
314 for (const BasicBlock
&BB
: F
)
315 predictValueUseListOrder(&BB
, &F
, OM
, Stack
);
316 for (const Argument
&A
: F
.args())
317 predictValueUseListOrder(&A
, &F
, OM
, Stack
);
318 for (const BasicBlock
&BB
: F
)
319 for (const Instruction
&I
: BB
) {
320 for (const Value
*Op
: I
.operands())
321 if (isa
<Constant
>(*Op
) || isa
<InlineAsm
>(*Op
)) // Visit GlobalValues.
322 predictValueUseListOrder(Op
, &F
, OM
, Stack
);
323 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&I
))
324 predictValueUseListOrder(SVI
->getShuffleMaskForBitcode(), &F
, OM
,
327 for (const BasicBlock
&BB
: F
)
328 for (const Instruction
&I
: BB
)
329 predictValueUseListOrder(&I
, &F
, OM
, Stack
);
332 // Visit globals last, since the module-level use-list block will be seen
333 // before the function bodies are processed.
334 for (const GlobalVariable
&G
: M
.globals())
335 predictValueUseListOrder(&G
, nullptr, OM
, Stack
);
336 for (const Function
&F
: M
)
337 predictValueUseListOrder(&F
, nullptr, OM
, Stack
);
338 for (const GlobalAlias
&A
: M
.aliases())
339 predictValueUseListOrder(&A
, nullptr, OM
, Stack
);
340 for (const GlobalIFunc
&I
: M
.ifuncs())
341 predictValueUseListOrder(&I
, nullptr, OM
, Stack
);
342 for (const GlobalVariable
&G
: M
.globals())
343 if (G
.hasInitializer())
344 predictValueUseListOrder(G
.getInitializer(), nullptr, OM
, Stack
);
345 for (const GlobalAlias
&A
: M
.aliases())
346 predictValueUseListOrder(A
.getAliasee(), nullptr, OM
, Stack
);
347 for (const GlobalIFunc
&I
: M
.ifuncs())
348 predictValueUseListOrder(I
.getResolver(), nullptr, OM
, Stack
);
349 for (const Function
&F
: M
) {
350 for (const Use
&U
: F
.operands())
351 predictValueUseListOrder(U
.get(), nullptr, OM
, Stack
);
357 static bool isIntOrIntVectorValue(const std::pair
<const Value
*, unsigned> &V
) {
358 return V
.first
->getType()->isIntOrIntVectorTy();
361 ValueEnumerator::ValueEnumerator(const Module
&M
,
362 bool ShouldPreserveUseListOrder
)
363 : ShouldPreserveUseListOrder(ShouldPreserveUseListOrder
) {
364 if (ShouldPreserveUseListOrder
)
365 UseListOrders
= predictUseListOrder(M
);
367 // Enumerate the global variables.
368 for (const GlobalVariable
&GV
: M
.globals()) {
370 EnumerateType(GV
.getValueType());
373 // Enumerate the functions.
374 for (const Function
& F
: M
) {
376 EnumerateType(F
.getValueType());
377 EnumerateAttributes(F
.getAttributes());
380 // Enumerate the aliases.
381 for (const GlobalAlias
&GA
: M
.aliases()) {
383 EnumerateType(GA
.getValueType());
386 // Enumerate the ifuncs.
387 for (const GlobalIFunc
&GIF
: M
.ifuncs())
388 EnumerateValue(&GIF
);
390 // Remember what is the cutoff between globalvalue's and other constants.
391 unsigned FirstConstant
= Values
.size();
393 // Enumerate the global variable initializers and attributes.
394 for (const GlobalVariable
&GV
: M
.globals()) {
395 if (GV
.hasInitializer())
396 EnumerateValue(GV
.getInitializer());
397 if (GV
.hasAttributes())
398 EnumerateAttributes(GV
.getAttributesAsList(AttributeList::FunctionIndex
));
401 // Enumerate the aliasees.
402 for (const GlobalAlias
&GA
: M
.aliases())
403 EnumerateValue(GA
.getAliasee());
405 // Enumerate the ifunc resolvers.
406 for (const GlobalIFunc
&GIF
: M
.ifuncs())
407 EnumerateValue(GIF
.getResolver());
409 // Enumerate any optional Function data.
410 for (const Function
&F
: M
)
411 for (const Use
&U
: F
.operands())
412 EnumerateValue(U
.get());
414 // Enumerate the metadata type.
416 // TODO: Move this to ValueEnumerator::EnumerateOperandType() once bitcode
417 // only encodes the metadata type when it's used as a value.
418 EnumerateType(Type::getMetadataTy(M
.getContext()));
420 // Insert constants and metadata that are named at module level into the slot
421 // pool so that the module symbol table can refer to them...
422 EnumerateValueSymbolTable(M
.getValueSymbolTable());
423 EnumerateNamedMetadata(M
);
425 SmallVector
<std::pair
<unsigned, MDNode
*>, 8> MDs
;
426 for (const GlobalVariable
&GV
: M
.globals()) {
428 GV
.getAllMetadata(MDs
);
429 for (const auto &I
: MDs
)
430 // FIXME: Pass GV to EnumerateMetadata and arrange for the bitcode writer
431 // to write metadata to the global variable's own metadata block
433 EnumerateMetadata(nullptr, I
.second
);
436 // Enumerate types used by function bodies and argument lists.
437 for (const Function
&F
: M
) {
438 for (const Argument
&A
: F
.args())
439 EnumerateType(A
.getType());
441 // Enumerate metadata attached to this function.
443 F
.getAllMetadata(MDs
);
444 for (const auto &I
: MDs
)
445 EnumerateMetadata(F
.isDeclaration() ? nullptr : &F
, I
.second
);
447 for (const BasicBlock
&BB
: F
)
448 for (const Instruction
&I
: BB
) {
449 for (const Use
&Op
: I
.operands()) {
450 auto *MD
= dyn_cast
<MetadataAsValue
>(&Op
);
452 EnumerateOperandType(Op
);
456 // Local metadata is enumerated during function-incorporation, but
457 // any ConstantAsMetadata arguments in a DIArgList should be examined
459 if (isa
<LocalAsMetadata
>(MD
->getMetadata()))
461 if (auto *AL
= dyn_cast
<DIArgList
>(MD
->getMetadata())) {
462 for (auto *VAM
: AL
->getArgs())
463 if (isa
<ConstantAsMetadata
>(VAM
))
464 EnumerateMetadata(&F
, VAM
);
468 EnumerateMetadata(&F
, MD
->getMetadata());
470 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&I
))
471 EnumerateType(SVI
->getShuffleMaskForBitcode()->getType());
472 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(&I
))
473 EnumerateType(GEP
->getSourceElementType());
474 if (auto *AI
= dyn_cast
<AllocaInst
>(&I
))
475 EnumerateType(AI
->getAllocatedType());
476 EnumerateType(I
.getType());
477 if (const auto *Call
= dyn_cast
<CallBase
>(&I
)) {
478 EnumerateAttributes(Call
->getAttributes());
479 EnumerateType(Call
->getFunctionType());
482 // Enumerate metadata attached with this instruction.
484 I
.getAllMetadataOtherThanDebugLoc(MDs
);
485 for (unsigned i
= 0, e
= MDs
.size(); i
!= e
; ++i
)
486 EnumerateMetadata(&F
, MDs
[i
].second
);
488 // Don't enumerate the location directly -- it has a special record
489 // type -- but enumerate its operands.
490 if (DILocation
*L
= I
.getDebugLoc())
491 for (const Metadata
*Op
: L
->operands())
492 EnumerateMetadata(&F
, Op
);
496 // Optimize constant ordering.
497 OptimizeConstants(FirstConstant
, Values
.size());
499 // Organize metadata ordering.
503 unsigned ValueEnumerator::getInstructionID(const Instruction
*Inst
) const {
504 InstructionMapType::const_iterator I
= InstructionMap
.find(Inst
);
505 assert(I
!= InstructionMap
.end() && "Instruction is not mapped!");
509 unsigned ValueEnumerator::getComdatID(const Comdat
*C
) const {
510 unsigned ComdatID
= Comdats
.idFor(C
);
511 assert(ComdatID
&& "Comdat not found!");
515 void ValueEnumerator::setInstructionID(const Instruction
*I
) {
516 InstructionMap
[I
] = InstructionCount
++;
519 unsigned ValueEnumerator::getValueID(const Value
*V
) const {
520 if (auto *MD
= dyn_cast
<MetadataAsValue
>(V
))
521 return getMetadataID(MD
->getMetadata());
523 ValueMapType::const_iterator I
= ValueMap
.find(V
);
524 assert(I
!= ValueMap
.end() && "Value not in slotcalculator!");
528 #if !defined(NDEBUG) || defined(LLVM_ENABLE_DUMP)
529 LLVM_DUMP_METHOD
void ValueEnumerator::dump() const {
530 print(dbgs(), ValueMap
, "Default");
532 print(dbgs(), MetadataMap
, "MetaData");
537 void ValueEnumerator::print(raw_ostream
&OS
, const ValueMapType
&Map
,
538 const char *Name
) const {
539 OS
<< "Map Name: " << Name
<< "\n";
540 OS
<< "Size: " << Map
.size() << "\n";
541 for (ValueMapType::const_iterator I
= Map
.begin(),
542 E
= Map
.end(); I
!= E
; ++I
) {
543 const Value
*V
= I
->first
;
545 OS
<< "Value: " << V
->getName();
547 OS
<< "Value: [null]\n";
551 OS
<< " Uses(" << V
->getNumUses() << "):";
552 for (const Use
&U
: V
->uses()) {
553 if (&U
!= &*V
->use_begin())
556 OS
<< " " << U
->getName();
565 void ValueEnumerator::print(raw_ostream
&OS
, const MetadataMapType
&Map
,
566 const char *Name
) const {
567 OS
<< "Map Name: " << Name
<< "\n";
568 OS
<< "Size: " << Map
.size() << "\n";
569 for (auto I
= Map
.begin(), E
= Map
.end(); I
!= E
; ++I
) {
570 const Metadata
*MD
= I
->first
;
571 OS
<< "Metadata: slot = " << I
->second
.ID
<< "\n";
572 OS
<< "Metadata: function = " << I
->second
.F
<< "\n";
578 /// OptimizeConstants - Reorder constant pool for denser encoding.
579 void ValueEnumerator::OptimizeConstants(unsigned CstStart
, unsigned CstEnd
) {
580 if (CstStart
== CstEnd
|| CstStart
+1 == CstEnd
) return;
582 if (ShouldPreserveUseListOrder
)
583 // Optimizing constants makes the use-list order difficult to predict.
584 // Disable it for now when trying to preserve the order.
587 std::stable_sort(Values
.begin() + CstStart
, Values
.begin() + CstEnd
,
588 [this](const std::pair
<const Value
*, unsigned> &LHS
,
589 const std::pair
<const Value
*, unsigned> &RHS
) {
591 if (LHS
.first
->getType() != RHS
.first
->getType())
592 return getTypeID(LHS
.first
->getType()) < getTypeID(RHS
.first
->getType());
593 // Then by frequency.
594 return LHS
.second
> RHS
.second
;
597 // Ensure that integer and vector of integer constants are at the start of the
598 // constant pool. This is important so that GEP structure indices come before
599 // gep constant exprs.
600 std::stable_partition(Values
.begin() + CstStart
, Values
.begin() + CstEnd
,
601 isIntOrIntVectorValue
);
603 // Rebuild the modified portion of ValueMap.
604 for (; CstStart
!= CstEnd
; ++CstStart
)
605 ValueMap
[Values
[CstStart
].first
] = CstStart
+1;
608 /// EnumerateValueSymbolTable - Insert all of the values in the specified symbol
609 /// table into the values table.
610 void ValueEnumerator::EnumerateValueSymbolTable(const ValueSymbolTable
&VST
) {
611 for (ValueSymbolTable::const_iterator VI
= VST
.begin(), VE
= VST
.end();
613 EnumerateValue(VI
->getValue());
616 /// Insert all of the values referenced by named metadata in the specified
618 void ValueEnumerator::EnumerateNamedMetadata(const Module
&M
) {
619 for (const auto &I
: M
.named_metadata())
620 EnumerateNamedMDNode(&I
);
623 void ValueEnumerator::EnumerateNamedMDNode(const NamedMDNode
*MD
) {
624 for (unsigned i
= 0, e
= MD
->getNumOperands(); i
!= e
; ++i
)
625 EnumerateMetadata(nullptr, MD
->getOperand(i
));
628 unsigned ValueEnumerator::getMetadataFunctionID(const Function
*F
) const {
629 return F
? getValueID(F
) + 1 : 0;
632 void ValueEnumerator::EnumerateMetadata(const Function
*F
, const Metadata
*MD
) {
633 EnumerateMetadata(getMetadataFunctionID(F
), MD
);
636 void ValueEnumerator::EnumerateFunctionLocalMetadata(
637 const Function
&F
, const LocalAsMetadata
*Local
) {
638 EnumerateFunctionLocalMetadata(getMetadataFunctionID(&F
), Local
);
641 void ValueEnumerator::EnumerateFunctionLocalListMetadata(
642 const Function
&F
, const DIArgList
*ArgList
) {
643 EnumerateFunctionLocalListMetadata(getMetadataFunctionID(&F
), ArgList
);
646 void ValueEnumerator::dropFunctionFromMetadata(
647 MetadataMapType::value_type
&FirstMD
) {
648 SmallVector
<const MDNode
*, 64> Worklist
;
649 auto push
= [&Worklist
](MetadataMapType::value_type
&MD
) {
650 auto &Entry
= MD
.second
;
652 // Nothing to do if this metadata isn't tagged.
656 // Drop the function tag.
659 // If this is has an ID and is an MDNode, then its operands have entries as
660 // well. We need to drop the function from them too.
662 if (auto *N
= dyn_cast
<MDNode
>(MD
.first
))
663 Worklist
.push_back(N
);
666 while (!Worklist
.empty())
667 for (const Metadata
*Op
: Worklist
.pop_back_val()->operands()) {
670 auto MD
= MetadataMap
.find(Op
);
671 if (MD
!= MetadataMap
.end())
676 void ValueEnumerator::EnumerateMetadata(unsigned F
, const Metadata
*MD
) {
677 // It's vital for reader efficiency that uniqued subgraphs are done in
678 // post-order; it's expensive when their operands have forward references.
679 // If a distinct node is referenced from a uniqued node, it'll be delayed
680 // until the uniqued subgraph has been completely traversed.
681 SmallVector
<const MDNode
*, 32> DelayedDistinctNodes
;
683 // Start by enumerating MD, and then work through its transitive operands in
684 // post-order. This requires a depth-first search.
685 SmallVector
<std::pair
<const MDNode
*, MDNode::op_iterator
>, 32> Worklist
;
686 if (const MDNode
*N
= enumerateMetadataImpl(F
, MD
))
687 Worklist
.push_back(std::make_pair(N
, N
->op_begin()));
689 while (!Worklist
.empty()) {
690 const MDNode
*N
= Worklist
.back().first
;
692 // Enumerate operands until we hit a new node. We need to traverse these
693 // nodes' operands before visiting the rest of N's operands.
694 MDNode::op_iterator I
= std::find_if(
695 Worklist
.back().second
, N
->op_end(),
696 [&](const Metadata
*MD
) { return enumerateMetadataImpl(F
, MD
); });
697 if (I
!= N
->op_end()) {
698 auto *Op
= cast
<MDNode
>(*I
);
699 Worklist
.back().second
= ++I
;
701 // Delay traversing Op if it's a distinct node and N is uniqued.
702 if (Op
->isDistinct() && !N
->isDistinct())
703 DelayedDistinctNodes
.push_back(Op
);
705 Worklist
.push_back(std::make_pair(Op
, Op
->op_begin()));
709 // All the operands have been visited. Now assign an ID.
712 MetadataMap
[N
].ID
= MDs
.size();
714 // Flush out any delayed distinct nodes; these are all the distinct nodes
715 // that are leaves in last uniqued subgraph.
716 if (Worklist
.empty() || Worklist
.back().first
->isDistinct()) {
717 for (const MDNode
*N
: DelayedDistinctNodes
)
718 Worklist
.push_back(std::make_pair(N
, N
->op_begin()));
719 DelayedDistinctNodes
.clear();
724 const MDNode
*ValueEnumerator::enumerateMetadataImpl(unsigned F
, const Metadata
*MD
) {
729 (isa
<MDNode
>(MD
) || isa
<MDString
>(MD
) || isa
<ConstantAsMetadata
>(MD
)) &&
730 "Invalid metadata kind");
732 auto Insertion
= MetadataMap
.insert(std::make_pair(MD
, MDIndex(F
)));
733 MDIndex
&Entry
= Insertion
.first
->second
;
734 if (!Insertion
.second
) {
735 // Already mapped. If F doesn't match the function tag, drop it.
736 if (Entry
.hasDifferentFunction(F
))
737 dropFunctionFromMetadata(*Insertion
.first
);
741 // Don't assign IDs to metadata nodes.
742 if (auto *N
= dyn_cast
<MDNode
>(MD
))
745 // Save the metadata.
747 Entry
.ID
= MDs
.size();
749 // Enumerate the constant, if any.
750 if (auto *C
= dyn_cast
<ConstantAsMetadata
>(MD
))
751 EnumerateValue(C
->getValue());
756 /// EnumerateFunctionLocalMetadata - Incorporate function-local metadata
757 /// information reachable from the metadata.
758 void ValueEnumerator::EnumerateFunctionLocalMetadata(
759 unsigned F
, const LocalAsMetadata
*Local
) {
760 assert(F
&& "Expected a function");
762 // Check to see if it's already in!
763 MDIndex
&Index
= MetadataMap
[Local
];
765 assert(Index
.F
== F
&& "Expected the same function");
769 MDs
.push_back(Local
);
771 Index
.ID
= MDs
.size();
773 EnumerateValue(Local
->getValue());
776 /// EnumerateFunctionLocalListMetadata - Incorporate function-local metadata
777 /// information reachable from the metadata.
778 void ValueEnumerator::EnumerateFunctionLocalListMetadata(
779 unsigned F
, const DIArgList
*ArgList
) {
780 assert(F
&& "Expected a function");
782 // Check to see if it's already in!
783 MDIndex
&Index
= MetadataMap
[ArgList
];
785 assert(Index
.F
== F
&& "Expected the same function");
789 for (ValueAsMetadata
*VAM
: ArgList
->getArgs()) {
790 if (isa
<LocalAsMetadata
>(VAM
)) {
791 assert(MetadataMap
.count(VAM
) &&
792 "LocalAsMetadata should be enumerated before DIArgList");
793 assert(MetadataMap
[VAM
].F
== F
&&
794 "Expected LocalAsMetadata in the same function");
796 assert(isa
<ConstantAsMetadata
>(VAM
) &&
797 "Expected LocalAsMetadata or ConstantAsMetadata");
798 assert(ValueMap
.count(VAM
->getValue()) &&
799 "Constant should be enumerated beforeDIArgList");
800 EnumerateMetadata(F
, VAM
);
804 MDs
.push_back(ArgList
);
806 Index
.ID
= MDs
.size();
809 static unsigned getMetadataTypeOrder(const Metadata
*MD
) {
810 // Strings are emitted in bulk and must come first.
811 if (isa
<MDString
>(MD
))
814 // ConstantAsMetadata doesn't reference anything. We may as well shuffle it
815 // to the front since we can detect it.
816 auto *N
= dyn_cast
<MDNode
>(MD
);
820 // The reader is fast forward references for distinct node operands, but slow
821 // when uniqued operands are unresolved.
822 return N
->isDistinct() ? 2 : 3;
825 void ValueEnumerator::organizeMetadata() {
826 assert(MetadataMap
.size() == MDs
.size() &&
827 "Metadata map and vector out of sync");
832 // Copy out the index information from MetadataMap in order to choose a new
834 SmallVector
<MDIndex
, 64> Order
;
835 Order
.reserve(MetadataMap
.size());
836 for (const Metadata
*MD
: MDs
)
837 Order
.push_back(MetadataMap
.lookup(MD
));
840 // - by function, then
841 // - by isa<MDString>
842 // and then sort by the original/current ID. Since the IDs are guaranteed to
843 // be unique, the result of std::sort will be deterministic. There's no need
844 // for std::stable_sort.
845 llvm::sort(Order
, [this](MDIndex LHS
, MDIndex RHS
) {
846 return std::make_tuple(LHS
.F
, getMetadataTypeOrder(LHS
.get(MDs
)), LHS
.ID
) <
847 std::make_tuple(RHS
.F
, getMetadataTypeOrder(RHS
.get(MDs
)), RHS
.ID
);
850 // Rebuild MDs, index the metadata ranges for each function in FunctionMDs,
851 // and fix up MetadataMap.
852 std::vector
<const Metadata
*> OldMDs
;
854 MDs
.reserve(OldMDs
.size());
855 for (unsigned I
= 0, E
= Order
.size(); I
!= E
&& !Order
[I
].F
; ++I
) {
856 auto *MD
= Order
[I
].get(OldMDs
);
858 MetadataMap
[MD
].ID
= I
+ 1;
859 if (isa
<MDString
>(MD
))
863 // Return early if there's nothing for the functions.
864 if (MDs
.size() == Order
.size())
867 // Build the function metadata ranges.
869 FunctionMDs
.reserve(OldMDs
.size());
871 for (unsigned I
= MDs
.size(), E
= Order
.size(), ID
= MDs
.size(); I
!= E
;
873 unsigned F
= Order
[I
].F
;
876 } else if (PrevF
!= F
) {
877 R
.Last
= FunctionMDs
.size();
878 std::swap(R
, FunctionMDInfo
[PrevF
]);
879 R
.First
= FunctionMDs
.size();
885 auto *MD
= Order
[I
].get(OldMDs
);
886 FunctionMDs
.push_back(MD
);
887 MetadataMap
[MD
].ID
= ++ID
;
888 if (isa
<MDString
>(MD
))
891 R
.Last
= FunctionMDs
.size();
892 FunctionMDInfo
[PrevF
] = R
;
895 void ValueEnumerator::incorporateFunctionMetadata(const Function
&F
) {
896 NumModuleMDs
= MDs
.size();
898 auto R
= FunctionMDInfo
.lookup(getValueID(&F
) + 1);
899 NumMDStrings
= R
.NumStrings
;
900 MDs
.insert(MDs
.end(), FunctionMDs
.begin() + R
.First
,
901 FunctionMDs
.begin() + R
.Last
);
904 void ValueEnumerator::EnumerateValue(const Value
*V
) {
905 assert(!V
->getType()->isVoidTy() && "Can't insert void values!");
906 assert(!isa
<MetadataAsValue
>(V
) && "EnumerateValue doesn't handle Metadata!");
908 // Check to see if it's already in!
909 unsigned &ValueID
= ValueMap
[V
];
911 // Increment use count.
912 Values
[ValueID
-1].second
++;
916 if (auto *GO
= dyn_cast
<GlobalObject
>(V
))
917 if (const Comdat
*C
= GO
->getComdat())
920 // Enumerate the type of this value.
921 EnumerateType(V
->getType());
923 if (const Constant
*C
= dyn_cast
<Constant
>(V
)) {
924 if (isa
<GlobalValue
>(C
)) {
925 // Initializers for globals are handled explicitly elsewhere.
926 } else if (C
->getNumOperands()) {
927 // If a constant has operands, enumerate them. This makes sure that if a
928 // constant has uses (for example an array of const ints), that they are
931 // We prefer to enumerate them with values before we enumerate the user
932 // itself. This makes it more likely that we can avoid forward references
933 // in the reader. We know that there can be no cycles in the constants
934 // graph that don't go through a global variable.
935 for (User::const_op_iterator I
= C
->op_begin(), E
= C
->op_end();
937 if (!isa
<BasicBlock
>(*I
)) // Don't enumerate BB operand to BlockAddress.
939 if (auto *CE
= dyn_cast
<ConstantExpr
>(C
))
940 if (CE
->getOpcode() == Instruction::ShuffleVector
)
941 EnumerateValue(CE
->getShuffleMaskForBitcode());
943 // Finally, add the value. Doing this could make the ValueID reference be
944 // dangling, don't reuse it.
945 Values
.push_back(std::make_pair(V
, 1U));
946 ValueMap
[V
] = Values
.size();
952 Values
.push_back(std::make_pair(V
, 1U));
953 ValueID
= Values
.size();
957 void ValueEnumerator::EnumerateType(Type
*Ty
) {
958 unsigned *TypeID
= &TypeMap
[Ty
];
960 // We've already seen this type.
964 // If it is a non-anonymous struct, mark the type as being visited so that we
965 // don't recursively visit it. This is safe because we allow forward
966 // references of these in the bitcode reader.
967 if (StructType
*STy
= dyn_cast
<StructType
>(Ty
))
968 if (!STy
->isLiteral())
971 // Enumerate all of the subtypes before we enumerate this type. This ensures
972 // that the type will be enumerated in an order that can be directly built.
973 for (Type
*SubTy
: Ty
->subtypes())
974 EnumerateType(SubTy
);
976 // Refresh the TypeID pointer in case the table rehashed.
977 TypeID
= &TypeMap
[Ty
];
979 // Check to see if we got the pointer another way. This can happen when
980 // enumerating recursive types that hit the base case deeper than they start.
982 // If this is actually a struct that we are treating as forward ref'able,
983 // then emit the definition now that all of its contents are available.
984 if (*TypeID
&& *TypeID
!= ~0U)
987 // Add this type now that its contents are all happily enumerated.
990 *TypeID
= Types
.size();
993 // Enumerate the types for the specified value. If the value is a constant,
994 // walk through it, enumerating the types of the constant.
995 void ValueEnumerator::EnumerateOperandType(const Value
*V
) {
996 EnumerateType(V
->getType());
998 assert(!isa
<MetadataAsValue
>(V
) && "Unexpected metadata operand");
1000 const Constant
*C
= dyn_cast
<Constant
>(V
);
1004 // If this constant is already enumerated, ignore it, we know its type must
1006 if (ValueMap
.count(C
))
1009 // This constant may have operands, make sure to enumerate the types in
1011 for (const Value
*Op
: C
->operands()) {
1012 // Don't enumerate basic blocks here, this happens as operands to
1014 if (isa
<BasicBlock
>(Op
))
1017 EnumerateOperandType(Op
);
1019 if (auto *CE
= dyn_cast
<ConstantExpr
>(C
)) {
1020 if (CE
->getOpcode() == Instruction::ShuffleVector
)
1021 EnumerateOperandType(CE
->getShuffleMaskForBitcode());
1022 if (CE
->getOpcode() == Instruction::GetElementPtr
)
1023 EnumerateType(cast
<GEPOperator
>(CE
)->getSourceElementType());
1027 void ValueEnumerator::EnumerateAttributes(AttributeList PAL
) {
1028 if (PAL
.isEmpty()) return; // null is always 0.
1031 unsigned &Entry
= AttributeListMap
[PAL
];
1033 // Never saw this before, add it.
1034 AttributeLists
.push_back(PAL
);
1035 Entry
= AttributeLists
.size();
1038 // Do lookups for all attribute groups.
1039 for (unsigned i
= PAL
.index_begin(), e
= PAL
.index_end(); i
!= e
; ++i
) {
1040 AttributeSet AS
= PAL
.getAttributes(i
);
1041 if (!AS
.hasAttributes())
1043 IndexAndAttrSet Pair
= {i
, AS
};
1044 unsigned &Entry
= AttributeGroupMap
[Pair
];
1046 AttributeGroups
.push_back(Pair
);
1047 Entry
= AttributeGroups
.size();
1049 for (Attribute Attr
: AS
) {
1050 if (Attr
.isTypeAttribute())
1051 EnumerateType(Attr
.getValueAsType());
1057 void ValueEnumerator::incorporateFunction(const Function
&F
) {
1058 InstructionCount
= 0;
1059 NumModuleValues
= Values
.size();
1061 // Add global metadata to the function block. This doesn't include
1063 incorporateFunctionMetadata(F
);
1065 // Adding function arguments to the value table.
1066 for (const auto &I
: F
.args()) {
1068 if (I
.hasAttribute(Attribute::ByVal
))
1069 EnumerateType(I
.getParamByValType());
1070 else if (I
.hasAttribute(Attribute::StructRet
))
1071 EnumerateType(I
.getParamStructRetType());
1072 else if (I
.hasAttribute(Attribute::ByRef
))
1073 EnumerateType(I
.getParamByRefType());
1075 FirstFuncConstantID
= Values
.size();
1077 // Add all function-level constants to the value table.
1078 for (const BasicBlock
&BB
: F
) {
1079 for (const Instruction
&I
: BB
) {
1080 for (const Use
&OI
: I
.operands()) {
1081 if ((isa
<Constant
>(OI
) && !isa
<GlobalValue
>(OI
)) || isa
<InlineAsm
>(OI
))
1084 if (auto *SVI
= dyn_cast
<ShuffleVectorInst
>(&I
))
1085 EnumerateValue(SVI
->getShuffleMaskForBitcode());
1087 BasicBlocks
.push_back(&BB
);
1088 ValueMap
[&BB
] = BasicBlocks
.size();
1091 // Optimize the constant layout.
1092 OptimizeConstants(FirstFuncConstantID
, Values
.size());
1094 // Add the function's parameter attributes so they are available for use in
1095 // the function's instruction.
1096 EnumerateAttributes(F
.getAttributes());
1098 FirstInstID
= Values
.size();
1100 SmallVector
<LocalAsMetadata
*, 8> FnLocalMDVector
;
1101 SmallVector
<DIArgList
*, 8> ArgListMDVector
;
1102 // Add all of the instructions.
1103 for (const BasicBlock
&BB
: F
) {
1104 for (const Instruction
&I
: BB
) {
1105 for (const Use
&OI
: I
.operands()) {
1106 if (auto *MD
= dyn_cast
<MetadataAsValue
>(&OI
)) {
1107 if (auto *Local
= dyn_cast
<LocalAsMetadata
>(MD
->getMetadata())) {
1108 // Enumerate metadata after the instructions they might refer to.
1109 FnLocalMDVector
.push_back(Local
);
1110 } else if (auto *ArgList
= dyn_cast
<DIArgList
>(MD
->getMetadata())) {
1111 ArgListMDVector
.push_back(ArgList
);
1112 for (ValueAsMetadata
*VMD
: ArgList
->getArgs()) {
1113 if (auto *Local
= dyn_cast
<LocalAsMetadata
>(VMD
)) {
1114 // Enumerate metadata after the instructions they might refer
1116 FnLocalMDVector
.push_back(Local
);
1123 if (!I
.getType()->isVoidTy())
1128 // Add all of the function-local metadata.
1129 for (unsigned i
= 0, e
= FnLocalMDVector
.size(); i
!= e
; ++i
) {
1130 // At this point, every local values have been incorporated, we shouldn't
1131 // have a metadata operand that references a value that hasn't been seen.
1132 assert(ValueMap
.count(FnLocalMDVector
[i
]->getValue()) &&
1133 "Missing value for metadata operand");
1134 EnumerateFunctionLocalMetadata(F
, FnLocalMDVector
[i
]);
1136 // DIArgList entries must come after function-local metadata, as it is not
1137 // possible to forward-reference them.
1138 for (const DIArgList
*ArgList
: ArgListMDVector
)
1139 EnumerateFunctionLocalListMetadata(F
, ArgList
);
1142 void ValueEnumerator::purgeFunction() {
1143 /// Remove purged values from the ValueMap.
1144 for (unsigned i
= NumModuleValues
, e
= Values
.size(); i
!= e
; ++i
)
1145 ValueMap
.erase(Values
[i
].first
);
1146 for (unsigned i
= NumModuleMDs
, e
= MDs
.size(); i
!= e
; ++i
)
1147 MetadataMap
.erase(MDs
[i
]);
1148 for (unsigned i
= 0, e
= BasicBlocks
.size(); i
!= e
; ++i
)
1149 ValueMap
.erase(BasicBlocks
[i
]);
1151 Values
.resize(NumModuleValues
);
1152 MDs
.resize(NumModuleMDs
);
1153 BasicBlocks
.clear();
1157 static void IncorporateFunctionInfoGlobalBBIDs(const Function
*F
,
1158 DenseMap
<const BasicBlock
*, unsigned> &IDMap
) {
1159 unsigned Counter
= 0;
1160 for (const BasicBlock
&BB
: *F
)
1161 IDMap
[&BB
] = ++Counter
;
1164 /// getGlobalBasicBlockID - This returns the function-specific ID for the
1165 /// specified basic block. This is relatively expensive information, so it
1166 /// should only be used by rare constructs such as address-of-label.
1167 unsigned ValueEnumerator::getGlobalBasicBlockID(const BasicBlock
*BB
) const {
1168 unsigned &Idx
= GlobalBasicBlockIDs
[BB
];
1172 IncorporateFunctionInfoGlobalBBIDs(BB
->getParent(), GlobalBasicBlockIDs
);
1173 return getGlobalBasicBlockID(BB
);
1176 uint64_t ValueEnumerator::computeBitsRequiredForTypeIndicies() const {
1177 return Log2_32_Ceil(getTypes().size() + 1);