1 //===--------------- IRNormalizer.cpp - IR Normalizer ---------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 /// This file implements the IRNormalizer class which aims to transform LLVM
10 /// Modules into a normal form by reordering and renaming instructions while
11 /// preserving the same semantics. The normalizer makes it easier to spot
12 /// semantic differences while diffing two modules which have undergone
15 //===----------------------------------------------------------------------===//
17 #include "llvm/Transforms/Utils/IRNormalizer.h"
18 #include "llvm/ADT/SetVector.h"
19 #include "llvm/ADT/SmallPtrSet.h"
20 #include "llvm/ADT/SmallString.h"
21 #include "llvm/ADT/SmallVector.h"
22 #include "llvm/IR/BasicBlock.h"
23 #include "llvm/IR/Function.h"
24 #include "llvm/IR/IRBuilder.h"
25 #include "llvm/IR/InstIterator.h"
26 #include "llvm/IR/Module.h"
27 #include "llvm/InitializePasses.h"
28 #include "llvm/Pass.h"
29 #include "llvm/PassRegistry.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Transforms/Utils.h"
35 #define DEBUG_TYPE "normalize"
40 /// IRNormalizer aims to transform LLVM IR into normal form.
43 /// \name Normalizer flags.
45 /// Preserves original order of instructions.
46 static cl::opt
<bool> PreserveOrder
;
47 /// Renames all instructions (including user-named).
48 static cl::opt
<bool> RenameAll
; // TODO: Don't rename on empty name
49 /// Folds all regular instructions (including pre-outputs).
50 static cl::opt
<bool> FoldPreOutputs
;
51 /// Sorts and reorders operands in commutative instructions.
52 static cl::opt
<bool> ReorderOperands
;
55 bool runOnFunction(Function
&F
);
58 // Random constant for hashing, so the state isn't zero.
59 const uint64_t MagicHashConstant
= 0x6acaa36bef8325c5ULL
;
60 DenseSet
<const Instruction
*> NamedInstructions
;
62 SmallVector
<Instruction
*, 16> Outputs
;
66 void nameFunctionArguments(Function
&F
) const;
67 void nameBasicBlocks(Function
&F
) const;
68 void nameInstruction(Instruction
*I
);
69 void nameAsInitialInstruction(Instruction
*I
) const;
70 void nameAsRegularInstruction(Instruction
*I
);
71 void foldInstructionName(Instruction
*I
) const;
76 void reorderInstructions(Function
&F
) const;
77 void reorderDefinition(Instruction
*Definition
,
78 std::stack
<Instruction
*> &TopologicalSort
,
79 SmallPtrSet
<const Instruction
*, 32> &Visited
) const;
80 void reorderInstructionOperandsByNames(Instruction
*I
) const;
81 void reorderPHIIncomingValues(PHINode
*Phi
) const;
84 /// \name Utility methods.
87 void sortCommutativeOperands(Instruction
*I
, T
&Operands
) const;
88 SmallVector
<Instruction
*, 16> collectOutputInstructions(Function
&F
) const;
89 bool isOutput(const Instruction
*I
) const;
90 bool isInitialInstruction(const Instruction
*I
) const;
91 bool hasOnlyImmediateOperands(const Instruction
*I
) const;
93 getOutputFootprint(Instruction
*I
,
94 SmallPtrSet
<const Instruction
*, 32> &Visited
) const;
99 cl::opt
<bool> IRNormalizer::PreserveOrder(
100 "norm-preserve-order", cl::Hidden
, cl::init(false),
101 cl::desc("Preserves original instruction order"));
102 cl::opt
<bool> IRNormalizer::RenameAll(
103 "norm-rename-all", cl::Hidden
, cl::init(true),
104 cl::desc("Renames all instructions (including user-named)"));
105 cl::opt
<bool> IRNormalizer::FoldPreOutputs(
106 "norm-fold-all", cl::Hidden
, cl::init(true),
107 cl::desc("Folds all regular instructions (including pre-outputs)"));
108 cl::opt
<bool> IRNormalizer::ReorderOperands(
109 "norm-reorder-operands", cl::Hidden
, cl::init(true),
110 cl::desc("Sorts and reorders operands in commutative instructions"));
112 /// Entry method to the IRNormalizer.
114 /// \param F Function to normalize.
115 bool IRNormalizer::runOnFunction(Function
&F
) {
116 nameFunctionArguments(F
);
119 Outputs
= collectOutputInstructions(F
);
122 reorderInstructions(F
);
124 // TODO: Reorder basic blocks via a topological sort.
126 for (auto &I
: Outputs
)
129 for (auto &I
: instructions(F
)) {
130 if (!PreserveOrder
) {
132 reorderInstructionOperandsByNames(&I
);
134 if (auto *Phi
= dyn_cast
<PHINode
>(&I
))
135 reorderPHIIncomingValues(Phi
);
137 foldInstructionName(&I
);
143 /// Numbers arguments.
145 /// \param F Function whose arguments will be renamed.
146 void IRNormalizer::nameFunctionArguments(Function
&F
) const {
147 int ArgumentCounter
= 0;
148 for (auto &A
: F
.args()) {
149 if (RenameAll
|| A
.getName().empty()) {
150 A
.setName("a" + Twine(ArgumentCounter
));
151 ArgumentCounter
+= 1;
156 /// Names basic blocks using a generated hash for each basic block in
157 /// a function considering the opcode and the order of output instructions.
159 /// \param F Function containing basic blocks to rename.
160 void IRNormalizer::nameBasicBlocks(Function
&F
) const {
162 // Initialize to a magic constant, so the state isn't zero.
163 uint64_t Hash
= MagicHashConstant
;
165 // Hash considering output instruction opcodes.
168 Hash
= hashing::detail::hash_16_bytes(Hash
, I
.getOpcode());
170 if (RenameAll
|| B
.getName().empty()) {
171 // Name basic block. Substring hash to make diffs more readable.
172 B
.setName("bb" + std::to_string(Hash
).substr(0, 5));
177 /// Names instructions graphically (recursive) in accordance with the
178 /// def-use tree, starting from the initial instructions (defs), finishing at
179 /// the output (top-most user) instructions (depth-first).
181 /// \param I Instruction to be renamed.
182 void IRNormalizer::nameInstruction(Instruction
*I
) {
183 // Ensure instructions are not renamed. This is done
184 // to prevent situation where instructions are used
185 // before their definition (in phi nodes)
186 if (NamedInstructions
.contains(I
))
188 NamedInstructions
.insert(I
);
189 if (isInitialInstruction(I
)) {
190 nameAsInitialInstruction(I
);
192 // This must be a regular instruction.
193 nameAsRegularInstruction(I
);
197 template <typename T
>
198 void IRNormalizer::sortCommutativeOperands(Instruction
*I
, T
&Operands
) const {
199 if (!(I
->isCommutative() && Operands
.size() >= 2))
201 auto CommutativeEnd
= Operands
.begin();
202 std::advance(CommutativeEnd
, 2);
203 llvm::sort(Operands
.begin(), CommutativeEnd
);
206 /// Names instruction following the scheme:
207 /// vl00000Callee(Operands)
209 /// Where 00000 is a hash calculated considering instruction's opcode and output
210 /// footprint. Callee's name is only included when instruction's type is
211 /// CallInst. In cases where instruction is commutative, operands list is also
214 /// Renames instruction only when RenameAll flag is raised or instruction is
217 /// \see getOutputFootprint()
218 /// \param I Instruction to be renamed.
219 void IRNormalizer::nameAsInitialInstruction(Instruction
*I
) const {
220 if (I
->getType()->isVoidTy())
222 if (!(I
->getName().empty() || RenameAll
))
224 LLVM_DEBUG(dbgs() << "Naming initial instruction: " << *I
<< "\n");
226 // Instruction operands for further sorting.
227 SmallVector
<SmallString
<64>, 4> Operands
;
230 for (auto &Op
: I
->operands()) {
231 if (!isa
<Function
>(Op
)) {
232 std::string TextRepresentation
;
233 raw_string_ostream
Stream(TextRepresentation
);
234 Op
->printAsOperand(Stream
, false);
235 Operands
.push_back(StringRef(Stream
.str()));
239 sortCommutativeOperands(I
, Operands
);
241 // Initialize to a magic constant, so the state isn't zero.
242 uint64_t Hash
= MagicHashConstant
;
244 // Consider instruction's opcode in the hash.
245 Hash
= hashing::detail::hash_16_bytes(Hash
, I
->getOpcode());
247 SmallPtrSet
<const Instruction
*, 32> Visited
;
248 // Get output footprint for I.
249 SetVector
<int> OutputFootprint
= getOutputFootprint(I
, Visited
);
251 // Consider output footprint in the hash.
252 for (const int &Output
: OutputFootprint
)
253 Hash
= hashing::detail::hash_16_bytes(Hash
, Output
);
255 // Base instruction name.
256 SmallString
<256> Name
;
257 Name
.append("vl" + std::to_string(Hash
).substr(0, 5));
259 // In case of CallInst, consider callee in the instruction name.
260 if (const auto *CI
= dyn_cast
<CallInst
>(I
)) {
261 Function
*F
= CI
->getCalledFunction();
264 Name
.append(F
->getName());
268 for (size_t i
= 0; i
< Operands
.size(); ++i
) {
269 Name
.append(Operands
[i
]);
271 if (i
< Operands
.size() - 1)
279 /// Names instruction following the scheme:
280 /// op00000Callee(Operands)
282 /// Where 00000 is a hash calculated considering instruction's opcode, its
283 /// operands' opcodes and order. Callee's name is only included when
284 /// instruction's type is CallInst. In cases where instruction is commutative,
285 /// operand list is also sorted.
287 /// Names instructions recursively in accordance with the def-use tree,
288 /// starting from the initial instructions (defs), finishing at
289 /// the output (top-most user) instructions (depth-first).
291 /// Renames instruction only when RenameAll flag is raised or instruction is
294 /// \see getOutputFootprint()
295 /// \param I Instruction to be renamed.
296 void IRNormalizer::nameAsRegularInstruction(Instruction
*I
) {
297 LLVM_DEBUG(dbgs() << "Naming regular instruction: " << *I
<< "\n");
299 // Instruction operands for further sorting.
300 SmallVector
<SmallString
<128>, 4> Operands
;
302 // The name of a regular instruction depends
303 // on the names of its operands. Hence, all
304 // operands must be named first in the use-def
308 for (auto &Op
: I
->operands()) {
309 if (auto *I
= dyn_cast
<Instruction
>(Op
)) {
310 // Walk down the use-def chain.
312 Operands
.push_back(I
->getName());
313 } else if (!isa
<Function
>(Op
)) {
314 // This must be an immediate value.
315 std::string TextRepresentation
;
316 raw_string_ostream
Stream(TextRepresentation
);
317 Op
->printAsOperand(Stream
, false);
318 Operands
.push_back(StringRef(Stream
.str()));
322 sortCommutativeOperands(I
, Operands
);
324 // Initialize to a magic constant, so the state isn't zero.
325 uint64_t Hash
= MagicHashConstant
;
327 // Consider instruction opcode in the hash.
328 Hash
= hashing::detail::hash_16_bytes(Hash
, I
->getOpcode());
330 // Operand opcodes for further sorting (commutative).
331 SmallVector
<int, 4> OperandsOpcodes
;
333 // Collect operand opcodes for hashing.
334 for (auto &Op
: I
->operands())
335 if (auto *I
= dyn_cast
<Instruction
>(Op
))
336 OperandsOpcodes
.push_back(I
->getOpcode());
338 sortCommutativeOperands(I
, OperandsOpcodes
);
340 // Consider operand opcodes in the hash.
341 for (const int Code
: OperandsOpcodes
)
342 Hash
= hashing::detail::hash_16_bytes(Hash
, Code
);
344 // Base instruction name.
345 SmallString
<512> Name
;
346 Name
.append("op" + std::to_string(Hash
).substr(0, 5));
348 // In case of CallInst, consider callee in the instruction name.
349 if (const auto *CI
= dyn_cast
<CallInst
>(I
))
350 if (const Function
*F
= CI
->getCalledFunction())
351 Name
.append(F
->getName());
354 for (size_t i
= 0; i
< Operands
.size(); ++i
) {
355 Name
.append(Operands
[i
]);
357 if (i
< Operands
.size() - 1)
362 if ((I
->getName().empty() || RenameAll
) && !I
->getType()->isVoidTy())
366 /// Shortens instruction's name. This method removes called function name from
367 /// the instruction name and substitutes the call chain with a corresponding
368 /// list of operands.
371 /// op00000Callee(op00001Callee(...), vl00000Callee(1, 2), ...) ->
372 /// op00000(op00001, vl00000, ...) vl00000Callee(1, 2) -> vl00000(1, 2)
374 /// This method omits output instructions and pre-output (instructions directly
375 /// used by an output instruction) instructions (by default). By default it also
376 /// does not affect user named instructions.
378 /// \param I Instruction whose name will be folded.
379 void IRNormalizer::foldInstructionName(Instruction
*I
) const {
380 // If this flag is raised, fold all regular
381 // instructions (including pre-outputs).
382 if (!FoldPreOutputs
) {
383 // Don't fold if one of the users is an output instruction.
384 for (auto *U
: I
->users())
385 if (auto *IU
= dyn_cast
<Instruction
>(U
))
390 // Don't fold if it is an output instruction or has no op prefix.
391 if (isOutput(I
) || I
->getName().substr(0, 2) != "op")
394 // Instruction operands.
395 SmallVector
<SmallString
<64>, 4> Operands
;
397 for (auto &Op
: I
->operands()) {
398 if (const auto *I
= dyn_cast
<Instruction
>(Op
)) {
399 bool HasNormalName
= I
->getName().substr(0, 2) == "op" ||
400 I
->getName().substr(0, 2) == "vl";
402 Operands
.push_back(HasNormalName
? I
->getName().substr(0, 7)
407 sortCommutativeOperands(I
, Operands
);
409 SmallString
<256> Name
;
410 Name
.append(I
->getName().substr(0, 7));
413 for (size_t i
= 0; i
< Operands
.size(); ++i
) {
414 Name
.append(Operands
[i
]);
416 if (i
< Operands
.size() - 1)
424 /// Reorders instructions by walking up the tree from each operand of an output
425 /// instruction and reducing the def-use distance.
426 /// This method assumes that output instructions were collected top-down,
427 /// otherwise the def-use chain may be broken.
428 /// This method is a wrapper for recursive reorderInstruction().
430 /// \see reorderInstruction()
431 void IRNormalizer::reorderInstructions(Function
&F
) const {
433 LLVM_DEBUG(dbgs() << "Reordering instructions in basic block: "
434 << BB
.getName() << "\n");
435 // Find the source nodes of the DAG of instructions in this basic block.
436 // Source nodes are instructions that have side effects, are terminators, or
437 // don't have a parent in the DAG of instructions.
439 // We must iterate from the first to the last instruction otherwise side
440 // effecting instructions could be reordered.
442 std::stack
<Instruction
*> TopologicalSort
;
443 SmallPtrSet
<const Instruction
*, 32> Visited
;
445 // First process side effecting and terminating instructions.
446 if (!(isOutput(&I
) || I
.isTerminator()))
448 LLVM_DEBUG(dbgs() << "\tReordering from source effecting instruction: ";
450 reorderDefinition(&I
, TopologicalSort
, Visited
);
454 // Process the remaining instructions.
456 // TODO: Do more a intelligent sorting of these instructions. For example,
457 // seperate between dead instructinos and instructions used in another
458 // block. Use properties of the CFG the order instructions that are used
460 if (Visited
.contains(&I
))
462 LLVM_DEBUG(dbgs() << "\tReordering from source instruction: "; I
.dump());
463 reorderDefinition(&I
, TopologicalSort
, Visited
);
466 LLVM_DEBUG(dbgs() << "Inserting instructions into: " << BB
.getName()
468 // Reorder based on the topological sort.
469 while (!TopologicalSort
.empty()) {
470 auto *Instruction
= TopologicalSort
.top();
471 auto FirstNonPHIOrDbgOrAlloca
= BB
.getFirstNonPHIOrDbgOrAlloca();
472 if (auto *Call
= dyn_cast
<CallInst
>(&*FirstNonPHIOrDbgOrAlloca
)) {
473 if (Call
->getIntrinsicID() ==
474 Intrinsic::experimental_convergence_entry
||
475 Call
->getIntrinsicID() == Intrinsic::experimental_convergence_loop
)
476 FirstNonPHIOrDbgOrAlloca
++;
478 Instruction
->moveBefore(&*FirstNonPHIOrDbgOrAlloca
);
479 TopologicalSort
.pop();
484 void IRNormalizer::reorderDefinition(
485 Instruction
*Definition
, std::stack
<Instruction
*> &TopologicalSort
,
486 SmallPtrSet
<const Instruction
*, 32> &Visited
) const {
487 if (Visited
.contains(Definition
))
489 Visited
.insert(Definition
);
492 const auto *BasicBlock
= Definition
->getParent();
493 const auto FirstNonPHIOrDbgOrAlloca
=
494 BasicBlock
->getFirstNonPHIOrDbgOrAlloca();
495 if (FirstNonPHIOrDbgOrAlloca
== BasicBlock
->end())
496 return; // TODO: Is this necessary?
497 if (Definition
->comesBefore(&*FirstNonPHIOrDbgOrAlloca
))
498 return; // TODO: Do some kind of ordering for these instructions.
501 for (auto &Operand
: Definition
->operands()) {
502 if (auto *Op
= dyn_cast
<Instruction
>(Operand
)) {
503 if (Op
->getParent() != Definition
->getParent())
504 continue; // Only reorder instruction within the same basic block
505 reorderDefinition(Op
, TopologicalSort
, Visited
);
509 LLVM_DEBUG(dbgs() << "\t\tNext in topological sort: "; Definition
->dump());
510 if (Definition
->isTerminator())
512 if (auto *Call
= dyn_cast
<CallInst
>(Definition
)) {
513 if (Call
->isMustTailCall())
515 if (Call
->getIntrinsicID() == Intrinsic::experimental_deoptimize
)
517 if (Call
->getIntrinsicID() == Intrinsic::experimental_convergence_entry
)
519 if (Call
->getIntrinsicID() == Intrinsic::experimental_convergence_loop
)
522 if (auto *BitCast
= dyn_cast
<BitCastInst
>(Definition
)) {
523 if (auto *Call
= dyn_cast
<CallInst
>(BitCast
->getOperand(0))) {
524 if (Call
->isMustTailCall())
529 TopologicalSort
.emplace(Definition
);
532 /// Reorders instruction's operands alphabetically. This method assumes
533 /// that passed instruction is commutative. Changing the operand order
534 /// in other instructions may change the semantics.
536 /// \param I Instruction whose operands will be reordered.
537 void IRNormalizer::reorderInstructionOperandsByNames(Instruction
*I
) const {
538 // This method assumes that passed I is commutative,
539 // changing the order of operands in other instructions
540 // may change the semantics.
542 // Instruction operands for further sorting.
543 SmallVector
<std::pair
<std::string
, Value
*>, 4> Operands
;
546 for (auto &Op
: I
->operands()) {
547 if (auto *V
= dyn_cast
<Value
>(Op
)) {
548 if (isa
<Instruction
>(V
)) {
549 // This is an an instruction.
550 Operands
.push_back(std::pair
<std::string
, Value
*>(V
->getName(), V
));
552 std::string TextRepresentation
;
553 raw_string_ostream
Stream(TextRepresentation
);
554 Op
->printAsOperand(Stream
, false);
555 Operands
.push_back(std::pair
<std::string
, Value
*>(Stream
.str(), V
));
561 sortCommutativeOperands(I
, Operands
);
564 unsigned Position
= 0;
565 for (auto &Op
: I
->operands()) {
566 Op
.set(Operands
[Position
].second
);
571 /// Reorders PHI node's values according to the names of corresponding basic
574 /// \param Phi PHI node to normalize.
575 void IRNormalizer::reorderPHIIncomingValues(PHINode
*Phi
) const {
576 // Values for further sorting.
577 SmallVector
<std::pair
<Value
*, BasicBlock
*>, 2> Values
;
579 // Collect blocks and corresponding values.
580 for (auto &BB
: Phi
->blocks()) {
581 Value
*V
= Phi
->getIncomingValueForBlock(BB
);
582 Values
.push_back(std::pair
<Value
*, BasicBlock
*>(V
, BB
));
585 // Sort values according to the name of a basic block.
586 llvm::sort(Values
, [](const std::pair
<Value
*, BasicBlock
*> &LHS
,
587 const std::pair
<Value
*, BasicBlock
*> &RHS
) {
588 return LHS
.second
->getName() < RHS
.second
->getName();
592 for (unsigned i
= 0; i
< Values
.size(); ++i
) {
593 Phi
->setIncomingBlock(i
, Values
[i
].second
);
594 Phi
->setIncomingValue(i
, Values
[i
].first
);
598 /// Returns a vector of output instructions. An output is an instruction which
599 /// has side-effects or is ReturnInst. Uses isOutput().
602 /// \param F Function to collect outputs from.
603 SmallVector
<Instruction
*, 16>
604 IRNormalizer::collectOutputInstructions(Function
&F
) const {
605 // Output instructions are collected top-down in each function,
606 // any change may break the def-use chain in reordering methods.
607 SmallVector
<Instruction
*, 16> Outputs
;
608 for (auto &I
: instructions(F
))
610 Outputs
.push_back(&I
);
614 /// Helper method checking whether the instruction may have side effects or is
617 /// \param I Considered instruction.
618 bool IRNormalizer::isOutput(const Instruction
*I
) const {
619 // Outputs are such instructions which may have side effects or is ReturnInst.
620 return I
->mayHaveSideEffects() || isa
<ReturnInst
>(I
);
623 /// Helper method checking whether the instruction has users and only
624 /// immediate operands.
626 /// \param I Considered instruction.
627 bool IRNormalizer::isInitialInstruction(const Instruction
*I
) const {
628 // Initial instructions are such instructions whose values are used by
629 // other instructions, yet they only depend on immediate values.
630 return !I
->user_empty() && hasOnlyImmediateOperands(I
);
633 /// Helper method checking whether the instruction has only immediate operands.
635 /// \param I Considered instruction.
636 bool IRNormalizer::hasOnlyImmediateOperands(const Instruction
*I
) const {
637 for (const auto &Op
: I
->operands())
638 if (isa
<Instruction
>(Op
))
639 return false; // Found non-immediate operand (instruction).
643 /// Helper method returning indices (distance from the beginning of the basic
644 /// block) of outputs using the \p I (eliminates repetitions). Walks down the
645 /// def-use tree recursively.
647 /// \param I Considered instruction.
648 /// \param Visited Set of visited instructions.
649 SetVector
<int> IRNormalizer::getOutputFootprint(
650 Instruction
*I
, SmallPtrSet
<const Instruction
*, 32> &Visited
) const {
652 // Vector containing indexes of outputs (no repetitions),
653 // which use I in the order of walking down the def-use tree.
654 SetVector
<int> Outputs
;
656 if (!Visited
.count(I
)) {
660 // Gets output instruction's parent function.
661 Function
*Func
= I
->getParent()->getParent();
663 // Finds and inserts the index of the output to the vector.
665 for (const auto &B
: *Func
) {
666 for (const auto &E
: B
) {
668 Outputs
.insert(Count
);
673 // Returns to the used instruction.
677 for (auto *U
: I
->users()) {
678 if (auto *UI
= dyn_cast
<Instruction
>(U
)) {
679 // Vector for outputs which use UI.
680 SetVector
<int> OutputsUsingUI
= getOutputFootprint(UI
, Visited
);
681 // Insert the indexes of outputs using UI.
682 Outputs
.insert(OutputsUsingUI
.begin(), OutputsUsingUI
.end());
687 // Return to the used instruction.
691 PreservedAnalyses
IRNormalizerPass::run(Function
&F
,
692 FunctionAnalysisManager
&AM
) const {
693 IRNormalizer
{}.runOnFunction(F
);
694 PreservedAnalyses PA
;
695 PA
.preserveSet
<CFGAnalyses
>();