1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SetOperations.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Operator.h"
20 #include "llvm/IR/User.h"
21 #include "llvm/InitializePasses.h"
22 #include "llvm/Support/SuffixTree.h"
25 using namespace IRSimilarity
;
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
37 cl::desc("disable outlining indirect calls."));
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden
,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden
,
46 cl::desc("Don't match or outline intrinsics"));
49 IRInstructionData::IRInstructionData(Instruction
&I
, bool Legality
,
50 IRInstructionDataList
&IDList
)
51 : Inst(&I
), Legal(Legality
), IDL(&IDList
) {
52 initializeInstruction();
55 void IRInstructionData::initializeInstruction() {
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
59 if (CmpInst
*C
= dyn_cast
<CmpInst
>(Inst
)) {
60 CmpInst::Predicate Predicate
= predicateForConsistency(C
);
61 if (Predicate
!= C
->getPredicate())
62 RevisedPredicate
= Predicate
;
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use
&OI
: Inst
->operands()) {
68 if (isa
<CmpInst
>(Inst
) && RevisedPredicate
) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals
.insert(OperVals
.begin(), OI
.get());
75 OperVals
.push_back(OI
.get());
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
80 if (PHINode
*PN
= dyn_cast
<PHINode
>(Inst
))
81 for (BasicBlock
*BB
: PN
->blocks())
82 OperVals
.push_back(BB
);
85 IRInstructionData::IRInstructionData(IRInstructionDataList
&IDList
)
88 void IRInstructionData::setBranchSuccessors(
89 DenseMap
<BasicBlock
*, unsigned> &BasicBlockToInteger
) {
90 assert(isa
<BranchInst
>(Inst
) && "Instruction must be branch");
92 BranchInst
*BI
= cast
<BranchInst
>(Inst
);
93 DenseMap
<BasicBlock
*, unsigned>::iterator BBNumIt
;
95 BBNumIt
= BasicBlockToInteger
.find(BI
->getParent());
96 assert(BBNumIt
!= BasicBlockToInteger
.end() &&
97 "Could not find location for BasicBlock!");
99 int CurrentBlockNumber
= static_cast<int>(BBNumIt
->second
);
101 for (Value
*V
: getBlockOperVals()) {
102 BasicBlock
*Successor
= cast
<BasicBlock
>(V
);
103 BBNumIt
= BasicBlockToInteger
.find(Successor
);
104 assert(BBNumIt
!= BasicBlockToInteger
.end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber
= static_cast<int>(BBNumIt
->second
);
108 int Relative
= OtherBlockNumber
- CurrentBlockNumber
;
109 RelativeBlockLocations
.push_back(Relative
);
113 ArrayRef
<Value
*> IRInstructionData::getBlockOperVals() {
114 assert((isa
<BranchInst
>(Inst
) ||
115 isa
<PHINode
>(Inst
)) && "Instruction must be branch or PHINode");
117 if (BranchInst
*BI
= dyn_cast
<BranchInst
>(Inst
))
118 return ArrayRef
<Value
*>(
119 std::next(OperVals
.begin(), BI
->isConditional() ? 1 : 0),
123 if (PHINode
*PN
= dyn_cast
<PHINode
>(Inst
))
124 return ArrayRef
<Value
*>(
125 std::next(OperVals
.begin(), PN
->getNumIncomingValues()),
129 return ArrayRef
<Value
*>();
132 void IRInstructionData::setCalleeName(bool MatchByName
) {
133 CallInst
*CI
= dyn_cast
<CallInst
>(Inst
);
134 assert(CI
&& "Instruction must be call");
137 if (IntrinsicInst
*II
= dyn_cast
<IntrinsicInst
>(Inst
)) {
138 // To hash intrinsics, we use the opcode, and types like the other
139 // instructions, but also, the Intrinsic ID, and the Name of the
141 Intrinsic::ID IntrinsicID
= II
->getIntrinsicID();
142 FunctionType
*FT
= II
->getFunctionType();
143 // If there is an overloaded name, we have to use the complex version
144 // of getName to get the entire string.
145 if (Intrinsic::isOverloaded(IntrinsicID
))
147 Intrinsic::getName(IntrinsicID
, FT
->params(), II
->getModule(), FT
);
148 // If there is not an overloaded name, we only need to use this version.
150 CalleeName
= Intrinsic::getName(IntrinsicID
).str();
155 if (!CI
->isIndirectCall() && MatchByName
)
156 CalleeName
= CI
->getCalledFunction()->getName().str();
159 void IRInstructionData::setPHIPredecessors(
160 DenseMap
<BasicBlock
*, unsigned> &BasicBlockToInteger
) {
161 assert(isa
<PHINode
>(Inst
) && "Instruction must be phi node");
163 PHINode
*PN
= cast
<PHINode
>(Inst
);
164 DenseMap
<BasicBlock
*, unsigned>::iterator BBNumIt
;
166 BBNumIt
= BasicBlockToInteger
.find(PN
->getParent());
167 assert(BBNumIt
!= BasicBlockToInteger
.end() &&
168 "Could not find location for BasicBlock!");
170 int CurrentBlockNumber
= static_cast<int>(BBNumIt
->second
);
172 // Convert the incoming blocks of the PHINode to an integer value, based on
173 // the relative distances between the current block and the incoming block.
174 for (unsigned Idx
= 0; Idx
< PN
->getNumIncomingValues(); Idx
++) {
175 BasicBlock
*Incoming
= PN
->getIncomingBlock(Idx
);
176 BBNumIt
= BasicBlockToInteger
.find(Incoming
);
177 assert(BBNumIt
!= BasicBlockToInteger
.end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber
= static_cast<int>(BBNumIt
->second
);
181 int Relative
= OtherBlockNumber
- CurrentBlockNumber
;
182 RelativeBlockLocations
.push_back(Relative
);
186 CmpInst::Predicate
IRInstructionData::predicateForConsistency(CmpInst
*CI
) {
187 switch (CI
->getPredicate()) {
188 case CmpInst::FCMP_OGT
:
189 case CmpInst::FCMP_UGT
:
190 case CmpInst::FCMP_OGE
:
191 case CmpInst::FCMP_UGE
:
192 case CmpInst::ICMP_SGT
:
193 case CmpInst::ICMP_UGT
:
194 case CmpInst::ICMP_SGE
:
195 case CmpInst::ICMP_UGE
:
196 return CI
->getSwappedPredicate();
198 return CI
->getPredicate();
202 CmpInst::Predicate
IRInstructionData::getPredicate() const {
203 assert(isa
<CmpInst
>(Inst
) &&
204 "Can only get a predicate from a compare instruction");
206 if (RevisedPredicate
)
207 return *RevisedPredicate
;
209 return cast
<CmpInst
>(Inst
)->getPredicate();
212 StringRef
IRInstructionData::getCalleeName() const {
213 assert(isa
<CallInst
>(Inst
) &&
214 "Can only get a name from a call instruction");
216 assert(CalleeName
&& "CalleeName has not been set");
221 bool IRSimilarity::isClose(const IRInstructionData
&A
,
222 const IRInstructionData
&B
) {
224 if (!A
.Legal
|| !B
.Legal
)
227 // Check if we are performing the same sort of operation on the same types
228 // but not on the same values.
229 if (!A
.Inst
->isSameOperationAs(B
.Inst
)) {
230 // If there is a predicate, this means that either there is a swapped
231 // predicate, or that the types are different, we want to make sure that
232 // the predicates are equivalent via swapping.
233 if (isa
<CmpInst
>(A
.Inst
) && isa
<CmpInst
>(B
.Inst
)) {
235 if (A
.getPredicate() != B
.getPredicate())
238 // If the predicates are the same via swap, make sure that the types are
240 auto ZippedTypes
= zip(A
.OperVals
, B
.OperVals
);
243 ZippedTypes
, [](std::tuple
<llvm::Value
*, llvm::Value
*> R
) {
244 return std::get
<0>(R
)->getType() == std::get
<1>(R
)->getType();
251 // Since any GEP Instruction operands after the first operand cannot be
252 // defined by a register, we must make sure that the operands after the first
253 // are the same in the two instructions
254 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(A
.Inst
)) {
255 auto *OtherGEP
= cast
<GetElementPtrInst
>(B
.Inst
);
257 // If the instructions do not have the same inbounds restrictions, we do
258 // not consider them the same.
259 if (GEP
->isInBounds() != OtherGEP
->isInBounds())
262 auto ZippedOperands
= zip(GEP
->indices(), OtherGEP
->indices());
264 // We increment here since we do not care about the first instruction,
265 // we only care about the following operands since they must be the
266 // exact same to be considered similar.
267 return all_of(drop_begin(ZippedOperands
),
268 [](std::tuple
<llvm::Use
&, llvm::Use
&> R
) {
269 return std::get
<0>(R
) == std::get
<1>(R
);
273 // If the instructions are functions calls, we make sure that the function
274 // name is the same. We already know that the types are since is
275 // isSameOperationAs is true.
276 if (isa
<CallInst
>(A
.Inst
) && isa
<CallInst
>(B
.Inst
)) {
277 if (A
.getCalleeName().str() != B
.getCalleeName().str())
281 if (isa
<BranchInst
>(A
.Inst
) && isa
<BranchInst
>(B
.Inst
) &&
282 A
.RelativeBlockLocations
.size() != B
.RelativeBlockLocations
.size())
288 // TODO: This is the same as the MachineOutliner, and should be consolidated
289 // into the same interface.
290 void IRInstructionMapper::convertToUnsignedVec(
291 BasicBlock
&BB
, std::vector
<IRInstructionData
*> &InstrList
,
292 std::vector
<unsigned> &IntegerMapping
) {
293 BasicBlock::iterator It
= BB
.begin();
295 std::vector
<unsigned> IntegerMappingForBB
;
296 std::vector
<IRInstructionData
*> InstrListForBB
;
298 for (BasicBlock::iterator Et
= BB
.end(); It
!= Et
; ++It
) {
299 switch (InstClassifier
.visit(*It
)) {
300 case InstrType::Legal
:
301 mapToLegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
);
303 case InstrType::Illegal
:
304 mapToIllegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
);
306 case InstrType::Invisible
:
307 AddedIllegalLastTime
= false;
312 if (AddedIllegalLastTime
)
313 mapToIllegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
, true);
314 for (IRInstructionData
*ID
: InstrListForBB
)
315 this->IDL
->push_back(*ID
);
316 llvm::append_range(InstrList
, InstrListForBB
);
317 llvm::append_range(IntegerMapping
, IntegerMappingForBB
);
320 // TODO: This is the same as the MachineOutliner, and should be consolidated
321 // into the same interface.
322 unsigned IRInstructionMapper::mapToLegalUnsigned(
323 BasicBlock::iterator
&It
, std::vector
<unsigned> &IntegerMappingForBB
,
324 std::vector
<IRInstructionData
*> &InstrListForBB
) {
325 // We added something legal, so we should unset the AddedLegalLastTime
327 AddedIllegalLastTime
= false;
329 // If we have at least two adjacent legal instructions (which may have
330 // invisible instructions in between), remember that.
331 if (CanCombineWithPrevInstr
)
332 HaveLegalRange
= true;
333 CanCombineWithPrevInstr
= true;
335 // Get the integer for this instruction or give it the current
337 IRInstructionData
*ID
= allocateIRInstructionData(*It
, true, *IDL
);
338 InstrListForBB
.push_back(ID
);
340 if (isa
<BranchInst
>(*It
))
341 ID
->setBranchSuccessors(BasicBlockToInteger
);
343 if (isa
<CallInst
>(*It
))
344 ID
->setCalleeName(EnableMatchCallsByName
);
346 if (isa
<PHINode
>(*It
))
347 ID
->setPHIPredecessors(BasicBlockToInteger
);
349 // Add to the instruction list
351 DenseMap
<IRInstructionData
*, unsigned, IRInstructionDataTraits
>::iterator
353 std::tie(ResultIt
, WasInserted
) =
354 InstructionIntegerMap
.insert(std::make_pair(ID
, LegalInstrNumber
));
355 unsigned INumber
= ResultIt
->second
;
357 // There was an insertion.
361 IntegerMappingForBB
.push_back(INumber
);
363 // Make sure we don't overflow or use any integers reserved by the DenseMap.
364 assert(LegalInstrNumber
< IllegalInstrNumber
&&
365 "Instruction mapping overflow!");
367 assert(LegalInstrNumber
!= DenseMapInfo
<unsigned>::getEmptyKey() &&
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
369 assert(LegalInstrNumber
!= DenseMapInfo
<unsigned>::getTombstoneKey() &&
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
376 IRInstructionMapper::allocateIRInstructionData(Instruction
&I
, bool Legality
,
377 IRInstructionDataList
&IDL
) {
378 return new (InstDataAllocator
->Allocate()) IRInstructionData(I
, Legality
, IDL
);
382 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList
&IDL
) {
383 return new (InstDataAllocator
->Allocate()) IRInstructionData(IDL
);
386 IRInstructionDataList
*
387 IRInstructionMapper::allocateIRInstructionDataList() {
388 return new (IDLAllocator
->Allocate()) IRInstructionDataList();
391 // TODO: This is the same as the MachineOutliner, and should be consolidated
392 // into the same interface.
393 unsigned IRInstructionMapper::mapToIllegalUnsigned(
394 BasicBlock::iterator
&It
, std::vector
<unsigned> &IntegerMappingForBB
,
395 std::vector
<IRInstructionData
*> &InstrListForBB
, bool End
) {
396 // Can't combine an illegal instruction. Set the flag.
397 CanCombineWithPrevInstr
= false;
399 // Only add one illegal number per range of legal numbers.
400 if (AddedIllegalLastTime
)
401 return IllegalInstrNumber
;
403 IRInstructionData
*ID
= nullptr;
405 ID
= allocateIRInstructionData(*It
, false, *IDL
);
407 ID
= allocateIRInstructionData(*IDL
);
408 InstrListForBB
.push_back(ID
);
410 // Remember that we added an illegal number last time.
411 AddedIllegalLastTime
= true;
412 unsigned INumber
= IllegalInstrNumber
;
413 IntegerMappingForBB
.push_back(IllegalInstrNumber
--);
415 assert(LegalInstrNumber
< IllegalInstrNumber
&&
416 "Instruction mapping overflow!");
418 assert(IllegalInstrNumber
!= DenseMapInfo
<unsigned>::getEmptyKey() &&
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
421 assert(IllegalInstrNumber
!= DenseMapInfo
<unsigned>::getTombstoneKey() &&
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
427 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx
, unsigned Len
,
428 IRInstructionData
*FirstInstIt
,
429 IRInstructionData
*LastInstIt
)
430 : StartIdx(StartIdx
), Len(Len
) {
432 assert(FirstInstIt
!= nullptr && "Instruction is nullptr!");
433 assert(LastInstIt
!= nullptr && "Instruction is nullptr!");
434 assert(StartIdx
+ Len
> StartIdx
&&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len
- 1 == static_cast<unsigned>(std::distance(
437 iterator(FirstInstIt
), iterator(LastInstIt
))) &&
438 "Length of the first and last IRInstructionData do not match the "
441 // We iterate over the given instructions, and map each unique value
442 // to a unique number in the IRSimilarityCandidate ValueToNumber and
443 // NumberToValue maps. A constant get its own value globally, the individual
444 // uses of the constants are not considered to be unique.
446 // IR: Mapping Added:
447 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
448 // %add2 = add i32 %a, %1 %add2 -> 4
449 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
451 // when replace with global values, starting from 1, would be
456 unsigned LocalValNumber
= 1;
457 IRInstructionDataList::iterator ID
= iterator(*FirstInstIt
);
458 for (unsigned Loc
= StartIdx
; Loc
< StartIdx
+ Len
; Loc
++, ID
++) {
459 // Map the operand values to an unsigned integer if it does not already
460 // have an unsigned integer assigned to it.
461 for (Value
*Arg
: ID
->OperVals
)
462 if (!ValueToNumber
.contains(Arg
)) {
463 ValueToNumber
.try_emplace(Arg
, LocalValNumber
);
464 NumberToValue
.try_emplace(LocalValNumber
, Arg
);
468 // Mapping the instructions to an unsigned integer if it is not already
469 // exist in the mapping.
470 if (!ValueToNumber
.contains(ID
->Inst
)) {
471 ValueToNumber
.try_emplace(ID
->Inst
, LocalValNumber
);
472 NumberToValue
.try_emplace(LocalValNumber
, ID
->Inst
);
477 // Setting the first and last instruction data pointers for the candidate. If
478 // we got through the entire for loop without hitting an assert, we know
479 // that both of these instructions are not nullptrs.
480 FirstInst
= FirstInstIt
;
481 LastInst
= LastInstIt
;
483 // Add the basic blocks contained in the set into the global value numbering.
484 DenseSet
<BasicBlock
*> BBSet
;
485 getBasicBlocks(BBSet
);
486 for (BasicBlock
*BB
: BBSet
) {
487 if (ValueToNumber
.contains(BB
))
490 ValueToNumber
.try_emplace(BB
, LocalValNumber
);
491 NumberToValue
.try_emplace(LocalValNumber
, BB
);
496 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate
&A
,
497 const IRSimilarityCandidate
&B
) {
498 if (A
.getLength() != B
.getLength())
501 auto InstrDataForBoth
=
502 zip(make_range(A
.begin(), A
.end()), make_range(B
.begin(), B
.end()));
504 return all_of(InstrDataForBoth
,
505 [](std::tuple
<IRInstructionData
&, IRInstructionData
&> R
) {
506 IRInstructionData
&A
= std::get
<0>(R
);
507 IRInstructionData
&B
= std::get
<1>(R
);
508 if (!A
.Legal
|| !B
.Legal
)
510 return isClose(A
, B
);
514 /// Determine if one or more of the assigned global value numbers for the
515 /// operands in \p TargetValueNumbers is in the current mapping set for operand
516 /// numbers in \p SourceOperands. The set of possible corresponding global
517 /// value numbers are replaced with the most recent version of compatible
520 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
521 /// value number for the source IRInstructionCandidate.
522 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
523 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
525 /// \param [in] SourceOperands - The operands in the original
526 /// IRSimilarityCandidate in the current instruction.
527 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
528 /// the corresponding Instruction in the other IRSimilarityCandidate.
529 /// \returns true if there exists a possible mapping between the source
530 /// Instruction operands and the target Instruction operands, and false if not.
531 static bool checkNumberingAndReplaceCommutative(
532 const DenseMap
<Value
*, unsigned> &SourceValueToNumberMapping
,
533 DenseMap
<unsigned, DenseSet
<unsigned>> &CurrentSrcTgtNumberMapping
,
534 ArrayRef
<Value
*> &SourceOperands
,
535 DenseSet
<unsigned> &TargetValueNumbers
){
537 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator ValueMappingIt
;
542 // Iterate over the operands in the source IRSimilarityCandidate to determine
543 // whether there exists an operand in the other IRSimilarityCandidate that
544 // creates a valid mapping of Value to Value between the
545 // IRSimilarityCaniddates.
546 for (Value
*V
: SourceOperands
) {
547 ArgVal
= SourceValueToNumberMapping
.find(V
)->second
;
549 // Instead of finding a current mapping, we attempt to insert a set.
550 std::tie(ValueMappingIt
, WasInserted
) = CurrentSrcTgtNumberMapping
.insert(
551 std::make_pair(ArgVal
, TargetValueNumbers
));
553 // We need to iterate over the items in other IRSimilarityCandidate's
554 // Instruction to determine whether there is a valid mapping of
556 DenseSet
<unsigned> NewSet
;
557 for (unsigned &Curr
: ValueMappingIt
->second
)
558 // If we can find the value in the mapping, we add it to the new set.
559 if (TargetValueNumbers
.contains(Curr
))
562 // If we could not find a Value, return 0.
566 // Otherwise replace the old mapping with the newly constructed one.
567 if (NewSet
.size() != ValueMappingIt
->second
.size())
568 ValueMappingIt
->second
.swap(NewSet
);
570 // We have reached no conclusions about the mapping, and cannot remove
571 // any items from the other operands, so we move to check the next operand.
572 if (ValueMappingIt
->second
.size() != 1)
575 unsigned ValToRemove
= *ValueMappingIt
->second
.begin();
576 // When there is only one item left in the mapping for and operand, remove
577 // the value from the other operands. If it results in there being no
578 // mapping, return false, it means the mapping is wrong
579 for (Value
*InnerV
: SourceOperands
) {
583 unsigned InnerVal
= SourceValueToNumberMapping
.find(InnerV
)->second
;
584 ValueMappingIt
= CurrentSrcTgtNumberMapping
.find(InnerVal
);
585 if (ValueMappingIt
== CurrentSrcTgtNumberMapping
.end())
588 ValueMappingIt
->second
.erase(ValToRemove
);
589 if (ValueMappingIt
->second
.empty())
597 /// Determine if operand number \p TargetArgVal is in the current mapping set
598 /// for operand number \p SourceArgVal.
600 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
601 /// value numbers from source IRSimilarityCandidate to target
602 /// IRSimilarityCandidate.
603 /// \param [in] SourceArgVal The global value number for an operand in the
604 /// in the original candidate.
605 /// \param [in] TargetArgVal The global value number for the corresponding
606 /// operand in the other candidate.
607 /// \returns True if there exists a mapping and false if not.
608 bool checkNumberingAndReplace(
609 DenseMap
<unsigned, DenseSet
<unsigned>> &CurrentSrcTgtNumberMapping
,
610 unsigned SourceArgVal
, unsigned TargetArgVal
) {
611 // We are given two unsigned integers representing the global values of
612 // the operands in different IRSimilarityCandidates and a current mapping
615 // Source Operand GVN: 1
616 // Target Operand GVN: 2
617 // CurrentMapping: {1: {1, 2}}
619 // Since we have mapping, and the target operand is contained in the set, we
621 // CurrentMapping: {1: {2}}
622 // and can return true. But, if the mapping was
623 // CurrentMapping: {1: {3}}
624 // we would return false.
627 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator Val
;
629 std::tie(Val
, WasInserted
) = CurrentSrcTgtNumberMapping
.insert(
630 std::make_pair(SourceArgVal
, DenseSet
<unsigned>({TargetArgVal
})));
632 // If we created a new mapping, then we are done.
636 // If there is more than one option in the mapping set, and the target value
637 // is included in the mapping set replace that set with one that only includes
638 // the target value, as it is the only valid mapping via the non commutative
641 DenseSet
<unsigned> &TargetSet
= Val
->second
;
642 if (TargetSet
.size() > 1 && TargetSet
.contains(TargetArgVal
)) {
644 TargetSet
.insert(TargetArgVal
);
648 // Return true if we can find the value in the set.
649 return TargetSet
.contains(TargetArgVal
);
652 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
653 OperandMapping A
, OperandMapping B
) {
654 // Iterators to keep track of where we are in the operands for each
656 ArrayRef
<Value
*>::iterator VItA
= A
.OperVals
.begin();
657 ArrayRef
<Value
*>::iterator VItB
= B
.OperVals
.begin();
658 unsigned OperandLength
= A
.OperVals
.size();
660 // For each operand, get the value numbering and ensure it is consistent.
661 for (unsigned Idx
= 0; Idx
< OperandLength
; Idx
++, VItA
++, VItB
++) {
662 unsigned OperValA
= A
.IRSC
.ValueToNumber
.find(*VItA
)->second
;
663 unsigned OperValB
= B
.IRSC
.ValueToNumber
.find(*VItB
)->second
;
665 // Attempt to add a set with only the target value. If there is no mapping
666 // we can create it here.
668 // For an instruction like a subtraction:
669 // IRSimilarityCandidateA: IRSimilarityCandidateB:
670 // %resultA = sub %a, %b %resultB = sub %d, %e
672 // We map %a -> %d and %b -> %e.
674 // And check to see whether their mapping is consistent in
675 // checkNumberingAndReplace.
677 if (!checkNumberingAndReplace(A
.ValueNumberMapping
, OperValA
, OperValB
))
680 if (!checkNumberingAndReplace(B
.ValueNumberMapping
, OperValB
, OperValA
))
686 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
687 OperandMapping A
, OperandMapping B
) {
688 DenseSet
<unsigned> ValueNumbersA
;
689 DenseSet
<unsigned> ValueNumbersB
;
691 ArrayRef
<Value
*>::iterator VItA
= A
.OperVals
.begin();
692 ArrayRef
<Value
*>::iterator VItB
= B
.OperVals
.begin();
693 unsigned OperandLength
= A
.OperVals
.size();
695 // Find the value number sets for the operands.
696 for (unsigned Idx
= 0; Idx
< OperandLength
;
697 Idx
++, VItA
++, VItB
++) {
698 ValueNumbersA
.insert(A
.IRSC
.ValueToNumber
.find(*VItA
)->second
);
699 ValueNumbersB
.insert(B
.IRSC
.ValueToNumber
.find(*VItB
)->second
);
702 // Iterate over the operands in the first IRSimilarityCandidate and make sure
703 // there exists a possible mapping with the operands in the second
704 // IRSimilarityCandidate.
705 if (!checkNumberingAndReplaceCommutative(A
.IRSC
.ValueToNumber
,
706 A
.ValueNumberMapping
, A
.OperVals
,
710 // Iterate over the operands in the second IRSimilarityCandidate and make sure
711 // there exists a possible mapping with the operands in the first
712 // IRSimilarityCandidate.
713 if (!checkNumberingAndReplaceCommutative(B
.IRSC
.ValueToNumber
,
714 B
.ValueNumberMapping
, B
.OperVals
,
721 bool IRSimilarityCandidate::compareAssignmentMapping(
722 const unsigned InstValA
, const unsigned &InstValB
,
723 DenseMap
<unsigned, DenseSet
<unsigned>> &ValueNumberMappingA
,
724 DenseMap
<unsigned, DenseSet
<unsigned>> &ValueNumberMappingB
) {
725 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator ValueMappingIt
;
727 std::tie(ValueMappingIt
, WasInserted
) = ValueNumberMappingA
.insert(
728 std::make_pair(InstValA
, DenseSet
<unsigned>({InstValB
})));
729 if (!WasInserted
&& !ValueMappingIt
->second
.contains(InstValB
))
731 else if (ValueMappingIt
->second
.size() != 1) {
732 for (unsigned OtherVal
: ValueMappingIt
->second
) {
733 if (OtherVal
== InstValB
)
735 if (!ValueNumberMappingA
.contains(OtherVal
))
737 if (!ValueNumberMappingA
[OtherVal
].contains(InstValA
))
739 ValueNumberMappingA
[OtherVal
].erase(InstValA
);
741 ValueNumberMappingA
.erase(ValueMappingIt
);
742 std::tie(ValueMappingIt
, WasInserted
) = ValueNumberMappingA
.insert(
743 std::make_pair(InstValA
, DenseSet
<unsigned>({InstValB
})));
749 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A
,
750 RelativeLocMapping B
) {
751 // Get the basic blocks the label refers to.
752 BasicBlock
*ABB
= cast
<BasicBlock
>(A
.OperVal
);
753 BasicBlock
*BBB
= cast
<BasicBlock
>(B
.OperVal
);
755 // Get the basic blocks contained in each region.
756 DenseSet
<BasicBlock
*> BasicBlockA
;
757 DenseSet
<BasicBlock
*> BasicBlockB
;
758 A
.IRSC
.getBasicBlocks(BasicBlockA
);
759 B
.IRSC
.getBasicBlocks(BasicBlockB
);
761 // Determine if the block is contained in the region.
762 bool AContained
= BasicBlockA
.contains(ABB
);
763 bool BContained
= BasicBlockB
.contains(BBB
);
765 // Both blocks need to be contained in the region, or both need to be outside
767 if (AContained
!= BContained
)
770 // If both are contained, then we need to make sure that the relative
771 // distance to the target blocks are the same.
773 return A
.RelativeLocation
== B
.RelativeLocation
;
777 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate
&A
,
778 const IRSimilarityCandidate
&B
) {
779 DenseMap
<unsigned, DenseSet
<unsigned>> MappingA
;
780 DenseMap
<unsigned, DenseSet
<unsigned>> MappingB
;
781 return IRSimilarityCandidate::compareStructure(A
, B
, MappingA
, MappingB
);
784 typedef detail::zippy
<detail::zip_shortest
, SmallVector
<int, 4> &,
785 SmallVector
<int, 4> &, ArrayRef
<Value
*> &,
787 ZippedRelativeLocationsT
;
789 bool IRSimilarityCandidate::compareStructure(
790 const IRSimilarityCandidate
&A
, const IRSimilarityCandidate
&B
,
791 DenseMap
<unsigned, DenseSet
<unsigned>> &ValueNumberMappingA
,
792 DenseMap
<unsigned, DenseSet
<unsigned>> &ValueNumberMappingB
) {
793 if (A
.getLength() != B
.getLength())
796 if (A
.ValueToNumber
.size() != B
.ValueToNumber
.size())
799 iterator ItA
= A
.begin();
800 iterator ItB
= B
.begin();
802 // These ValueNumber Mapping sets create a create a mapping between the values
803 // in one candidate to values in the other candidate. If we create a set with
804 // one element, and that same element maps to the original element in the
805 // candidate we have a good mapping.
807 // Iterate over the instructions contained in each candidate
808 unsigned SectionLength
= A
.getStartIdx() + A
.getLength();
809 for (unsigned Loc
= A
.getStartIdx(); Loc
< SectionLength
;
810 ItA
++, ItB
++, Loc
++) {
811 // Make sure the instructions are similar to one another.
812 if (!isClose(*ItA
, *ItB
))
815 Instruction
*IA
= ItA
->Inst
;
816 Instruction
*IB
= ItB
->Inst
;
818 if (!ItA
->Legal
|| !ItB
->Legal
)
821 // Get the operand sets for the instructions.
822 ArrayRef
<Value
*> OperValsA
= ItA
->OperVals
;
823 ArrayRef
<Value
*> OperValsB
= ItB
->OperVals
;
825 unsigned InstValA
= A
.ValueToNumber
.find(IA
)->second
;
826 unsigned InstValB
= B
.ValueToNumber
.find(IB
)->second
;
828 // Ensure that the mappings for the instructions exists.
829 if (!compareAssignmentMapping(InstValA
, InstValB
, ValueNumberMappingA
,
830 ValueNumberMappingB
))
833 if (!compareAssignmentMapping(InstValB
, InstValA
, ValueNumberMappingB
,
834 ValueNumberMappingA
))
837 // We have different paths for commutative instructions and non-commutative
838 // instructions since commutative instructions could allow multiple mappings
839 // to certain values.
840 if (IA
->isCommutative() && !isa
<FPMathOperator
>(IA
) &&
841 !isa
<IntrinsicInst
>(IA
)) {
842 if (!compareCommutativeOperandMapping(
843 {A
, OperValsA
, ValueNumberMappingA
},
844 {B
, OperValsB
, ValueNumberMappingB
}))
849 // Handle the non-commutative cases.
850 if (!compareNonCommutativeOperandMapping(
851 {A
, OperValsA
, ValueNumberMappingA
},
852 {B
, OperValsB
, ValueNumberMappingB
}))
855 // Here we check that between two corresponding instructions,
856 // when referring to a basic block in the same region, the
857 // relative locations are the same. And, that the instructions refer to
858 // basic blocks outside the region in the same corresponding locations.
860 // We are able to make the assumption about blocks outside of the region
861 // since the target block labels are considered values and will follow the
862 // same number matching that we defined for the other instructions in the
863 // region. So, at this point, in each location we target a specific block
864 // outside the region, we are targeting a corresponding block in each
865 // analagous location in the region we are comparing to.
866 if (!(isa
<BranchInst
>(IA
) && isa
<BranchInst
>(IB
)) &&
867 !(isa
<PHINode
>(IA
) && isa
<PHINode
>(IB
)))
870 SmallVector
<int, 4> &RelBlockLocsA
= ItA
->RelativeBlockLocations
;
871 SmallVector
<int, 4> &RelBlockLocsB
= ItB
->RelativeBlockLocations
;
872 ArrayRef
<Value
*> ABL
= ItA
->getBlockOperVals();
873 ArrayRef
<Value
*> BBL
= ItB
->getBlockOperVals();
875 // Check to make sure that the number of operands, and branching locations
876 // between BranchInsts is the same.
877 if (RelBlockLocsA
.size() != RelBlockLocsB
.size() &&
878 ABL
.size() != BBL
.size())
881 assert(RelBlockLocsA
.size() == ABL
.size() &&
882 "Block information vectors not the same size.");
883 assert(RelBlockLocsB
.size() == BBL
.size() &&
884 "Block information vectors not the same size.");
886 ZippedRelativeLocationsT ZippedRelativeLocations
=
887 zip(RelBlockLocsA
, RelBlockLocsB
, ABL
, BBL
);
888 if (any_of(ZippedRelativeLocations
,
889 [&A
, &B
](std::tuple
<int, int, Value
*, Value
*> R
) {
890 return !checkRelativeLocations(
891 {A
, std::get
<0>(R
), std::get
<2>(R
)},
892 {B
, std::get
<1>(R
), std::get
<3>(R
)});
899 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate
&A
,
900 const IRSimilarityCandidate
&B
) {
901 auto DoesOverlap
= [](const IRSimilarityCandidate
&X
,
902 const IRSimilarityCandidate
&Y
) {
904 // XXXXXX X starts before Y ends
905 // YYYYYYY Y starts after X starts
906 return X
.StartIdx
<= Y
.getEndIdx() && Y
.StartIdx
>= X
.StartIdx
;
909 return DoesOverlap(A
, B
) || DoesOverlap(B
, A
);
912 void IRSimilarityIdentifier::populateMapper(
913 Module
&M
, std::vector
<IRInstructionData
*> &InstrList
,
914 std::vector
<unsigned> &IntegerMapping
) {
916 std::vector
<IRInstructionData
*> InstrListForModule
;
917 std::vector
<unsigned> IntegerMappingForModule
;
918 // Iterate over the functions in the module to map each Instruction in each
919 // BasicBlock to an unsigned integer.
920 Mapper
.initializeForBBs(M
);
922 for (Function
&F
: M
) {
927 for (BasicBlock
&BB
: F
) {
929 // BB has potential to have similarity since it has a size greater than 2
930 // and can therefore match other regions greater than 2. Map it to a list
931 // of unsigned integers.
932 Mapper
.convertToUnsignedVec(BB
, InstrListForModule
,
933 IntegerMappingForModule
);
936 BasicBlock::iterator It
= F
.begin()->end();
937 Mapper
.mapToIllegalUnsigned(It
, IntegerMappingForModule
, InstrListForModule
,
939 if (InstrListForModule
.size() > 0)
940 Mapper
.IDL
->push_back(*InstrListForModule
.back());
943 // Insert the InstrListForModule at the end of the overall InstrList so that
944 // we can have a long InstrList for the entire set of Modules being analyzed.
945 llvm::append_range(InstrList
, InstrListForModule
);
946 // Do the same as above, but for IntegerMapping.
947 llvm::append_range(IntegerMapping
, IntegerMappingForModule
);
950 void IRSimilarityIdentifier::populateMapper(
951 ArrayRef
<std::unique_ptr
<Module
>> &Modules
,
952 std::vector
<IRInstructionData
*> &InstrList
,
953 std::vector
<unsigned> &IntegerMapping
) {
955 // Iterate over, and map the instructions in each module.
956 for (const std::unique_ptr
<Module
> &M
: Modules
)
957 populateMapper(*M
, InstrList
, IntegerMapping
);
960 /// From a repeated subsequence, find all the different instances of the
961 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
962 /// the IRInstructionData in subsequence.
964 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
965 /// \param [in] InstrList - The vector that holds the instruction data.
966 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
967 /// \param [out] CandsForRepSubstring - The vector to store the generated
968 /// IRSimilarityCandidates.
969 static void createCandidatesFromSuffixTree(
970 const IRInstructionMapper
& Mapper
, std::vector
<IRInstructionData
*> &InstrList
,
971 std::vector
<unsigned> &IntegerMapping
, SuffixTree::RepeatedSubstring
&RS
,
972 std::vector
<IRSimilarityCandidate
> &CandsForRepSubstring
) {
974 unsigned StringLen
= RS
.Length
;
978 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
979 for (const unsigned &StartIdx
: RS
.StartIndices
) {
980 unsigned EndIdx
= StartIdx
+ StringLen
- 1;
982 // Check that this subsequence does not contain an illegal instruction.
983 bool ContainsIllegal
= false;
984 for (unsigned CurrIdx
= StartIdx
; CurrIdx
<= EndIdx
; CurrIdx
++) {
985 unsigned Key
= IntegerMapping
[CurrIdx
];
986 if (Key
> Mapper
.IllegalInstrNumber
) {
987 ContainsIllegal
= true;
992 // If we have an illegal instruction, we should not create an
993 // IRSimilarityCandidate for this region.
997 // We are getting iterators to the instructions in this region of code
998 // by advancing the start and end indices from the start of the
1000 std::vector
<IRInstructionData
*>::iterator StartIt
= InstrList
.begin();
1001 std::advance(StartIt
, StartIdx
);
1002 std::vector
<IRInstructionData
*>::iterator EndIt
= InstrList
.begin();
1003 std::advance(EndIt
, EndIdx
);
1005 CandsForRepSubstring
.emplace_back(StartIdx
, StringLen
, *StartIt
, *EndIt
);
1009 void IRSimilarityCandidate::createCanonicalRelationFrom(
1010 IRSimilarityCandidate
&SourceCand
,
1011 DenseMap
<unsigned, DenseSet
<unsigned>> &ToSourceMapping
,
1012 DenseMap
<unsigned, DenseSet
<unsigned>> &FromSourceMapping
) {
1013 assert(SourceCand
.CanonNumToNumber
.size() != 0 &&
1014 "Base canonical relationship is empty!");
1015 assert(SourceCand
.NumberToCanonNum
.size() != 0 &&
1016 "Base canonical relationship is empty!");
1018 assert(CanonNumToNumber
.size() == 0 && "Canonical Relationship is non-empty");
1019 assert(NumberToCanonNum
.size() == 0 && "Canonical Relationship is non-empty");
1021 DenseSet
<unsigned> UsedGVNs
;
1022 // Iterate over the mappings provided from this candidate to SourceCand. We
1023 // are then able to map the GVN in this candidate to the same canonical number
1024 // given to the corresponding GVN in SourceCand.
1025 for (std::pair
<unsigned, DenseSet
<unsigned>> &GVNMapping
: ToSourceMapping
) {
1026 unsigned SourceGVN
= GVNMapping
.first
;
1028 assert(GVNMapping
.second
.size() != 0 && "Possible GVNs is 0!");
1031 // We need special handling if we have more than one potential value. This
1032 // means that there are at least two GVNs that could correspond to this GVN.
1033 // This could lead to potential swapping later on, so we make a decision
1034 // here to ensure a one-to-one mapping.
1035 if (GVNMapping
.second
.size() > 1) {
1037 for (unsigned Val
: GVNMapping
.second
) {
1038 // We make sure the target value number hasn't already been reserved.
1039 if (UsedGVNs
.contains(Val
))
1042 // We make sure that the opposite mapping is still consistent.
1043 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator It
=
1044 FromSourceMapping
.find(Val
);
1046 if (!It
->second
.contains(SourceGVN
))
1049 // We pick the first item that satisfies these conditions.
1055 assert(Found
&& "Could not find matching value for source GVN");
1059 ResultGVN
= *GVNMapping
.second
.begin();
1061 // Whatever GVN is found, we mark it as used.
1062 UsedGVNs
.insert(ResultGVN
);
1064 unsigned CanonNum
= *SourceCand
.getCanonicalNum(ResultGVN
);
1065 CanonNumToNumber
.insert(std::make_pair(CanonNum
, SourceGVN
));
1066 NumberToCanonNum
.insert(std::make_pair(SourceGVN
, CanonNum
));
1069 DenseSet
<BasicBlock
*> BBSet
;
1070 getBasicBlocks(BBSet
);
1071 // Find canonical numbers for the BasicBlocks in the current candidate.
1072 // This is done by finding the corresponding value for the first instruction
1073 // in the block in the current candidate, finding the matching value in the
1074 // source candidate. Then by finding the parent of this value, use the
1075 // canonical number of the block in the source candidate for the canonical
1076 // number in the current candidate.
1077 for (BasicBlock
*BB
: BBSet
) {
1078 unsigned BBGVNForCurrCand
= ValueToNumber
.find(BB
)->second
;
1080 // We can skip the BasicBlock if the canonical numbering has already been
1081 // found in a separate instruction.
1082 if (NumberToCanonNum
.contains(BBGVNForCurrCand
))
1085 // If the basic block is the starting block, then the shared instruction may
1086 // not be the first instruction in the block, it will be the first
1087 // instruction in the similarity region.
1088 Value
*FirstOutlineInst
= BB
== getStartBB()
1089 ? frontInstruction()
1090 : &*BB
->instructionsWithoutDebug().begin();
1092 unsigned FirstInstGVN
= *getGVN(FirstOutlineInst
);
1093 unsigned FirstInstCanonNum
= *getCanonicalNum(FirstInstGVN
);
1094 unsigned SourceGVN
= *SourceCand
.fromCanonicalNum(FirstInstCanonNum
);
1095 Value
*SourceV
= *SourceCand
.fromGVN(SourceGVN
);
1096 BasicBlock
*SourceBB
= cast
<Instruction
>(SourceV
)->getParent();
1097 unsigned SourceBBGVN
= *SourceCand
.getGVN(SourceBB
);
1098 unsigned SourceCanonBBGVN
= *SourceCand
.getCanonicalNum(SourceBBGVN
);
1099 CanonNumToNumber
.insert(std::make_pair(SourceCanonBBGVN
, BBGVNForCurrCand
));
1100 NumberToCanonNum
.insert(std::make_pair(BBGVNForCurrCand
, SourceCanonBBGVN
));
1104 void IRSimilarityCandidate::createCanonicalRelationFrom(
1105 IRSimilarityCandidate
&SourceCand
, IRSimilarityCandidate
&SourceCandLarge
,
1106 IRSimilarityCandidate
&TargetCandLarge
) {
1107 assert(!SourceCand
.CanonNumToNumber
.empty() &&
1108 "Canonical Relationship is non-empty");
1109 assert(!SourceCand
.NumberToCanonNum
.empty() &&
1110 "Canonical Relationship is non-empty");
1112 assert(!SourceCandLarge
.CanonNumToNumber
.empty() &&
1113 "Canonical Relationship is non-empty");
1114 assert(!SourceCandLarge
.NumberToCanonNum
.empty() &&
1115 "Canonical Relationship is non-empty");
1117 assert(!TargetCandLarge
.CanonNumToNumber
.empty() &&
1118 "Canonical Relationship is non-empty");
1119 assert(!TargetCandLarge
.NumberToCanonNum
.empty() &&
1120 "Canonical Relationship is non-empty");
1122 assert(CanonNumToNumber
.empty() && "Canonical Relationship is non-empty");
1123 assert(NumberToCanonNum
.empty() && "Canonical Relationship is non-empty");
1125 // We're going to use the larger candidates as a "bridge" to create the
1126 // canonical number for the target candidate since we have idetified two
1127 // candidates as subsequences of larger sequences, and therefore must be
1128 // structurally similar.
1129 for (std::pair
<Value
*, unsigned> &ValueNumPair
: ValueToNumber
) {
1130 Value
*CurrVal
= ValueNumPair
.first
;
1131 unsigned TargetCandGVN
= ValueNumPair
.second
;
1133 // Find the numbering in the large candidate that surrounds the
1134 // current candidate.
1135 std::optional
<unsigned> OLargeTargetGVN
= TargetCandLarge
.getGVN(CurrVal
);
1136 assert(OLargeTargetGVN
.has_value() && "GVN not found for Value");
1138 // Get the canonical numbering in the large target candidate.
1139 std::optional
<unsigned> OTargetCandCanon
=
1140 TargetCandLarge
.getCanonicalNum(OLargeTargetGVN
.value());
1141 assert(OTargetCandCanon
.has_value() &&
1142 "Canononical Number not found for GVN");
1144 // Get the GVN in the large source candidate from the canonical numbering.
1145 std::optional
<unsigned> OLargeSourceGVN
=
1146 SourceCandLarge
.fromCanonicalNum(OTargetCandCanon
.value());
1147 assert(OLargeSourceGVN
.has_value() &&
1148 "GVN Number not found for Canonical Number");
1150 // Get the Value from the GVN in the large source candidate.
1151 std::optional
<Value
*> OLargeSourceV
=
1152 SourceCandLarge
.fromGVN(OLargeSourceGVN
.value());
1153 assert(OLargeSourceV
.has_value() && "Value not found for GVN");
1155 // Get the GVN number for the Value in the source candidate.
1156 std::optional
<unsigned> OSourceGVN
=
1157 SourceCand
.getGVN(OLargeSourceV
.value());
1158 assert(OSourceGVN
.has_value() && "GVN Number not found for Value");
1160 // Get the canonical numbering from the GVN/
1161 std::optional
<unsigned> OSourceCanon
=
1162 SourceCand
.getCanonicalNum(OSourceGVN
.value());
1163 assert(OSourceCanon
.has_value() && "Canon Number not found for GVN");
1165 // Insert the canonical numbering and GVN pair into their respective
1167 CanonNumToNumber
.insert(
1168 std::make_pair(OSourceCanon
.value(), TargetCandGVN
));
1169 NumberToCanonNum
.insert(
1170 std::make_pair(TargetCandGVN
, OSourceCanon
.value()));
1174 void IRSimilarityCandidate::createCanonicalMappingFor(
1175 IRSimilarityCandidate
&CurrCand
) {
1176 assert(CurrCand
.CanonNumToNumber
.size() == 0 &&
1177 "Canonical Relationship is non-empty");
1178 assert(CurrCand
.NumberToCanonNum
.size() == 0 &&
1179 "Canonical Relationship is non-empty");
1181 unsigned CanonNum
= 0;
1182 // Iterate over the value numbers found, the order does not matter in this
1184 for (std::pair
<unsigned, Value
*> &NumToVal
: CurrCand
.NumberToValue
) {
1185 CurrCand
.NumberToCanonNum
.insert(std::make_pair(NumToVal
.first
, CanonNum
));
1186 CurrCand
.CanonNumToNumber
.insert(std::make_pair(CanonNum
, NumToVal
.first
));
1191 /// Look for larger IRSimilarityCandidates From the previously matched
1192 /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1193 /// an overlap, return a pair of structurally similar, larger
1194 /// IRSimilarityCandidates.
1196 /// \param [in] CandA - The first candidate we are trying to determine the
1198 /// \param [in] CandB - The second candidate we are trying to determine the
1200 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1201 /// a circuit to the IRSimilarityCandidates that include this instruction.
1202 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1203 /// number representing the structural group assigned to it.
1204 static std::optional
<
1205 std::pair
<IRSimilarityCandidate
*, IRSimilarityCandidate
*>>
1207 IRSimilarityCandidate
&CandA
, IRSimilarityCandidate
&CandB
,
1208 DenseMap
<unsigned, DenseSet
<IRSimilarityCandidate
*>> &IndexToIncludedCand
,
1209 DenseMap
<IRSimilarityCandidate
*, unsigned> &CandToGroup
) {
1210 DenseMap
<unsigned, IRSimilarityCandidate
*> IncludedGroupAndCandA
;
1211 DenseMap
<unsigned, IRSimilarityCandidate
*> IncludedGroupAndCandB
;
1212 DenseSet
<unsigned> IncludedGroupsA
;
1213 DenseSet
<unsigned> IncludedGroupsB
;
1215 // Find the overall similarity group numbers that fully contain the candidate,
1216 // and record the larger candidate for each group.
1217 auto IdxToCandidateIt
= IndexToIncludedCand
.find(CandA
.getStartIdx());
1218 std::optional
<std::pair
<IRSimilarityCandidate
*, IRSimilarityCandidate
*>>
1221 unsigned CandAStart
= CandA
.getStartIdx();
1222 unsigned CandAEnd
= CandA
.getEndIdx();
1223 unsigned CandBStart
= CandB
.getStartIdx();
1224 unsigned CandBEnd
= CandB
.getEndIdx();
1225 if (IdxToCandidateIt
== IndexToIncludedCand
.end())
1227 for (IRSimilarityCandidate
*MatchedCand
: IdxToCandidateIt
->second
) {
1228 if (MatchedCand
->getStartIdx() > CandAStart
||
1229 (MatchedCand
->getEndIdx() < CandAEnd
))
1231 unsigned GroupNum
= CandToGroup
.find(MatchedCand
)->second
;
1232 IncludedGroupAndCandA
.insert(std::make_pair(GroupNum
, MatchedCand
));
1233 IncludedGroupsA
.insert(GroupNum
);
1236 // Find the overall similarity group numbers that fully contain the next
1237 // candidate, and record the larger candidate for each group.
1238 IdxToCandidateIt
= IndexToIncludedCand
.find(CandBStart
);
1239 if (IdxToCandidateIt
== IndexToIncludedCand
.end())
1241 for (IRSimilarityCandidate
*MatchedCand
: IdxToCandidateIt
->second
) {
1242 if (MatchedCand
->getStartIdx() > CandBStart
||
1243 MatchedCand
->getEndIdx() < CandBEnd
)
1245 unsigned GroupNum
= CandToGroup
.find(MatchedCand
)->second
;
1246 IncludedGroupAndCandB
.insert(std::make_pair(GroupNum
, MatchedCand
));
1247 IncludedGroupsB
.insert(GroupNum
);
1250 // Find the intersection between the two groups, these are the groups where
1251 // the larger candidates exist.
1252 set_intersect(IncludedGroupsA
, IncludedGroupsB
);
1254 // If there is no intersection between the sets, then we cannot determine
1255 // whether or not there is a match.
1256 if (IncludedGroupsA
.empty())
1259 // Create a pair that contains the larger candidates.
1260 auto ItA
= IncludedGroupAndCandA
.find(*IncludedGroupsA
.begin());
1261 auto ItB
= IncludedGroupAndCandB
.find(*IncludedGroupsA
.begin());
1262 Result
= std::make_pair(ItA
->second
, ItB
->second
);
1266 /// From the list of IRSimilarityCandidates, perform a comparison between each
1267 /// IRSimilarityCandidate to determine if there are overlapping
1268 /// IRInstructionData, or if they do not have the same structure.
1270 /// \param [in] CandsForRepSubstring - The vector containing the
1271 /// IRSimilarityCandidates.
1272 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1273 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1274 /// vector are structurally similar to one another.
1275 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1276 /// a circuit to the IRSimilarityCandidates that include this instruction.
1277 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1278 /// number representing the structural group assigned to it.
1279 static void findCandidateStructures(
1280 std::vector
<IRSimilarityCandidate
> &CandsForRepSubstring
,
1281 DenseMap
<unsigned, SimilarityGroup
> &StructuralGroups
,
1282 DenseMap
<unsigned, DenseSet
<IRSimilarityCandidate
*>> &IndexToIncludedCand
,
1283 DenseMap
<IRSimilarityCandidate
*, unsigned> &CandToOverallGroup
1285 std::vector
<IRSimilarityCandidate
>::iterator CandIt
, CandEndIt
, InnerCandIt
,
1288 // IRSimilarityCandidates each have a structure for operand use. It is
1289 // possible that two instances of the same subsequences have different
1290 // structure. Each type of structure found is assigned a number. This
1291 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1292 // discovered it fits within.
1293 DenseMap
<IRSimilarityCandidate
*, unsigned> CandToGroup
;
1295 // Find the compatibility from each candidate to the others to determine
1296 // which candidates overlap and which have the same structure by mapping
1297 // each structure to a different group.
1300 unsigned CurrentGroupNum
= 0;
1301 unsigned OuterGroupNum
;
1302 DenseMap
<IRSimilarityCandidate
*, unsigned>::iterator CandToGroupIt
;
1303 DenseMap
<IRSimilarityCandidate
*, unsigned>::iterator CandToGroupItInner
;
1304 DenseMap
<unsigned, SimilarityGroup
>::iterator CurrentGroupPair
;
1306 // Iterate over the candidates to determine its structural and overlapping
1307 // compatibility with other instructions
1308 DenseMap
<unsigned, DenseSet
<unsigned>> ValueNumberMappingA
;
1309 DenseMap
<unsigned, DenseSet
<unsigned>> ValueNumberMappingB
;
1310 for (CandIt
= CandsForRepSubstring
.begin(),
1311 CandEndIt
= CandsForRepSubstring
.end();
1312 CandIt
!= CandEndIt
; CandIt
++) {
1314 // Determine if it has an assigned structural group already.
1315 CandToGroupIt
= CandToGroup
.find(&*CandIt
);
1316 if (CandToGroupIt
== CandToGroup
.end()) {
1317 // If not, we assign it one, and add it to our mapping.
1318 std::tie(CandToGroupIt
, Inserted
) =
1319 CandToGroup
.insert(std::make_pair(&*CandIt
, CurrentGroupNum
++));
1322 // Get the structural group number from the iterator.
1323 OuterGroupNum
= CandToGroupIt
->second
;
1325 // Check if we already have a list of IRSimilarityCandidates for the current
1326 // structural group. Create one if one does not exist.
1327 CurrentGroupPair
= StructuralGroups
.find(OuterGroupNum
);
1328 if (CurrentGroupPair
== StructuralGroups
.end()) {
1329 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt
);
1330 std::tie(CurrentGroupPair
, Inserted
) = StructuralGroups
.insert(
1331 std::make_pair(OuterGroupNum
, SimilarityGroup({*CandIt
})));
1334 // Iterate over the IRSimilarityCandidates following the current
1335 // IRSimilarityCandidate in the list to determine whether the two
1336 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1337 // of IRSimilarityCandidates.
1338 for (InnerCandIt
= std::next(CandIt
),
1339 InnerCandEndIt
= CandsForRepSubstring
.end();
1340 InnerCandIt
!= InnerCandEndIt
; InnerCandIt
++) {
1342 // We check if the inner item has a group already, if it does, we skip it.
1343 CandToGroupItInner
= CandToGroup
.find(&*InnerCandIt
);
1344 if (CandToGroupItInner
!= CandToGroup
.end())
1347 // Check if we have found structural similarity between two candidates
1348 // that fully contains the first and second candidates.
1349 std::optional
<std::pair
<IRSimilarityCandidate
*, IRSimilarityCandidate
*>>
1350 LargerPair
= CheckLargerCands(
1351 *CandIt
, *InnerCandIt
, IndexToIncludedCand
, CandToOverallGroup
);
1353 // If a pair was found, it means that we can assume that these smaller
1354 // substrings are also structurally similar. Use the larger candidates to
1355 // determine the canonical mapping between the two sections.
1356 if (LargerPair
.has_value()) {
1357 SameStructure
= true;
1358 InnerCandIt
->createCanonicalRelationFrom(
1359 *CandIt
, *LargerPair
.value().first
, *LargerPair
.value().second
);
1360 CandToGroup
.insert(std::make_pair(&*InnerCandIt
, OuterGroupNum
));
1361 CurrentGroupPair
->second
.push_back(*InnerCandIt
);
1365 // Otherwise we determine if they have the same structure and add it to
1366 // vector if they match.
1367 ValueNumberMappingA
.clear();
1368 ValueNumberMappingB
.clear();
1369 SameStructure
= IRSimilarityCandidate::compareStructure(
1370 *CandIt
, *InnerCandIt
, ValueNumberMappingA
, ValueNumberMappingB
);
1374 InnerCandIt
->createCanonicalRelationFrom(*CandIt
, ValueNumberMappingA
,
1375 ValueNumberMappingB
);
1376 CandToGroup
.insert(std::make_pair(&*InnerCandIt
, OuterGroupNum
));
1377 CurrentGroupPair
->second
.push_back(*InnerCandIt
);
1382 void IRSimilarityIdentifier::findCandidates(
1383 std::vector
<IRInstructionData
*> &InstrList
,
1384 std::vector
<unsigned> &IntegerMapping
) {
1385 SuffixTree
ST(IntegerMapping
);
1387 std::vector
<IRSimilarityCandidate
> CandsForRepSubstring
;
1388 std::vector
<SimilarityGroup
> NewCandidateGroups
;
1390 DenseMap
<unsigned, SimilarityGroup
> StructuralGroups
;
1391 DenseMap
<unsigned, DenseSet
<IRSimilarityCandidate
*>> IndexToIncludedCand
;
1392 DenseMap
<IRSimilarityCandidate
*, unsigned> CandToGroup
;
1394 // Iterate over the subsequences found by the Suffix Tree to create
1395 // IRSimilarityCandidates for each repeated subsequence and determine which
1396 // instances are structurally similar to one another.
1398 // Sort the suffix tree from longest substring to shortest.
1399 std::vector
<SuffixTree::RepeatedSubstring
> RSes
;
1400 for (SuffixTree::RepeatedSubstring
&RS
: ST
)
1403 llvm::stable_sort(RSes
, [](const SuffixTree::RepeatedSubstring
&LHS
,
1404 const SuffixTree::RepeatedSubstring
&RHS
) {
1405 return LHS
.Length
> RHS
.Length
;
1407 for (SuffixTree::RepeatedSubstring
&RS
: RSes
) {
1408 createCandidatesFromSuffixTree(Mapper
, InstrList
, IntegerMapping
, RS
,
1409 CandsForRepSubstring
);
1411 if (CandsForRepSubstring
.size() < 2)
1414 findCandidateStructures(CandsForRepSubstring
, StructuralGroups
,
1415 IndexToIncludedCand
, CandToGroup
);
1416 for (std::pair
<unsigned, SimilarityGroup
> &Group
: StructuralGroups
) {
1417 // We only add the group if it contains more than one
1418 // IRSimilarityCandidate. If there is only one, that means there is no
1419 // other repeated subsequence with the same structure.
1420 if (Group
.second
.size() > 1) {
1421 SimilarityCandidates
->push_back(Group
.second
);
1422 // Iterate over each candidate in the group, and add an entry for each
1423 // instruction included with a mapping to a set of
1424 // IRSimilarityCandidates that include that instruction.
1425 for (IRSimilarityCandidate
&IRCand
: SimilarityCandidates
->back()) {
1426 for (unsigned Idx
= IRCand
.getStartIdx(), Edx
= IRCand
.getEndIdx();
1427 Idx
<= Edx
; ++Idx
) {
1428 DenseMap
<unsigned, DenseSet
<IRSimilarityCandidate
*>>::iterator
1430 IdIt
= IndexToIncludedCand
.find(Idx
);
1431 bool Inserted
= false;
1432 if (IdIt
== IndexToIncludedCand
.end())
1433 std::tie(IdIt
, Inserted
) = IndexToIncludedCand
.insert(
1434 std::make_pair(Idx
, DenseSet
<IRSimilarityCandidate
*>()));
1435 IdIt
->second
.insert(&IRCand
);
1437 // Add mapping of candidate to the overall similarity group number.
1439 std::make_pair(&IRCand
, SimilarityCandidates
->size() - 1));
1444 CandsForRepSubstring
.clear();
1445 StructuralGroups
.clear();
1446 NewCandidateGroups
.clear();
1450 SimilarityGroupList
&IRSimilarityIdentifier::findSimilarity(
1451 ArrayRef
<std::unique_ptr
<Module
>> Modules
) {
1452 resetSimilarityCandidates();
1454 std::vector
<IRInstructionData
*> InstrList
;
1455 std::vector
<unsigned> IntegerMapping
;
1456 Mapper
.InstClassifier
.EnableBranches
= this->EnableBranches
;
1457 Mapper
.InstClassifier
.EnableIndirectCalls
= EnableIndirectCalls
;
1458 Mapper
.EnableMatchCallsByName
= EnableMatchingCallsByName
;
1459 Mapper
.InstClassifier
.EnableIntrinsics
= EnableIntrinsics
;
1460 Mapper
.InstClassifier
.EnableMustTailCalls
= EnableMustTailCalls
;
1462 populateMapper(Modules
, InstrList
, IntegerMapping
);
1463 findCandidates(InstrList
, IntegerMapping
);
1465 return *SimilarityCandidates
;
1468 SimilarityGroupList
&IRSimilarityIdentifier::findSimilarity(Module
&M
) {
1469 resetSimilarityCandidates();
1470 Mapper
.InstClassifier
.EnableBranches
= this->EnableBranches
;
1471 Mapper
.InstClassifier
.EnableIndirectCalls
= EnableIndirectCalls
;
1472 Mapper
.EnableMatchCallsByName
= EnableMatchingCallsByName
;
1473 Mapper
.InstClassifier
.EnableIntrinsics
= EnableIntrinsics
;
1474 Mapper
.InstClassifier
.EnableMustTailCalls
= EnableMustTailCalls
;
1476 std::vector
<IRInstructionData
*> InstrList
;
1477 std::vector
<unsigned> IntegerMapping
;
1479 populateMapper(M
, InstrList
, IntegerMapping
);
1480 findCandidates(InstrList
, IntegerMapping
);
1482 return *SimilarityCandidates
;
1485 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass
, "ir-similarity-identifier",
1486 "ir-similarity-identifier", false, true)
1488 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1490 initializeIRSimilarityIdentifierWrapperPassPass(
1491 *PassRegistry::getPassRegistry());
1494 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module
&M
) {
1495 IRSI
.reset(new IRSimilarityIdentifier(!DisableBranches
, !DisableIndirectCalls
,
1496 MatchCallsByName
, !DisableIntrinsics
,
1501 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module
&M
) {
1506 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module
&M
) {
1507 IRSI
->findSimilarity(M
);
1511 AnalysisKey
IRSimilarityAnalysis::Key
;
1512 IRSimilarityIdentifier
IRSimilarityAnalysis::run(Module
&M
,
1513 ModuleAnalysisManager
&) {
1514 auto IRSI
= IRSimilarityIdentifier(!DisableBranches
, !DisableIndirectCalls
,
1515 MatchCallsByName
, !DisableIntrinsics
,
1517 IRSI
.findSimilarity(M
);
1522 IRSimilarityAnalysisPrinterPass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
1523 IRSimilarityIdentifier
&IRSI
= AM
.getResult
<IRSimilarityAnalysis
>(M
);
1524 std::optional
<SimilarityGroupList
> &SimilarityCandidatesOpt
=
1525 IRSI
.getSimilarity();
1527 for (std::vector
<IRSimilarityCandidate
> &CandVec
: *SimilarityCandidatesOpt
) {
1528 OS
<< CandVec
.size() << " candidates of length "
1529 << CandVec
.begin()->getLength() << ". Found in: \n";
1530 for (IRSimilarityCandidate
&Cand
: CandVec
) {
1531 OS
<< " Function: " << Cand
.front()->Inst
->getFunction()->getName().str()
1532 << ", Basic Block: ";
1533 if (Cand
.front()->Inst
->getParent()->getName().str() == "")
1536 OS
<< Cand
.front()->Inst
->getParent()->getName().str();
1537 OS
<< "\n Start Instruction: ";
1538 Cand
.frontInstruction()->print(OS
);
1539 OS
<< "\n End Instruction: ";
1540 Cand
.backInstruction()->print(OS
);
1545 return PreservedAnalyses::all();
1548 char IRSimilarityIdentifierWrapperPass::ID
= 0;