[mlir][linalg] Add support for masked vectorization of `tensor.insert_slice` (1/N...
[llvm-project.git] / llvm / lib / Analysis / IRSimilarityIdentifier.cpp
blob42e986e6179dd34dda828af5f39e9e63d15a38bb
1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // \file
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/ADT/SetOperations.h"
18 #include "llvm/IR/Intrinsics.h"
19 #include "llvm/IR/Operator.h"
20 #include "llvm/IR/User.h"
21 #include "llvm/InitializePasses.h"
22 #include "llvm/Support/SuffixTree.h"
24 using namespace llvm;
25 using namespace IRSimilarity;
27 namespace llvm {
28 cl::opt<bool>
29 DisableBranches("no-ir-sim-branch-matching", cl::init(false),
30 cl::ReallyHidden,
31 cl::desc("disable similarity matching, and outlining, "
32 "across branches for debugging purposes."));
34 cl::opt<bool>
35 DisableIndirectCalls("no-ir-sim-indirect-calls", cl::init(false),
36 cl::ReallyHidden,
37 cl::desc("disable outlining indirect calls."));
39 cl::opt<bool>
40 MatchCallsByName("ir-sim-calls-by-name", cl::init(false), cl::ReallyHidden,
41 cl::desc("only allow matching call instructions if the "
42 "name and type signature match."));
44 cl::opt<bool>
45 DisableIntrinsics("no-ir-sim-intrinsics", cl::init(false), cl::ReallyHidden,
46 cl::desc("Don't match or outline intrinsics"));
47 } // namespace llvm
49 IRInstructionData::IRInstructionData(Instruction &I, bool Legality,
50 IRInstructionDataList &IDList)
51 : Inst(&I), Legal(Legality), IDL(&IDList) {
52 initializeInstruction();
55 void IRInstructionData::initializeInstruction() {
56 // We check for whether we have a comparison instruction. If it is, we
57 // find the "less than" version of the predicate for consistency for
58 // comparison instructions throught the program.
59 if (CmpInst *C = dyn_cast<CmpInst>(Inst)) {
60 CmpInst::Predicate Predicate = predicateForConsistency(C);
61 if (Predicate != C->getPredicate())
62 RevisedPredicate = Predicate;
65 // Here we collect the operands and their types for determining whether
66 // the structure of the operand use matches between two different candidates.
67 for (Use &OI : Inst->operands()) {
68 if (isa<CmpInst>(Inst) && RevisedPredicate) {
69 // If we have a CmpInst where the predicate is reversed, it means the
70 // operands must be reversed as well.
71 OperVals.insert(OperVals.begin(), OI.get());
72 continue;
75 OperVals.push_back(OI.get());
78 // We capture the incoming BasicBlocks as values as well as the incoming
79 // Values in order to check for structural similarity.
80 if (PHINode *PN = dyn_cast<PHINode>(Inst))
81 for (BasicBlock *BB : PN->blocks())
82 OperVals.push_back(BB);
85 IRInstructionData::IRInstructionData(IRInstructionDataList &IDList)
86 : IDL(&IDList) {}
88 void IRInstructionData::setBranchSuccessors(
89 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
90 assert(isa<BranchInst>(Inst) && "Instruction must be branch");
92 BranchInst *BI = cast<BranchInst>(Inst);
93 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
95 BBNumIt = BasicBlockToInteger.find(BI->getParent());
96 assert(BBNumIt != BasicBlockToInteger.end() &&
97 "Could not find location for BasicBlock!");
99 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
101 for (Value *V : getBlockOperVals()) {
102 BasicBlock *Successor = cast<BasicBlock>(V);
103 BBNumIt = BasicBlockToInteger.find(Successor);
104 assert(BBNumIt != BasicBlockToInteger.end() &&
105 "Could not find number for BasicBlock!");
106 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
108 int Relative = OtherBlockNumber - CurrentBlockNumber;
109 RelativeBlockLocations.push_back(Relative);
113 ArrayRef<Value *> IRInstructionData::getBlockOperVals() {
114 assert((isa<BranchInst>(Inst) ||
115 isa<PHINode>(Inst)) && "Instruction must be branch or PHINode");
117 if (BranchInst *BI = dyn_cast<BranchInst>(Inst))
118 return ArrayRef<Value *>(
119 std::next(OperVals.begin(), BI->isConditional() ? 1 : 0),
120 OperVals.end()
123 if (PHINode *PN = dyn_cast<PHINode>(Inst))
124 return ArrayRef<Value *>(
125 std::next(OperVals.begin(), PN->getNumIncomingValues()),
126 OperVals.end()
129 return ArrayRef<Value *>();
132 void IRInstructionData::setCalleeName(bool MatchByName) {
133 CallInst *CI = dyn_cast<CallInst>(Inst);
134 assert(CI && "Instruction must be call");
136 CalleeName = "";
137 if (IntrinsicInst *II = dyn_cast<IntrinsicInst>(Inst)) {
138 // To hash intrinsics, we use the opcode, and types like the other
139 // instructions, but also, the Intrinsic ID, and the Name of the
140 // intrinsic.
141 Intrinsic::ID IntrinsicID = II->getIntrinsicID();
142 FunctionType *FT = II->getFunctionType();
143 // If there is an overloaded name, we have to use the complex version
144 // of getName to get the entire string.
145 if (Intrinsic::isOverloaded(IntrinsicID))
146 CalleeName =
147 Intrinsic::getName(IntrinsicID, FT->params(), II->getModule(), FT);
148 // If there is not an overloaded name, we only need to use this version.
149 else
150 CalleeName = Intrinsic::getName(IntrinsicID).str();
152 return;
155 if (!CI->isIndirectCall() && MatchByName)
156 CalleeName = CI->getCalledFunction()->getName().str();
159 void IRInstructionData::setPHIPredecessors(
160 DenseMap<BasicBlock *, unsigned> &BasicBlockToInteger) {
161 assert(isa<PHINode>(Inst) && "Instruction must be phi node");
163 PHINode *PN = cast<PHINode>(Inst);
164 DenseMap<BasicBlock *, unsigned>::iterator BBNumIt;
166 BBNumIt = BasicBlockToInteger.find(PN->getParent());
167 assert(BBNumIt != BasicBlockToInteger.end() &&
168 "Could not find location for BasicBlock!");
170 int CurrentBlockNumber = static_cast<int>(BBNumIt->second);
172 // Convert the incoming blocks of the PHINode to an integer value, based on
173 // the relative distances between the current block and the incoming block.
174 for (unsigned Idx = 0; Idx < PN->getNumIncomingValues(); Idx++) {
175 BasicBlock *Incoming = PN->getIncomingBlock(Idx);
176 BBNumIt = BasicBlockToInteger.find(Incoming);
177 assert(BBNumIt != BasicBlockToInteger.end() &&
178 "Could not find number for BasicBlock!");
179 int OtherBlockNumber = static_cast<int>(BBNumIt->second);
181 int Relative = OtherBlockNumber - CurrentBlockNumber;
182 RelativeBlockLocations.push_back(Relative);
186 CmpInst::Predicate IRInstructionData::predicateForConsistency(CmpInst *CI) {
187 switch (CI->getPredicate()) {
188 case CmpInst::FCMP_OGT:
189 case CmpInst::FCMP_UGT:
190 case CmpInst::FCMP_OGE:
191 case CmpInst::FCMP_UGE:
192 case CmpInst::ICMP_SGT:
193 case CmpInst::ICMP_UGT:
194 case CmpInst::ICMP_SGE:
195 case CmpInst::ICMP_UGE:
196 return CI->getSwappedPredicate();
197 default:
198 return CI->getPredicate();
202 CmpInst::Predicate IRInstructionData::getPredicate() const {
203 assert(isa<CmpInst>(Inst) &&
204 "Can only get a predicate from a compare instruction");
206 if (RevisedPredicate)
207 return *RevisedPredicate;
209 return cast<CmpInst>(Inst)->getPredicate();
212 StringRef IRInstructionData::getCalleeName() const {
213 assert(isa<CallInst>(Inst) &&
214 "Can only get a name from a call instruction");
216 assert(CalleeName && "CalleeName has not been set");
218 return *CalleeName;
221 bool IRSimilarity::isClose(const IRInstructionData &A,
222 const IRInstructionData &B) {
224 if (!A.Legal || !B.Legal)
225 return false;
227 // Check if we are performing the same sort of operation on the same types
228 // but not on the same values.
229 if (!A.Inst->isSameOperationAs(B.Inst)) {
230 // If there is a predicate, this means that either there is a swapped
231 // predicate, or that the types are different, we want to make sure that
232 // the predicates are equivalent via swapping.
233 if (isa<CmpInst>(A.Inst) && isa<CmpInst>(B.Inst)) {
235 if (A.getPredicate() != B.getPredicate())
236 return false;
238 // If the predicates are the same via swap, make sure that the types are
239 // still the same.
240 auto ZippedTypes = zip(A.OperVals, B.OperVals);
242 return all_of(
243 ZippedTypes, [](std::tuple<llvm::Value *, llvm::Value *> R) {
244 return std::get<0>(R)->getType() == std::get<1>(R)->getType();
248 return false;
251 // Since any GEP Instruction operands after the first operand cannot be
252 // defined by a register, we must make sure that the operands after the first
253 // are the same in the two instructions
254 if (auto *GEP = dyn_cast<GetElementPtrInst>(A.Inst)) {
255 auto *OtherGEP = cast<GetElementPtrInst>(B.Inst);
257 // If the instructions do not have the same inbounds restrictions, we do
258 // not consider them the same.
259 if (GEP->isInBounds() != OtherGEP->isInBounds())
260 return false;
262 auto ZippedOperands = zip(GEP->indices(), OtherGEP->indices());
264 // We increment here since we do not care about the first instruction,
265 // we only care about the following operands since they must be the
266 // exact same to be considered similar.
267 return all_of(drop_begin(ZippedOperands),
268 [](std::tuple<llvm::Use &, llvm::Use &> R) {
269 return std::get<0>(R) == std::get<1>(R);
273 // If the instructions are functions calls, we make sure that the function
274 // name is the same. We already know that the types are since is
275 // isSameOperationAs is true.
276 if (isa<CallInst>(A.Inst) && isa<CallInst>(B.Inst)) {
277 if (A.getCalleeName() != B.getCalleeName())
278 return false;
281 if (isa<BranchInst>(A.Inst) && isa<BranchInst>(B.Inst) &&
282 A.RelativeBlockLocations.size() != B.RelativeBlockLocations.size())
283 return false;
285 return true;
288 // TODO: This is the same as the MachineOutliner, and should be consolidated
289 // into the same interface.
290 void IRInstructionMapper::convertToUnsignedVec(
291 BasicBlock &BB, std::vector<IRInstructionData *> &InstrList,
292 std::vector<unsigned> &IntegerMapping) {
293 BasicBlock::iterator It = BB.begin();
295 std::vector<unsigned> IntegerMappingForBB;
296 std::vector<IRInstructionData *> InstrListForBB;
298 for (BasicBlock::iterator Et = BB.end(); It != Et; ++It) {
299 switch (InstClassifier.visit(*It)) {
300 case InstrType::Legal:
301 mapToLegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
302 break;
303 case InstrType::Illegal:
304 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB);
305 break;
306 case InstrType::Invisible:
307 AddedIllegalLastTime = false;
308 break;
312 if (AddedIllegalLastTime)
313 mapToIllegalUnsigned(It, IntegerMappingForBB, InstrListForBB, true);
314 for (IRInstructionData *ID : InstrListForBB)
315 this->IDL->push_back(*ID);
316 llvm::append_range(InstrList, InstrListForBB);
317 llvm::append_range(IntegerMapping, IntegerMappingForBB);
320 // TODO: This is the same as the MachineOutliner, and should be consolidated
321 // into the same interface.
322 unsigned IRInstructionMapper::mapToLegalUnsigned(
323 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
324 std::vector<IRInstructionData *> &InstrListForBB) {
325 // We added something legal, so we should unset the AddedLegalLastTime
326 // flag.
327 AddedIllegalLastTime = false;
329 // If we have at least two adjacent legal instructions (which may have
330 // invisible instructions in between), remember that.
331 if (CanCombineWithPrevInstr)
332 HaveLegalRange = true;
333 CanCombineWithPrevInstr = true;
335 // Get the integer for this instruction or give it the current
336 // LegalInstrNumber.
337 IRInstructionData *ID = allocateIRInstructionData(*It, true, *IDL);
338 InstrListForBB.push_back(ID);
340 if (isa<BranchInst>(*It))
341 ID->setBranchSuccessors(BasicBlockToInteger);
343 if (isa<CallInst>(*It))
344 ID->setCalleeName(EnableMatchCallsByName);
346 if (isa<PHINode>(*It))
347 ID->setPHIPredecessors(BasicBlockToInteger);
349 // Add to the instruction list
350 bool WasInserted;
351 DenseMap<IRInstructionData *, unsigned, IRInstructionDataTraits>::iterator
352 ResultIt;
353 std::tie(ResultIt, WasInserted) =
354 InstructionIntegerMap.insert(std::make_pair(ID, LegalInstrNumber));
355 unsigned INumber = ResultIt->second;
357 // There was an insertion.
358 if (WasInserted)
359 LegalInstrNumber++;
361 IntegerMappingForBB.push_back(INumber);
363 // Make sure we don't overflow or use any integers reserved by the DenseMap.
364 assert(LegalInstrNumber < IllegalInstrNumber &&
365 "Instruction mapping overflow!");
367 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
368 "Tried to assign DenseMap tombstone or empty key to instruction.");
369 assert(LegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
370 "Tried to assign DenseMap tombstone or empty key to instruction.");
372 return INumber;
375 IRInstructionData *
376 IRInstructionMapper::allocateIRInstructionData(Instruction &I, bool Legality,
377 IRInstructionDataList &IDL) {
378 return new (InstDataAllocator->Allocate()) IRInstructionData(I, Legality, IDL);
381 IRInstructionData *
382 IRInstructionMapper::allocateIRInstructionData(IRInstructionDataList &IDL) {
383 return new (InstDataAllocator->Allocate()) IRInstructionData(IDL);
386 IRInstructionDataList *
387 IRInstructionMapper::allocateIRInstructionDataList() {
388 return new (IDLAllocator->Allocate()) IRInstructionDataList();
391 // TODO: This is the same as the MachineOutliner, and should be consolidated
392 // into the same interface.
393 unsigned IRInstructionMapper::mapToIllegalUnsigned(
394 BasicBlock::iterator &It, std::vector<unsigned> &IntegerMappingForBB,
395 std::vector<IRInstructionData *> &InstrListForBB, bool End) {
396 // Can't combine an illegal instruction. Set the flag.
397 CanCombineWithPrevInstr = false;
399 // Only add one illegal number per range of legal numbers.
400 if (AddedIllegalLastTime)
401 return IllegalInstrNumber;
403 IRInstructionData *ID = nullptr;
404 if (!End)
405 ID = allocateIRInstructionData(*It, false, *IDL);
406 else
407 ID = allocateIRInstructionData(*IDL);
408 InstrListForBB.push_back(ID);
410 // Remember that we added an illegal number last time.
411 AddedIllegalLastTime = true;
412 unsigned INumber = IllegalInstrNumber;
413 IntegerMappingForBB.push_back(IllegalInstrNumber--);
415 assert(LegalInstrNumber < IllegalInstrNumber &&
416 "Instruction mapping overflow!");
418 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getEmptyKey() &&
419 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
421 assert(IllegalInstrNumber != DenseMapInfo<unsigned>::getTombstoneKey() &&
422 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
424 return INumber;
427 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx, unsigned Len,
428 IRInstructionData *FirstInstIt,
429 IRInstructionData *LastInstIt)
430 : StartIdx(StartIdx), Len(Len) {
432 assert(FirstInstIt != nullptr && "Instruction is nullptr!");
433 assert(LastInstIt != nullptr && "Instruction is nullptr!");
434 assert(StartIdx + Len > StartIdx &&
435 "Overflow for IRSimilarityCandidate range?");
436 assert(Len - 1 == static_cast<unsigned>(std::distance(
437 iterator(FirstInstIt), iterator(LastInstIt))) &&
438 "Length of the first and last IRInstructionData do not match the "
439 "given length");
441 // We iterate over the given instructions, and map each unique value
442 // to a unique number in the IRSimilarityCandidate ValueToNumber and
443 // NumberToValue maps. A constant get its own value globally, the individual
444 // uses of the constants are not considered to be unique.
446 // IR: Mapping Added:
447 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
448 // %add2 = add i32 %a, %1 %add2 -> 4
449 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
451 // when replace with global values, starting from 1, would be
453 // 3 = add i32 1, 2
454 // 4 = add i32 1, 3
455 // 6 = add i32 5, 2
456 unsigned LocalValNumber = 1;
457 IRInstructionDataList::iterator ID = iterator(*FirstInstIt);
458 for (unsigned Loc = StartIdx; Loc < StartIdx + Len; Loc++, ID++) {
459 // Map the operand values to an unsigned integer if it does not already
460 // have an unsigned integer assigned to it.
461 for (Value *Arg : ID->OperVals)
462 if (ValueToNumber.try_emplace(Arg, LocalValNumber).second) {
463 NumberToValue.try_emplace(LocalValNumber, Arg);
464 LocalValNumber++;
467 // Mapping the instructions to an unsigned integer if it is not already
468 // exist in the mapping.
469 if (ValueToNumber.try_emplace(ID->Inst, LocalValNumber).second) {
470 NumberToValue.try_emplace(LocalValNumber, ID->Inst);
471 LocalValNumber++;
475 // Setting the first and last instruction data pointers for the candidate. If
476 // we got through the entire for loop without hitting an assert, we know
477 // that both of these instructions are not nullptrs.
478 FirstInst = FirstInstIt;
479 LastInst = LastInstIt;
481 // Add the basic blocks contained in the set into the global value numbering.
482 DenseSet<BasicBlock *> BBSet;
483 getBasicBlocks(BBSet);
484 for (BasicBlock *BB : BBSet) {
485 if (ValueToNumber.try_emplace(BB, LocalValNumber).second) {
486 NumberToValue.try_emplace(LocalValNumber, BB);
487 LocalValNumber++;
492 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate &A,
493 const IRSimilarityCandidate &B) {
494 if (A.getLength() != B.getLength())
495 return false;
497 auto InstrDataForBoth =
498 zip(make_range(A.begin(), A.end()), make_range(B.begin(), B.end()));
500 return all_of(InstrDataForBoth,
501 [](std::tuple<IRInstructionData &, IRInstructionData &> R) {
502 IRInstructionData &A = std::get<0>(R);
503 IRInstructionData &B = std::get<1>(R);
504 if (!A.Legal || !B.Legal)
505 return false;
506 return isClose(A, B);
510 /// Determine if one or more of the assigned global value numbers for the
511 /// operands in \p TargetValueNumbers is in the current mapping set for operand
512 /// numbers in \p SourceOperands. The set of possible corresponding global
513 /// value numbers are replaced with the most recent version of compatible
514 /// values.
516 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
517 /// value number for the source IRInstructionCandidate.
518 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
519 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
520 /// the target.
521 /// \param [in] SourceOperands - The operands in the original
522 /// IRSimilarityCandidate in the current instruction.
523 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
524 /// the corresponding Instruction in the other IRSimilarityCandidate.
525 /// \returns true if there exists a possible mapping between the source
526 /// Instruction operands and the target Instruction operands, and false if not.
527 static bool checkNumberingAndReplaceCommutative(
528 const DenseMap<Value *, unsigned> &SourceValueToNumberMapping,
529 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
530 ArrayRef<Value *> &SourceOperands,
531 DenseSet<unsigned> &TargetValueNumbers){
533 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
535 unsigned ArgVal;
536 bool WasInserted;
538 // Iterate over the operands in the source IRSimilarityCandidate to determine
539 // whether there exists an operand in the other IRSimilarityCandidate that
540 // creates a valid mapping of Value to Value between the
541 // IRSimilarityCaniddates.
542 for (Value *V : SourceOperands) {
543 ArgVal = SourceValueToNumberMapping.find(V)->second;
545 // Instead of finding a current mapping, we attempt to insert a set.
546 std::tie(ValueMappingIt, WasInserted) = CurrentSrcTgtNumberMapping.insert(
547 std::make_pair(ArgVal, TargetValueNumbers));
549 // We need to iterate over the items in other IRSimilarityCandidate's
550 // Instruction to determine whether there is a valid mapping of
551 // Value to Value.
552 DenseSet<unsigned> NewSet;
553 for (unsigned &Curr : ValueMappingIt->second)
554 // If we can find the value in the mapping, we add it to the new set.
555 if (TargetValueNumbers.contains(Curr))
556 NewSet.insert(Curr);
558 // If we could not find a Value, return 0.
559 if (NewSet.empty())
560 return false;
562 // Otherwise replace the old mapping with the newly constructed one.
563 if (NewSet.size() != ValueMappingIt->second.size())
564 ValueMappingIt->second.swap(NewSet);
566 // We have reached no conclusions about the mapping, and cannot remove
567 // any items from the other operands, so we move to check the next operand.
568 if (ValueMappingIt->second.size() != 1)
569 continue;
571 unsigned ValToRemove = *ValueMappingIt->second.begin();
572 // When there is only one item left in the mapping for and operand, remove
573 // the value from the other operands. If it results in there being no
574 // mapping, return false, it means the mapping is wrong
575 for (Value *InnerV : SourceOperands) {
576 if (V == InnerV)
577 continue;
579 unsigned InnerVal = SourceValueToNumberMapping.find(InnerV)->second;
580 ValueMappingIt = CurrentSrcTgtNumberMapping.find(InnerVal);
581 if (ValueMappingIt == CurrentSrcTgtNumberMapping.end())
582 continue;
584 ValueMappingIt->second.erase(ValToRemove);
585 if (ValueMappingIt->second.empty())
586 return false;
590 return true;
593 /// Determine if operand number \p TargetArgVal is in the current mapping set
594 /// for operand number \p SourceArgVal.
596 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
597 /// value numbers from source IRSimilarityCandidate to target
598 /// IRSimilarityCandidate.
599 /// \param [in] SourceArgVal The global value number for an operand in the
600 /// in the original candidate.
601 /// \param [in] TargetArgVal The global value number for the corresponding
602 /// operand in the other candidate.
603 /// \returns True if there exists a mapping and false if not.
604 bool checkNumberingAndReplace(
605 DenseMap<unsigned, DenseSet<unsigned>> &CurrentSrcTgtNumberMapping,
606 unsigned SourceArgVal, unsigned TargetArgVal) {
607 // We are given two unsigned integers representing the global values of
608 // the operands in different IRSimilarityCandidates and a current mapping
609 // between the two.
611 // Source Operand GVN: 1
612 // Target Operand GVN: 2
613 // CurrentMapping: {1: {1, 2}}
615 // Since we have mapping, and the target operand is contained in the set, we
616 // update it to:
617 // CurrentMapping: {1: {2}}
618 // and can return true. But, if the mapping was
619 // CurrentMapping: {1: {3}}
620 // we would return false.
622 bool WasInserted;
623 DenseMap<unsigned, DenseSet<unsigned>>::iterator Val;
625 std::tie(Val, WasInserted) = CurrentSrcTgtNumberMapping.insert(
626 std::make_pair(SourceArgVal, DenseSet<unsigned>({TargetArgVal})));
628 // If we created a new mapping, then we are done.
629 if (WasInserted)
630 return true;
632 // If there is more than one option in the mapping set, and the target value
633 // is included in the mapping set replace that set with one that only includes
634 // the target value, as it is the only valid mapping via the non commutative
635 // instruction.
637 DenseSet<unsigned> &TargetSet = Val->second;
638 if (TargetSet.size() > 1 && TargetSet.contains(TargetArgVal)) {
639 TargetSet.clear();
640 TargetSet.insert(TargetArgVal);
641 return true;
644 // Return true if we can find the value in the set.
645 return TargetSet.contains(TargetArgVal);
648 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
649 OperandMapping A, OperandMapping B) {
650 // Iterators to keep track of where we are in the operands for each
651 // Instruction.
652 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
653 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
654 unsigned OperandLength = A.OperVals.size();
656 // For each operand, get the value numbering and ensure it is consistent.
657 for (unsigned Idx = 0; Idx < OperandLength; Idx++, VItA++, VItB++) {
658 unsigned OperValA = A.IRSC.ValueToNumber.find(*VItA)->second;
659 unsigned OperValB = B.IRSC.ValueToNumber.find(*VItB)->second;
661 // Attempt to add a set with only the target value. If there is no mapping
662 // we can create it here.
664 // For an instruction like a subtraction:
665 // IRSimilarityCandidateA: IRSimilarityCandidateB:
666 // %resultA = sub %a, %b %resultB = sub %d, %e
668 // We map %a -> %d and %b -> %e.
670 // And check to see whether their mapping is consistent in
671 // checkNumberingAndReplace.
673 if (!checkNumberingAndReplace(A.ValueNumberMapping, OperValA, OperValB))
674 return false;
676 if (!checkNumberingAndReplace(B.ValueNumberMapping, OperValB, OperValA))
677 return false;
679 return true;
682 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
683 OperandMapping A, OperandMapping B) {
684 DenseSet<unsigned> ValueNumbersA;
685 DenseSet<unsigned> ValueNumbersB;
687 ArrayRef<Value *>::iterator VItA = A.OperVals.begin();
688 ArrayRef<Value *>::iterator VItB = B.OperVals.begin();
689 unsigned OperandLength = A.OperVals.size();
691 // Find the value number sets for the operands.
692 for (unsigned Idx = 0; Idx < OperandLength;
693 Idx++, VItA++, VItB++) {
694 ValueNumbersA.insert(A.IRSC.ValueToNumber.find(*VItA)->second);
695 ValueNumbersB.insert(B.IRSC.ValueToNumber.find(*VItB)->second);
698 // Iterate over the operands in the first IRSimilarityCandidate and make sure
699 // there exists a possible mapping with the operands in the second
700 // IRSimilarityCandidate.
701 if (!checkNumberingAndReplaceCommutative(A.IRSC.ValueToNumber,
702 A.ValueNumberMapping, A.OperVals,
703 ValueNumbersB))
704 return false;
706 // Iterate over the operands in the second IRSimilarityCandidate and make sure
707 // there exists a possible mapping with the operands in the first
708 // IRSimilarityCandidate.
709 if (!checkNumberingAndReplaceCommutative(B.IRSC.ValueToNumber,
710 B.ValueNumberMapping, B.OperVals,
711 ValueNumbersA))
712 return false;
714 return true;
717 bool IRSimilarityCandidate::compareAssignmentMapping(
718 const unsigned InstValA, const unsigned &InstValB,
719 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
720 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
721 DenseMap<unsigned, DenseSet<unsigned>>::iterator ValueMappingIt;
722 bool WasInserted;
723 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
724 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
725 if (!WasInserted && !ValueMappingIt->second.contains(InstValB))
726 return false;
727 else if (ValueMappingIt->second.size() != 1) {
728 for (unsigned OtherVal : ValueMappingIt->second) {
729 if (OtherVal == InstValB)
730 continue;
731 if (!ValueNumberMappingA.contains(OtherVal))
732 continue;
733 if (!ValueNumberMappingA[OtherVal].contains(InstValA))
734 continue;
735 ValueNumberMappingA[OtherVal].erase(InstValA);
737 ValueNumberMappingA.erase(ValueMappingIt);
738 std::tie(ValueMappingIt, WasInserted) = ValueNumberMappingA.insert(
739 std::make_pair(InstValA, DenseSet<unsigned>({InstValB})));
742 return true;
745 bool IRSimilarityCandidate::checkRelativeLocations(RelativeLocMapping A,
746 RelativeLocMapping B) {
747 // Get the basic blocks the label refers to.
748 BasicBlock *ABB = cast<BasicBlock>(A.OperVal);
749 BasicBlock *BBB = cast<BasicBlock>(B.OperVal);
751 // Get the basic blocks contained in each region.
752 DenseSet<BasicBlock *> BasicBlockA;
753 DenseSet<BasicBlock *> BasicBlockB;
754 A.IRSC.getBasicBlocks(BasicBlockA);
755 B.IRSC.getBasicBlocks(BasicBlockB);
757 // Determine if the block is contained in the region.
758 bool AContained = BasicBlockA.contains(ABB);
759 bool BContained = BasicBlockB.contains(BBB);
761 // Both blocks need to be contained in the region, or both need to be outside
762 // the region.
763 if (AContained != BContained)
764 return false;
766 // If both are contained, then we need to make sure that the relative
767 // distance to the target blocks are the same.
768 if (AContained)
769 return A.RelativeLocation == B.RelativeLocation;
770 return true;
773 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate &A,
774 const IRSimilarityCandidate &B) {
775 DenseMap<unsigned, DenseSet<unsigned>> MappingA;
776 DenseMap<unsigned, DenseSet<unsigned>> MappingB;
777 return IRSimilarityCandidate::compareStructure(A, B, MappingA, MappingB);
780 typedef detail::zippy<detail::zip_shortest, SmallVector<int, 4> &,
781 SmallVector<int, 4> &, ArrayRef<Value *> &,
782 ArrayRef<Value *> &>
783 ZippedRelativeLocationsT;
785 bool IRSimilarityCandidate::compareStructure(
786 const IRSimilarityCandidate &A, const IRSimilarityCandidate &B,
787 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingA,
788 DenseMap<unsigned, DenseSet<unsigned>> &ValueNumberMappingB) {
789 if (A.getLength() != B.getLength())
790 return false;
792 if (A.ValueToNumber.size() != B.ValueToNumber.size())
793 return false;
795 iterator ItA = A.begin();
796 iterator ItB = B.begin();
798 // These ValueNumber Mapping sets create a create a mapping between the values
799 // in one candidate to values in the other candidate. If we create a set with
800 // one element, and that same element maps to the original element in the
801 // candidate we have a good mapping.
803 // Iterate over the instructions contained in each candidate
804 unsigned SectionLength = A.getStartIdx() + A.getLength();
805 for (unsigned Loc = A.getStartIdx(); Loc < SectionLength;
806 ItA++, ItB++, Loc++) {
807 // Make sure the instructions are similar to one another.
808 if (!isClose(*ItA, *ItB))
809 return false;
811 Instruction *IA = ItA->Inst;
812 Instruction *IB = ItB->Inst;
814 if (!ItA->Legal || !ItB->Legal)
815 return false;
817 // Get the operand sets for the instructions.
818 ArrayRef<Value *> OperValsA = ItA->OperVals;
819 ArrayRef<Value *> OperValsB = ItB->OperVals;
821 unsigned InstValA = A.ValueToNumber.find(IA)->second;
822 unsigned InstValB = B.ValueToNumber.find(IB)->second;
824 // Ensure that the mappings for the instructions exists.
825 if (!compareAssignmentMapping(InstValA, InstValB, ValueNumberMappingA,
826 ValueNumberMappingB))
827 return false;
829 if (!compareAssignmentMapping(InstValB, InstValA, ValueNumberMappingB,
830 ValueNumberMappingA))
831 return false;
833 // We have different paths for commutative instructions and non-commutative
834 // instructions since commutative instructions could allow multiple mappings
835 // to certain values.
836 if (IA->isCommutative() && !isa<FPMathOperator>(IA) &&
837 !isa<IntrinsicInst>(IA)) {
838 if (!compareCommutativeOperandMapping(
839 {A, OperValsA, ValueNumberMappingA},
840 {B, OperValsB, ValueNumberMappingB}))
841 return false;
842 continue;
845 // Handle the non-commutative cases.
846 if (!compareNonCommutativeOperandMapping(
847 {A, OperValsA, ValueNumberMappingA},
848 {B, OperValsB, ValueNumberMappingB}))
849 return false;
851 // Here we check that between two corresponding instructions,
852 // when referring to a basic block in the same region, the
853 // relative locations are the same. And, that the instructions refer to
854 // basic blocks outside the region in the same corresponding locations.
856 // We are able to make the assumption about blocks outside of the region
857 // since the target block labels are considered values and will follow the
858 // same number matching that we defined for the other instructions in the
859 // region. So, at this point, in each location we target a specific block
860 // outside the region, we are targeting a corresponding block in each
861 // analagous location in the region we are comparing to.
862 if (!(isa<BranchInst>(IA) && isa<BranchInst>(IB)) &&
863 !(isa<PHINode>(IA) && isa<PHINode>(IB)))
864 continue;
866 SmallVector<int, 4> &RelBlockLocsA = ItA->RelativeBlockLocations;
867 SmallVector<int, 4> &RelBlockLocsB = ItB->RelativeBlockLocations;
868 ArrayRef<Value *> ABL = ItA->getBlockOperVals();
869 ArrayRef<Value *> BBL = ItB->getBlockOperVals();
871 // Check to make sure that the number of operands, and branching locations
872 // between BranchInsts is the same.
873 if (RelBlockLocsA.size() != RelBlockLocsB.size() &&
874 ABL.size() != BBL.size())
875 return false;
877 assert(RelBlockLocsA.size() == ABL.size() &&
878 "Block information vectors not the same size.");
879 assert(RelBlockLocsB.size() == BBL.size() &&
880 "Block information vectors not the same size.");
882 ZippedRelativeLocationsT ZippedRelativeLocations =
883 zip(RelBlockLocsA, RelBlockLocsB, ABL, BBL);
884 if (any_of(ZippedRelativeLocations,
885 [&A, &B](std::tuple<int, int, Value *, Value *> R) {
886 return !checkRelativeLocations(
887 {A, std::get<0>(R), std::get<2>(R)},
888 {B, std::get<1>(R), std::get<3>(R)});
890 return false;
892 return true;
895 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate &A,
896 const IRSimilarityCandidate &B) {
897 auto DoesOverlap = [](const IRSimilarityCandidate &X,
898 const IRSimilarityCandidate &Y) {
899 // Check:
900 // XXXXXX X starts before Y ends
901 // YYYYYYY Y starts after X starts
902 return X.StartIdx <= Y.getEndIdx() && Y.StartIdx >= X.StartIdx;
905 return DoesOverlap(A, B) || DoesOverlap(B, A);
908 void IRSimilarityIdentifier::populateMapper(
909 Module &M, std::vector<IRInstructionData *> &InstrList,
910 std::vector<unsigned> &IntegerMapping) {
912 std::vector<IRInstructionData *> InstrListForModule;
913 std::vector<unsigned> IntegerMappingForModule;
914 // Iterate over the functions in the module to map each Instruction in each
915 // BasicBlock to an unsigned integer.
916 Mapper.initializeForBBs(M);
918 for (Function &F : M) {
920 if (F.empty())
921 continue;
923 for (BasicBlock &BB : F) {
925 // BB has potential to have similarity since it has a size greater than 2
926 // and can therefore match other regions greater than 2. Map it to a list
927 // of unsigned integers.
928 Mapper.convertToUnsignedVec(BB, InstrListForModule,
929 IntegerMappingForModule);
932 BasicBlock::iterator It = F.begin()->end();
933 Mapper.mapToIllegalUnsigned(It, IntegerMappingForModule, InstrListForModule,
934 true);
935 if (InstrListForModule.size() > 0)
936 Mapper.IDL->push_back(*InstrListForModule.back());
939 // Insert the InstrListForModule at the end of the overall InstrList so that
940 // we can have a long InstrList for the entire set of Modules being analyzed.
941 llvm::append_range(InstrList, InstrListForModule);
942 // Do the same as above, but for IntegerMapping.
943 llvm::append_range(IntegerMapping, IntegerMappingForModule);
946 void IRSimilarityIdentifier::populateMapper(
947 ArrayRef<std::unique_ptr<Module>> &Modules,
948 std::vector<IRInstructionData *> &InstrList,
949 std::vector<unsigned> &IntegerMapping) {
951 // Iterate over, and map the instructions in each module.
952 for (const std::unique_ptr<Module> &M : Modules)
953 populateMapper(*M, InstrList, IntegerMapping);
956 /// From a repeated subsequence, find all the different instances of the
957 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
958 /// the IRInstructionData in subsequence.
960 /// \param [in] Mapper - The instruction mapper for basic correctness checks.
961 /// \param [in] InstrList - The vector that holds the instruction data.
962 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
963 /// \param [out] CandsForRepSubstring - The vector to store the generated
964 /// IRSimilarityCandidates.
965 static void createCandidatesFromSuffixTree(
966 const IRInstructionMapper& Mapper, std::vector<IRInstructionData *> &InstrList,
967 std::vector<unsigned> &IntegerMapping, SuffixTree::RepeatedSubstring &RS,
968 std::vector<IRSimilarityCandidate> &CandsForRepSubstring) {
970 unsigned StringLen = RS.Length;
971 if (StringLen < 2)
972 return;
974 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
975 for (const unsigned &StartIdx : RS.StartIndices) {
976 unsigned EndIdx = StartIdx + StringLen - 1;
978 // Check that this subsequence does not contain an illegal instruction.
979 bool ContainsIllegal = false;
980 for (unsigned CurrIdx = StartIdx; CurrIdx <= EndIdx; CurrIdx++) {
981 unsigned Key = IntegerMapping[CurrIdx];
982 if (Key > Mapper.IllegalInstrNumber) {
983 ContainsIllegal = true;
984 break;
988 // If we have an illegal instruction, we should not create an
989 // IRSimilarityCandidate for this region.
990 if (ContainsIllegal)
991 continue;
993 // We are getting iterators to the instructions in this region of code
994 // by advancing the start and end indices from the start of the
995 // InstrList.
996 std::vector<IRInstructionData *>::iterator StartIt = InstrList.begin();
997 std::advance(StartIt, StartIdx);
998 std::vector<IRInstructionData *>::iterator EndIt = InstrList.begin();
999 std::advance(EndIt, EndIdx);
1001 CandsForRepSubstring.emplace_back(StartIdx, StringLen, *StartIt, *EndIt);
1005 void IRSimilarityCandidate::createCanonicalRelationFrom(
1006 IRSimilarityCandidate &SourceCand,
1007 DenseMap<unsigned, DenseSet<unsigned>> &ToSourceMapping,
1008 DenseMap<unsigned, DenseSet<unsigned>> &FromSourceMapping) {
1009 assert(SourceCand.CanonNumToNumber.size() != 0 &&
1010 "Base canonical relationship is empty!");
1011 assert(SourceCand.NumberToCanonNum.size() != 0 &&
1012 "Base canonical relationship is empty!");
1014 assert(CanonNumToNumber.size() == 0 && "Canonical Relationship is non-empty");
1015 assert(NumberToCanonNum.size() == 0 && "Canonical Relationship is non-empty");
1017 DenseSet<unsigned> UsedGVNs;
1018 // Iterate over the mappings provided from this candidate to SourceCand. We
1019 // are then able to map the GVN in this candidate to the same canonical number
1020 // given to the corresponding GVN in SourceCand.
1021 for (std::pair<unsigned, DenseSet<unsigned>> &GVNMapping : ToSourceMapping) {
1022 unsigned SourceGVN = GVNMapping.first;
1024 assert(GVNMapping.second.size() != 0 && "Possible GVNs is 0!");
1026 unsigned ResultGVN;
1027 // We need special handling if we have more than one potential value. This
1028 // means that there are at least two GVNs that could correspond to this GVN.
1029 // This could lead to potential swapping later on, so we make a decision
1030 // here to ensure a one-to-one mapping.
1031 if (GVNMapping.second.size() > 1) {
1032 bool Found = false;
1033 for (unsigned Val : GVNMapping.second) {
1034 // We make sure the target value number hasn't already been reserved.
1035 if (UsedGVNs.contains(Val))
1036 continue;
1038 // We make sure that the opposite mapping is still consistent.
1039 DenseMap<unsigned, DenseSet<unsigned>>::iterator It =
1040 FromSourceMapping.find(Val);
1042 if (!It->second.contains(SourceGVN))
1043 continue;
1045 // We pick the first item that satisfies these conditions.
1046 Found = true;
1047 ResultGVN = Val;
1048 break;
1051 assert(Found && "Could not find matching value for source GVN");
1052 (void)Found;
1054 } else
1055 ResultGVN = *GVNMapping.second.begin();
1057 // Whatever GVN is found, we mark it as used.
1058 UsedGVNs.insert(ResultGVN);
1060 unsigned CanonNum = *SourceCand.getCanonicalNum(ResultGVN);
1061 CanonNumToNumber.insert(std::make_pair(CanonNum, SourceGVN));
1062 NumberToCanonNum.insert(std::make_pair(SourceGVN, CanonNum));
1065 DenseSet<BasicBlock *> BBSet;
1066 getBasicBlocks(BBSet);
1067 // Find canonical numbers for the BasicBlocks in the current candidate.
1068 // This is done by finding the corresponding value for the first instruction
1069 // in the block in the current candidate, finding the matching value in the
1070 // source candidate. Then by finding the parent of this value, use the
1071 // canonical number of the block in the source candidate for the canonical
1072 // number in the current candidate.
1073 for (BasicBlock *BB : BBSet) {
1074 unsigned BBGVNForCurrCand = ValueToNumber.find(BB)->second;
1076 // We can skip the BasicBlock if the canonical numbering has already been
1077 // found in a separate instruction.
1078 if (NumberToCanonNum.contains(BBGVNForCurrCand))
1079 continue;
1081 // If the basic block is the starting block, then the shared instruction may
1082 // not be the first instruction in the block, it will be the first
1083 // instruction in the similarity region.
1084 Value *FirstOutlineInst = BB == getStartBB()
1085 ? frontInstruction()
1086 : &*BB->instructionsWithoutDebug().begin();
1088 unsigned FirstInstGVN = *getGVN(FirstOutlineInst);
1089 unsigned FirstInstCanonNum = *getCanonicalNum(FirstInstGVN);
1090 unsigned SourceGVN = *SourceCand.fromCanonicalNum(FirstInstCanonNum);
1091 Value *SourceV = *SourceCand.fromGVN(SourceGVN);
1092 BasicBlock *SourceBB = cast<Instruction>(SourceV)->getParent();
1093 unsigned SourceBBGVN = *SourceCand.getGVN(SourceBB);
1094 unsigned SourceCanonBBGVN = *SourceCand.getCanonicalNum(SourceBBGVN);
1095 CanonNumToNumber.insert(std::make_pair(SourceCanonBBGVN, BBGVNForCurrCand));
1096 NumberToCanonNum.insert(std::make_pair(BBGVNForCurrCand, SourceCanonBBGVN));
1100 void IRSimilarityCandidate::createCanonicalRelationFrom(
1101 IRSimilarityCandidate &SourceCand, IRSimilarityCandidate &SourceCandLarge,
1102 IRSimilarityCandidate &TargetCandLarge) {
1103 assert(!SourceCand.CanonNumToNumber.empty() &&
1104 "Canonical Relationship is non-empty");
1105 assert(!SourceCand.NumberToCanonNum.empty() &&
1106 "Canonical Relationship is non-empty");
1108 assert(!SourceCandLarge.CanonNumToNumber.empty() &&
1109 "Canonical Relationship is non-empty");
1110 assert(!SourceCandLarge.NumberToCanonNum.empty() &&
1111 "Canonical Relationship is non-empty");
1113 assert(!TargetCandLarge.CanonNumToNumber.empty() &&
1114 "Canonical Relationship is non-empty");
1115 assert(!TargetCandLarge.NumberToCanonNum.empty() &&
1116 "Canonical Relationship is non-empty");
1118 assert(CanonNumToNumber.empty() && "Canonical Relationship is non-empty");
1119 assert(NumberToCanonNum.empty() && "Canonical Relationship is non-empty");
1121 // We're going to use the larger candidates as a "bridge" to create the
1122 // canonical number for the target candidate since we have idetified two
1123 // candidates as subsequences of larger sequences, and therefore must be
1124 // structurally similar.
1125 for (std::pair<Value *, unsigned> &ValueNumPair : ValueToNumber) {
1126 Value *CurrVal = ValueNumPair.first;
1127 unsigned TargetCandGVN = ValueNumPair.second;
1129 // Find the numbering in the large candidate that surrounds the
1130 // current candidate.
1131 std::optional<unsigned> OLargeTargetGVN = TargetCandLarge.getGVN(CurrVal);
1132 assert(OLargeTargetGVN.has_value() && "GVN not found for Value");
1134 // Get the canonical numbering in the large target candidate.
1135 std::optional<unsigned> OTargetCandCanon =
1136 TargetCandLarge.getCanonicalNum(OLargeTargetGVN.value());
1137 assert(OTargetCandCanon.has_value() &&
1138 "Canononical Number not found for GVN");
1140 // Get the GVN in the large source candidate from the canonical numbering.
1141 std::optional<unsigned> OLargeSourceGVN =
1142 SourceCandLarge.fromCanonicalNum(OTargetCandCanon.value());
1143 assert(OLargeSourceGVN.has_value() &&
1144 "GVN Number not found for Canonical Number");
1146 // Get the Value from the GVN in the large source candidate.
1147 std::optional<Value *> OLargeSourceV =
1148 SourceCandLarge.fromGVN(OLargeSourceGVN.value());
1149 assert(OLargeSourceV.has_value() && "Value not found for GVN");
1151 // Get the GVN number for the Value in the source candidate.
1152 std::optional<unsigned> OSourceGVN =
1153 SourceCand.getGVN(OLargeSourceV.value());
1154 assert(OSourceGVN.has_value() && "GVN Number not found for Value");
1156 // Get the canonical numbering from the GVN/
1157 std::optional<unsigned> OSourceCanon =
1158 SourceCand.getCanonicalNum(OSourceGVN.value());
1159 assert(OSourceCanon.has_value() && "Canon Number not found for GVN");
1161 // Insert the canonical numbering and GVN pair into their respective
1162 // mappings.
1163 CanonNumToNumber.insert(
1164 std::make_pair(OSourceCanon.value(), TargetCandGVN));
1165 NumberToCanonNum.insert(
1166 std::make_pair(TargetCandGVN, OSourceCanon.value()));
1170 void IRSimilarityCandidate::createCanonicalMappingFor(
1171 IRSimilarityCandidate &CurrCand) {
1172 assert(CurrCand.CanonNumToNumber.size() == 0 &&
1173 "Canonical Relationship is non-empty");
1174 assert(CurrCand.NumberToCanonNum.size() == 0 &&
1175 "Canonical Relationship is non-empty");
1177 unsigned CanonNum = 0;
1178 // Iterate over the value numbers found, the order does not matter in this
1179 // case.
1180 for (std::pair<unsigned, Value *> &NumToVal : CurrCand.NumberToValue) {
1181 CurrCand.NumberToCanonNum.insert(std::make_pair(NumToVal.first, CanonNum));
1182 CurrCand.CanonNumToNumber.insert(std::make_pair(CanonNum, NumToVal.first));
1183 CanonNum++;
1187 /// Look for larger IRSimilarityCandidates From the previously matched
1188 /// IRSimilarityCandidates that fully contain \p CandA or \p CandB. If there is
1189 /// an overlap, return a pair of structurally similar, larger
1190 /// IRSimilarityCandidates.
1192 /// \param [in] CandA - The first candidate we are trying to determine the
1193 /// structure of.
1194 /// \param [in] CandB - The second candidate we are trying to determine the
1195 /// structure of.
1196 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1197 /// a circuit to the IRSimilarityCandidates that include this instruction.
1198 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1199 /// number representing the structural group assigned to it.
1200 static std::optional<
1201 std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1202 CheckLargerCands(
1203 IRSimilarityCandidate &CandA, IRSimilarityCandidate &CandB,
1204 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1205 DenseMap<IRSimilarityCandidate *, unsigned> &CandToGroup) {
1206 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandA;
1207 DenseMap<unsigned, IRSimilarityCandidate *> IncludedGroupAndCandB;
1208 DenseSet<unsigned> IncludedGroupsA;
1209 DenseSet<unsigned> IncludedGroupsB;
1211 // Find the overall similarity group numbers that fully contain the candidate,
1212 // and record the larger candidate for each group.
1213 auto IdxToCandidateIt = IndexToIncludedCand.find(CandA.getStartIdx());
1214 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1215 Result;
1217 unsigned CandAStart = CandA.getStartIdx();
1218 unsigned CandAEnd = CandA.getEndIdx();
1219 unsigned CandBStart = CandB.getStartIdx();
1220 unsigned CandBEnd = CandB.getEndIdx();
1221 if (IdxToCandidateIt == IndexToIncludedCand.end())
1222 return Result;
1223 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1224 if (MatchedCand->getStartIdx() > CandAStart ||
1225 (MatchedCand->getEndIdx() < CandAEnd))
1226 continue;
1227 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1228 IncludedGroupAndCandA.insert(std::make_pair(GroupNum, MatchedCand));
1229 IncludedGroupsA.insert(GroupNum);
1232 // Find the overall similarity group numbers that fully contain the next
1233 // candidate, and record the larger candidate for each group.
1234 IdxToCandidateIt = IndexToIncludedCand.find(CandBStart);
1235 if (IdxToCandidateIt == IndexToIncludedCand.end())
1236 return Result;
1237 for (IRSimilarityCandidate *MatchedCand : IdxToCandidateIt->second) {
1238 if (MatchedCand->getStartIdx() > CandBStart ||
1239 MatchedCand->getEndIdx() < CandBEnd)
1240 continue;
1241 unsigned GroupNum = CandToGroup.find(MatchedCand)->second;
1242 IncludedGroupAndCandB.insert(std::make_pair(GroupNum, MatchedCand));
1243 IncludedGroupsB.insert(GroupNum);
1246 // Find the intersection between the two groups, these are the groups where
1247 // the larger candidates exist.
1248 set_intersect(IncludedGroupsA, IncludedGroupsB);
1250 // If there is no intersection between the sets, then we cannot determine
1251 // whether or not there is a match.
1252 if (IncludedGroupsA.empty())
1253 return Result;
1255 // Create a pair that contains the larger candidates.
1256 auto ItA = IncludedGroupAndCandA.find(*IncludedGroupsA.begin());
1257 auto ItB = IncludedGroupAndCandB.find(*IncludedGroupsA.begin());
1258 Result = std::make_pair(ItA->second, ItB->second);
1259 return Result;
1262 /// From the list of IRSimilarityCandidates, perform a comparison between each
1263 /// IRSimilarityCandidate to determine if there are overlapping
1264 /// IRInstructionData, or if they do not have the same structure.
1266 /// \param [in] CandsForRepSubstring - The vector containing the
1267 /// IRSimilarityCandidates.
1268 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
1269 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
1270 /// vector are structurally similar to one another.
1271 /// \param [in] IndexToIncludedCand - Mapping of index of the an instruction in
1272 /// a circuit to the IRSimilarityCandidates that include this instruction.
1273 /// \param [in] CandToOverallGroup - Mapping of IRSimilarityCandidate to a
1274 /// number representing the structural group assigned to it.
1275 static void findCandidateStructures(
1276 std::vector<IRSimilarityCandidate> &CandsForRepSubstring,
1277 DenseMap<unsigned, SimilarityGroup> &StructuralGroups,
1278 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> &IndexToIncludedCand,
1279 DenseMap<IRSimilarityCandidate *, unsigned> &CandToOverallGroup
1281 std::vector<IRSimilarityCandidate>::iterator CandIt, CandEndIt, InnerCandIt,
1282 InnerCandEndIt;
1284 // IRSimilarityCandidates each have a structure for operand use. It is
1285 // possible that two instances of the same subsequences have different
1286 // structure. Each type of structure found is assigned a number. This
1287 // DenseMap maps an IRSimilarityCandidate to which type of similarity
1288 // discovered it fits within.
1289 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1291 // Find the compatibility from each candidate to the others to determine
1292 // which candidates overlap and which have the same structure by mapping
1293 // each structure to a different group.
1294 bool SameStructure;
1295 bool Inserted;
1296 unsigned CurrentGroupNum = 0;
1297 unsigned OuterGroupNum;
1298 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupIt;
1299 DenseMap<IRSimilarityCandidate *, unsigned>::iterator CandToGroupItInner;
1300 DenseMap<unsigned, SimilarityGroup>::iterator CurrentGroupPair;
1302 // Iterate over the candidates to determine its structural and overlapping
1303 // compatibility with other instructions
1304 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingA;
1305 DenseMap<unsigned, DenseSet<unsigned>> ValueNumberMappingB;
1306 for (CandIt = CandsForRepSubstring.begin(),
1307 CandEndIt = CandsForRepSubstring.end();
1308 CandIt != CandEndIt; CandIt++) {
1310 // Determine if it has an assigned structural group already.
1311 CandToGroupIt = CandToGroup.find(&*CandIt);
1312 if (CandToGroupIt == CandToGroup.end()) {
1313 // If not, we assign it one, and add it to our mapping.
1314 std::tie(CandToGroupIt, Inserted) =
1315 CandToGroup.insert(std::make_pair(&*CandIt, CurrentGroupNum++));
1318 // Get the structural group number from the iterator.
1319 OuterGroupNum = CandToGroupIt->second;
1321 // Check if we already have a list of IRSimilarityCandidates for the current
1322 // structural group. Create one if one does not exist.
1323 CurrentGroupPair = StructuralGroups.find(OuterGroupNum);
1324 if (CurrentGroupPair == StructuralGroups.end()) {
1325 IRSimilarityCandidate::createCanonicalMappingFor(*CandIt);
1326 std::tie(CurrentGroupPair, Inserted) = StructuralGroups.insert(
1327 std::make_pair(OuterGroupNum, SimilarityGroup({*CandIt})));
1330 // Iterate over the IRSimilarityCandidates following the current
1331 // IRSimilarityCandidate in the list to determine whether the two
1332 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
1333 // of IRSimilarityCandidates.
1334 for (InnerCandIt = std::next(CandIt),
1335 InnerCandEndIt = CandsForRepSubstring.end();
1336 InnerCandIt != InnerCandEndIt; InnerCandIt++) {
1338 // We check if the inner item has a group already, if it does, we skip it.
1339 CandToGroupItInner = CandToGroup.find(&*InnerCandIt);
1340 if (CandToGroupItInner != CandToGroup.end())
1341 continue;
1343 // Check if we have found structural similarity between two candidates
1344 // that fully contains the first and second candidates.
1345 std::optional<std::pair<IRSimilarityCandidate *, IRSimilarityCandidate *>>
1346 LargerPair = CheckLargerCands(
1347 *CandIt, *InnerCandIt, IndexToIncludedCand, CandToOverallGroup);
1349 // If a pair was found, it means that we can assume that these smaller
1350 // substrings are also structurally similar. Use the larger candidates to
1351 // determine the canonical mapping between the two sections.
1352 if (LargerPair.has_value()) {
1353 SameStructure = true;
1354 InnerCandIt->createCanonicalRelationFrom(
1355 *CandIt, *LargerPair.value().first, *LargerPair.value().second);
1356 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1357 CurrentGroupPair->second.push_back(*InnerCandIt);
1358 continue;
1361 // Otherwise we determine if they have the same structure and add it to
1362 // vector if they match.
1363 ValueNumberMappingA.clear();
1364 ValueNumberMappingB.clear();
1365 SameStructure = IRSimilarityCandidate::compareStructure(
1366 *CandIt, *InnerCandIt, ValueNumberMappingA, ValueNumberMappingB);
1367 if (!SameStructure)
1368 continue;
1370 InnerCandIt->createCanonicalRelationFrom(*CandIt, ValueNumberMappingA,
1371 ValueNumberMappingB);
1372 CandToGroup.insert(std::make_pair(&*InnerCandIt, OuterGroupNum));
1373 CurrentGroupPair->second.push_back(*InnerCandIt);
1378 void IRSimilarityIdentifier::findCandidates(
1379 std::vector<IRInstructionData *> &InstrList,
1380 std::vector<unsigned> &IntegerMapping) {
1381 SuffixTree ST(IntegerMapping);
1383 std::vector<IRSimilarityCandidate> CandsForRepSubstring;
1384 std::vector<SimilarityGroup> NewCandidateGroups;
1386 DenseMap<unsigned, SimilarityGroup> StructuralGroups;
1387 DenseMap<unsigned, DenseSet<IRSimilarityCandidate *>> IndexToIncludedCand;
1388 DenseMap<IRSimilarityCandidate *, unsigned> CandToGroup;
1390 // Iterate over the subsequences found by the Suffix Tree to create
1391 // IRSimilarityCandidates for each repeated subsequence and determine which
1392 // instances are structurally similar to one another.
1394 // Sort the suffix tree from longest substring to shortest.
1395 std::vector<SuffixTree::RepeatedSubstring> RSes;
1396 for (SuffixTree::RepeatedSubstring &RS : ST)
1397 RSes.push_back(RS);
1399 llvm::stable_sort(RSes, [](const SuffixTree::RepeatedSubstring &LHS,
1400 const SuffixTree::RepeatedSubstring &RHS) {
1401 return LHS.Length > RHS.Length;
1403 for (SuffixTree::RepeatedSubstring &RS : RSes) {
1404 createCandidatesFromSuffixTree(Mapper, InstrList, IntegerMapping, RS,
1405 CandsForRepSubstring);
1407 if (CandsForRepSubstring.size() < 2)
1408 continue;
1410 findCandidateStructures(CandsForRepSubstring, StructuralGroups,
1411 IndexToIncludedCand, CandToGroup);
1412 for (std::pair<unsigned, SimilarityGroup> &Group : StructuralGroups) {
1413 // We only add the group if it contains more than one
1414 // IRSimilarityCandidate. If there is only one, that means there is no
1415 // other repeated subsequence with the same structure.
1416 if (Group.second.size() > 1) {
1417 SimilarityCandidates->push_back(Group.second);
1418 // Iterate over each candidate in the group, and add an entry for each
1419 // instruction included with a mapping to a set of
1420 // IRSimilarityCandidates that include that instruction.
1421 for (IRSimilarityCandidate &IRCand : SimilarityCandidates->back()) {
1422 for (unsigned Idx = IRCand.getStartIdx(), Edx = IRCand.getEndIdx();
1423 Idx <= Edx; ++Idx)
1424 IndexToIncludedCand[Idx].insert(&IRCand);
1425 // Add mapping of candidate to the overall similarity group number.
1426 CandToGroup.insert(
1427 std::make_pair(&IRCand, SimilarityCandidates->size() - 1));
1432 CandsForRepSubstring.clear();
1433 StructuralGroups.clear();
1434 NewCandidateGroups.clear();
1438 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(
1439 ArrayRef<std::unique_ptr<Module>> Modules) {
1440 resetSimilarityCandidates();
1442 std::vector<IRInstructionData *> InstrList;
1443 std::vector<unsigned> IntegerMapping;
1444 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1445 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1446 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1447 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1448 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1450 populateMapper(Modules, InstrList, IntegerMapping);
1451 findCandidates(InstrList, IntegerMapping);
1453 return *SimilarityCandidates;
1456 SimilarityGroupList &IRSimilarityIdentifier::findSimilarity(Module &M) {
1457 resetSimilarityCandidates();
1458 Mapper.InstClassifier.EnableBranches = this->EnableBranches;
1459 Mapper.InstClassifier.EnableIndirectCalls = EnableIndirectCalls;
1460 Mapper.EnableMatchCallsByName = EnableMatchingCallsByName;
1461 Mapper.InstClassifier.EnableIntrinsics = EnableIntrinsics;
1462 Mapper.InstClassifier.EnableMustTailCalls = EnableMustTailCalls;
1464 std::vector<IRInstructionData *> InstrList;
1465 std::vector<unsigned> IntegerMapping;
1467 populateMapper(M, InstrList, IntegerMapping);
1468 findCandidates(InstrList, IntegerMapping);
1470 return *SimilarityCandidates;
1473 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass, "ir-similarity-identifier",
1474 "ir-similarity-identifier", false, true)
1476 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
1477 : ModulePass(ID) {
1478 initializeIRSimilarityIdentifierWrapperPassPass(
1479 *PassRegistry::getPassRegistry());
1482 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module &M) {
1483 IRSI.reset(new IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1484 MatchCallsByName, !DisableIntrinsics,
1485 false));
1486 return false;
1489 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module &M) {
1490 IRSI.reset();
1491 return false;
1494 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module &M) {
1495 IRSI->findSimilarity(M);
1496 return false;
1499 AnalysisKey IRSimilarityAnalysis::Key;
1500 IRSimilarityIdentifier IRSimilarityAnalysis::run(Module &M,
1501 ModuleAnalysisManager &) {
1502 auto IRSI = IRSimilarityIdentifier(!DisableBranches, !DisableIndirectCalls,
1503 MatchCallsByName, !DisableIntrinsics,
1504 false);
1505 IRSI.findSimilarity(M);
1506 return IRSI;
1509 PreservedAnalyses
1510 IRSimilarityAnalysisPrinterPass::run(Module &M, ModuleAnalysisManager &AM) {
1511 IRSimilarityIdentifier &IRSI = AM.getResult<IRSimilarityAnalysis>(M);
1512 std::optional<SimilarityGroupList> &SimilarityCandidatesOpt =
1513 IRSI.getSimilarity();
1515 for (std::vector<IRSimilarityCandidate> &CandVec : *SimilarityCandidatesOpt) {
1516 OS << CandVec.size() << " candidates of length "
1517 << CandVec.begin()->getLength() << ". Found in: \n";
1518 for (IRSimilarityCandidate &Cand : CandVec) {
1519 OS << " Function: " << Cand.front()->Inst->getFunction()->getName().str()
1520 << ", Basic Block: ";
1521 if (Cand.front()->Inst->getParent()->getName().str() == "")
1522 OS << "(unnamed)";
1523 else
1524 OS << Cand.front()->Inst->getParent()->getName().str();
1525 OS << "\n Start Instruction: ";
1526 Cand.frontInstruction()->print(OS);
1527 OS << "\n End Instruction: ";
1528 Cand.backInstruction()->print(OS);
1529 OS << "\n";
1533 return PreservedAnalyses::all();
1536 char IRSimilarityIdentifierWrapperPass::ID = 0;