1 //===- IRSimilarityIdentifier.cpp - Find similarity in a module -----------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 // Implementation file for the IRSimilarityIdentifier for identifying
11 // similarities in IR including the IRInstructionMapper.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Analysis/IRSimilarityIdentifier.h"
16 #include "llvm/ADT/DenseMap.h"
17 #include "llvm/IR/Intrinsics.h"
18 #include "llvm/IR/Operator.h"
19 #include "llvm/IR/User.h"
20 #include "llvm/InitializePasses.h"
21 #include "llvm/Support/SuffixTree.h"
24 using namespace IRSimilarity
;
26 IRInstructionData::IRInstructionData(Instruction
&I
, bool Legality
,
27 IRInstructionDataList
&IDList
)
28 : Inst(&I
), Legal(Legality
), IDL(&IDList
) {
29 // We check for whether we have a comparison instruction. If it is, we
30 // find the "less than" version of the predicate for consistency for
31 // comparison instructions throught the program.
32 if (CmpInst
*C
= dyn_cast
<CmpInst
>(&I
)) {
33 CmpInst::Predicate Predicate
= predicateForConsistency(C
);
34 if (Predicate
!= C
->getPredicate())
35 RevisedPredicate
= Predicate
;
38 // Here we collect the operands and their types for determining whether
39 // the structure of the operand use matches between two different candidates.
40 for (Use
&OI
: I
.operands()) {
41 if (isa
<CmpInst
>(I
) && RevisedPredicate
.hasValue()) {
42 // If we have a CmpInst where the predicate is reversed, it means the
43 // operands must be reversed as well.
44 OperVals
.insert(OperVals
.begin(), OI
.get());
48 OperVals
.push_back(OI
.get());
52 CmpInst::Predicate
IRInstructionData::predicateForConsistency(CmpInst
*CI
) {
53 switch (CI
->getPredicate()) {
54 case CmpInst::FCMP_OGT
:
55 case CmpInst::FCMP_UGT
:
56 case CmpInst::FCMP_OGE
:
57 case CmpInst::FCMP_UGE
:
58 case CmpInst::ICMP_SGT
:
59 case CmpInst::ICMP_UGT
:
60 case CmpInst::ICMP_SGE
:
61 case CmpInst::ICMP_UGE
:
62 return CI
->getSwappedPredicate();
64 return CI
->getPredicate();
68 CmpInst::Predicate
IRInstructionData::getPredicate() const {
69 assert(isa
<CmpInst
>(Inst
) &&
70 "Can only get a predicate from a compare instruction");
72 if (RevisedPredicate
.hasValue())
73 return RevisedPredicate
.getValue();
75 return cast
<CmpInst
>(Inst
)->getPredicate();
78 static StringRef
getCalledFunctionName(CallInst
&CI
) {
79 assert(CI
.getCalledFunction() != nullptr && "Called Function is nullptr?");
81 return CI
.getCalledFunction()->getName();
84 bool IRSimilarity::isClose(const IRInstructionData
&A
,
85 const IRInstructionData
&B
) {
87 if (!A
.Legal
|| !B
.Legal
)
90 // Check if we are performing the same sort of operation on the same types
91 // but not on the same values.
92 if (!A
.Inst
->isSameOperationAs(B
.Inst
)) {
93 // If there is a predicate, this means that either there is a swapped
94 // predicate, or that the types are different, we want to make sure that
95 // the predicates are equivalent via swapping.
96 if (isa
<CmpInst
>(A
.Inst
) && isa
<CmpInst
>(B
.Inst
)) {
98 if (A
.getPredicate() != B
.getPredicate())
101 // If the predicates are the same via swap, make sure that the types are
103 auto ZippedTypes
= zip(A
.OperVals
, B
.OperVals
);
106 ZippedTypes
, [](std::tuple
<llvm::Value
*, llvm::Value
*> R
) {
107 return std::get
<0>(R
)->getType() == std::get
<1>(R
)->getType();
114 // Since any GEP Instruction operands after the first operand cannot be
115 // defined by a register, we must make sure that the operands after the first
116 // are the same in the two instructions
117 if (auto *GEP
= dyn_cast
<GetElementPtrInst
>(A
.Inst
)) {
118 auto *OtherGEP
= cast
<GetElementPtrInst
>(B
.Inst
);
120 // If the instructions do not have the same inbounds restrictions, we do
121 // not consider them the same.
122 if (GEP
->isInBounds() != OtherGEP
->isInBounds())
125 auto ZippedOperands
= zip(GEP
->indices(), OtherGEP
->indices());
127 // We increment here since we do not care about the first instruction,
128 // we only care about the following operands since they must be the
129 // exact same to be considered similar.
130 return all_of(drop_begin(ZippedOperands
),
131 [](std::tuple
<llvm::Use
&, llvm::Use
&> R
) {
132 return std::get
<0>(R
) == std::get
<1>(R
);
136 // If the instructions are functions, we make sure that the function name is
137 // the same. We already know that the types are since is isSameOperationAs is
139 if (isa
<CallInst
>(A
.Inst
) && isa
<CallInst
>(B
.Inst
)) {
140 CallInst
*CIA
= cast
<CallInst
>(A
.Inst
);
141 CallInst
*CIB
= cast
<CallInst
>(B
.Inst
);
142 if (getCalledFunctionName(*CIA
).compare(getCalledFunctionName(*CIB
)) != 0)
149 // TODO: This is the same as the MachineOutliner, and should be consolidated
150 // into the same interface.
151 void IRInstructionMapper::convertToUnsignedVec(
152 BasicBlock
&BB
, std::vector
<IRInstructionData
*> &InstrList
,
153 std::vector
<unsigned> &IntegerMapping
) {
154 BasicBlock::iterator It
= BB
.begin();
156 std::vector
<unsigned> IntegerMappingForBB
;
157 std::vector
<IRInstructionData
*> InstrListForBB
;
159 HaveLegalRange
= false;
160 CanCombineWithPrevInstr
= false;
161 AddedIllegalLastTime
= true;
163 for (BasicBlock::iterator Et
= BB
.end(); It
!= Et
; ++It
) {
164 switch (InstClassifier
.visit(*It
)) {
165 case InstrType::Legal
:
166 mapToLegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
);
168 case InstrType::Illegal
:
169 mapToIllegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
);
171 case InstrType::Invisible
:
172 AddedIllegalLastTime
= false;
177 if (HaveLegalRange
) {
178 mapToIllegalUnsigned(It
, IntegerMappingForBB
, InstrListForBB
, true);
179 for (IRInstructionData
*ID
: InstrListForBB
)
180 this->IDL
->push_back(*ID
);
181 llvm::append_range(InstrList
, InstrListForBB
);
182 llvm::append_range(IntegerMapping
, IntegerMappingForBB
);
186 // TODO: This is the same as the MachineOutliner, and should be consolidated
187 // into the same interface.
188 unsigned IRInstructionMapper::mapToLegalUnsigned(
189 BasicBlock::iterator
&It
, std::vector
<unsigned> &IntegerMappingForBB
,
190 std::vector
<IRInstructionData
*> &InstrListForBB
) {
191 // We added something legal, so we should unset the AddedLegalLastTime
193 AddedIllegalLastTime
= false;
195 // If we have at least two adjacent legal instructions (which may have
196 // invisible instructions in between), remember that.
197 if (CanCombineWithPrevInstr
)
198 HaveLegalRange
= true;
199 CanCombineWithPrevInstr
= true;
201 // Get the integer for this instruction or give it the current
203 IRInstructionData
*ID
= allocateIRInstructionData(*It
, true, *IDL
);
204 InstrListForBB
.push_back(ID
);
206 // Add to the instruction list
208 DenseMap
<IRInstructionData
*, unsigned, IRInstructionDataTraits
>::iterator
210 std::tie(ResultIt
, WasInserted
) =
211 InstructionIntegerMap
.insert(std::make_pair(ID
, LegalInstrNumber
));
212 unsigned INumber
= ResultIt
->second
;
214 // There was an insertion.
218 IntegerMappingForBB
.push_back(INumber
);
220 // Make sure we don't overflow or use any integers reserved by the DenseMap.
221 assert(LegalInstrNumber
< IllegalInstrNumber
&&
222 "Instruction mapping overflow!");
224 assert(LegalInstrNumber
!= DenseMapInfo
<unsigned>::getEmptyKey() &&
225 "Tried to assign DenseMap tombstone or empty key to instruction.");
226 assert(LegalInstrNumber
!= DenseMapInfo
<unsigned>::getTombstoneKey() &&
227 "Tried to assign DenseMap tombstone or empty key to instruction.");
233 IRInstructionMapper::allocateIRInstructionData(Instruction
&I
, bool Legality
,
234 IRInstructionDataList
&IDL
) {
235 return new (InstDataAllocator
->Allocate()) IRInstructionData(I
, Legality
, IDL
);
238 IRInstructionDataList
*
239 IRInstructionMapper::allocateIRInstructionDataList() {
240 return new (IDLAllocator
->Allocate()) IRInstructionDataList();
243 // TODO: This is the same as the MachineOutliner, and should be consolidated
244 // into the same interface.
245 unsigned IRInstructionMapper::mapToIllegalUnsigned(
246 BasicBlock::iterator
&It
, std::vector
<unsigned> &IntegerMappingForBB
,
247 std::vector
<IRInstructionData
*> &InstrListForBB
, bool End
) {
248 // Can't combine an illegal instruction. Set the flag.
249 CanCombineWithPrevInstr
= false;
251 // Only add one illegal number per range of legal numbers.
252 if (AddedIllegalLastTime
)
253 return IllegalInstrNumber
;
255 IRInstructionData
*ID
= nullptr;
257 ID
= allocateIRInstructionData(*It
, false, *IDL
);
258 InstrListForBB
.push_back(ID
);
260 // Remember that we added an illegal number last time.
261 AddedIllegalLastTime
= true;
262 unsigned INumber
= IllegalInstrNumber
;
263 IntegerMappingForBB
.push_back(IllegalInstrNumber
--);
265 assert(LegalInstrNumber
< IllegalInstrNumber
&&
266 "Instruction mapping overflow!");
268 assert(IllegalInstrNumber
!= DenseMapInfo
<unsigned>::getEmptyKey() &&
269 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
271 assert(IllegalInstrNumber
!= DenseMapInfo
<unsigned>::getTombstoneKey() &&
272 "IllegalInstrNumber cannot be DenseMap tombstone or empty key!");
277 IRSimilarityCandidate::IRSimilarityCandidate(unsigned StartIdx
, unsigned Len
,
278 IRInstructionData
*FirstInstIt
,
279 IRInstructionData
*LastInstIt
)
280 : StartIdx(StartIdx
), Len(Len
) {
282 assert(FirstInstIt
!= nullptr && "Instruction is nullptr!");
283 assert(LastInstIt
!= nullptr && "Instruction is nullptr!");
284 assert(StartIdx
+ Len
> StartIdx
&&
285 "Overflow for IRSimilarityCandidate range?");
286 assert(Len
- 1 == static_cast<unsigned>(std::distance(
287 iterator(FirstInstIt
), iterator(LastInstIt
))) &&
288 "Length of the first and last IRInstructionData do not match the "
291 // We iterate over the given instructions, and map each unique value
292 // to a unique number in the IRSimilarityCandidate ValueToNumber and
293 // NumberToValue maps. A constant get its own value globally, the individual
294 // uses of the constants are not considered to be unique.
296 // IR: Mapping Added:
297 // %add1 = add i32 %a, c1 %add1 -> 3, %a -> 1, c1 -> 2
298 // %add2 = add i32 %a, %1 %add2 -> 4
299 // %add3 = add i32 c2, c1 %add3 -> 6, c2 -> 5
301 // when replace with global values, starting from 1, would be
306 unsigned LocalValNumber
= 1;
307 IRInstructionDataList::iterator ID
= iterator(*FirstInstIt
);
308 for (unsigned Loc
= StartIdx
; Loc
< StartIdx
+ Len
; Loc
++, ID
++) {
309 // Map the operand values to an unsigned integer if it does not already
310 // have an unsigned integer assigned to it.
311 for (Value
*Arg
: ID
->OperVals
)
312 if (ValueToNumber
.find(Arg
) == ValueToNumber
.end()) {
313 ValueToNumber
.try_emplace(Arg
, LocalValNumber
);
314 NumberToValue
.try_emplace(LocalValNumber
, Arg
);
318 // Mapping the instructions to an unsigned integer if it is not already
319 // exist in the mapping.
320 if (ValueToNumber
.find(ID
->Inst
) == ValueToNumber
.end()) {
321 ValueToNumber
.try_emplace(ID
->Inst
, LocalValNumber
);
322 NumberToValue
.try_emplace(LocalValNumber
, ID
->Inst
);
327 // Setting the first and last instruction data pointers for the candidate. If
328 // we got through the entire for loop without hitting an assert, we know
329 // that both of these instructions are not nullptrs.
330 FirstInst
= FirstInstIt
;
331 LastInst
= LastInstIt
;
334 bool IRSimilarityCandidate::isSimilar(const IRSimilarityCandidate
&A
,
335 const IRSimilarityCandidate
&B
) {
336 if (A
.getLength() != B
.getLength())
339 auto InstrDataForBoth
=
340 zip(make_range(A
.begin(), A
.end()), make_range(B
.begin(), B
.end()));
342 return all_of(InstrDataForBoth
,
343 [](std::tuple
<IRInstructionData
&, IRInstructionData
&> R
) {
344 IRInstructionData
&A
= std::get
<0>(R
);
345 IRInstructionData
&B
= std::get
<1>(R
);
346 if (!A
.Legal
|| !B
.Legal
)
348 return isClose(A
, B
);
352 /// Determine if one or more of the assigned global value numbers for the
353 /// operands in \p TargetValueNumbers is in the current mapping set for operand
354 /// numbers in \p SourceOperands. The set of possible corresponding global
355 /// value numbers are replaced with the most recent version of compatible
358 /// \param [in] SourceValueToNumberMapping - The mapping of a Value to global
359 /// value number for the source IRInstructionCandidate.
360 /// \param [in, out] CurrentSrcTgtNumberMapping - The current mapping of source
361 /// IRSimilarityCandidate global value numbers to a set of possible numbers in
363 /// \param [in] SourceOperands - The operands in the original
364 /// IRSimilarityCandidate in the current instruction.
365 /// \param [in] TargetValueNumbers - The global value numbers of the operands in
366 /// the corresponding Instruction in the other IRSimilarityCandidate.
367 /// \returns true if there exists a possible mapping between the source
368 /// Instruction operands and the target Instruction operands, and false if not.
369 static bool checkNumberingAndReplaceCommutative(
370 const DenseMap
<Value
*, unsigned> &SourceValueToNumberMapping
,
371 DenseMap
<unsigned, DenseSet
<unsigned>> &CurrentSrcTgtNumberMapping
,
372 ArrayRef
<Value
*> &SourceOperands
,
373 DenseSet
<unsigned> &TargetValueNumbers
){
375 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator ValueMappingIt
;
380 // Iterate over the operands in the source IRSimilarityCandidate to determine
381 // whether there exists an operand in the other IRSimilarityCandidate that
382 // creates a valid mapping of Value to Value between the
383 // IRSimilarityCaniddates.
384 for (Value
*V
: SourceOperands
) {
385 ArgVal
= SourceValueToNumberMapping
.find(V
)->second
;
387 std::tie(ValueMappingIt
, WasInserted
) = CurrentSrcTgtNumberMapping
.insert(
388 std::make_pair(ArgVal
, TargetValueNumbers
));
390 // Instead of finding a current mapping, we inserted a set. This means a
391 // mapping did not exist for the source Instruction operand, it has no
392 // current constraints we need to check.
396 // If a mapping already exists for the source operand to the values in the
397 // other IRSimilarityCandidate we need to iterate over the items in other
398 // IRSimilarityCandidate's Instruction to determine whether there is a valid
399 // mapping of Value to Value.
400 DenseSet
<unsigned> NewSet
;
401 for (unsigned &Curr
: ValueMappingIt
->second
)
402 // If we can find the value in the mapping, we add it to the new set.
403 if (TargetValueNumbers
.contains(Curr
))
406 // If we could not find a Value, return 0.
410 // Otherwise replace the old mapping with the newly constructed one.
411 if (NewSet
.size() != ValueMappingIt
->second
.size())
412 ValueMappingIt
->second
.swap(NewSet
);
414 // We have reached no conclusions about the mapping, and cannot remove
415 // any items from the other operands, so we move to check the next operand.
416 if (ValueMappingIt
->second
.size() != 1)
420 unsigned ValToRemove
= *ValueMappingIt
->second
.begin();
421 // When there is only one item left in the mapping for and operand, remove
422 // the value from the other operands. If it results in there being no
423 // mapping, return false, it means the mapping is wrong
424 for (Value
*InnerV
: SourceOperands
) {
428 unsigned InnerVal
= SourceValueToNumberMapping
.find(InnerV
)->second
;
429 ValueMappingIt
= CurrentSrcTgtNumberMapping
.find(InnerVal
);
430 if (ValueMappingIt
== CurrentSrcTgtNumberMapping
.end())
433 ValueMappingIt
->second
.erase(ValToRemove
);
434 if (ValueMappingIt
->second
.empty())
442 /// Determine if operand number \p TargetArgVal is in the current mapping set
443 /// for operand number \p SourceArgVal.
445 /// \param [in, out] CurrentSrcTgtNumberMapping current mapping of global
446 /// value numbers from source IRSimilarityCandidate to target
447 /// IRSimilarityCandidate.
448 /// \param [in] SourceArgVal The global value number for an operand in the
449 /// in the original candidate.
450 /// \param [in] TargetArgVal The global value number for the corresponding
451 /// operand in the other candidate.
452 /// \returns True if there exists a mapping and false if not.
453 bool checkNumberingAndReplace(
454 DenseMap
<unsigned, DenseSet
<unsigned>> &CurrentSrcTgtNumberMapping
,
455 unsigned SourceArgVal
, unsigned TargetArgVal
) {
456 // We are given two unsigned integers representing the global values of
457 // the operands in different IRSimilarityCandidates and a current mapping
460 // Source Operand GVN: 1
461 // Target Operand GVN: 2
462 // CurrentMapping: {1: {1, 2}}
464 // Since we have mapping, and the target operand is contained in the set, we
466 // CurrentMapping: {1: {2}}
467 // and can return true. But, if the mapping was
468 // CurrentMapping: {1: {3}}
469 // we would return false.
472 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator Val
;
474 std::tie(Val
, WasInserted
) = CurrentSrcTgtNumberMapping
.insert(
475 std::make_pair(SourceArgVal
, DenseSet
<unsigned>({TargetArgVal
})));
477 // If we created a new mapping, then we are done.
481 // If there is more than one option in the mapping set, and the target value
482 // is included in the mapping set replace that set with one that only includes
483 // the target value, as it is the only valid mapping via the non commutative
486 DenseSet
<unsigned> &TargetSet
= Val
->second
;
487 if (TargetSet
.size() > 1 && TargetSet
.contains(TargetArgVal
)) {
489 TargetSet
.insert(TargetArgVal
);
493 // Return true if we can find the value in the set.
494 return TargetSet
.contains(TargetArgVal
);
497 bool IRSimilarityCandidate::compareNonCommutativeOperandMapping(
498 OperandMapping A
, OperandMapping B
) {
499 // Iterators to keep track of where we are in the operands for each
501 ArrayRef
<Value
*>::iterator VItA
= A
.OperVals
.begin();
502 ArrayRef
<Value
*>::iterator VItB
= B
.OperVals
.begin();
503 unsigned OperandLength
= A
.OperVals
.size();
505 // For each operand, get the value numbering and ensure it is consistent.
506 for (unsigned Idx
= 0; Idx
< OperandLength
; Idx
++, VItA
++, VItB
++) {
507 unsigned OperValA
= A
.IRSC
.ValueToNumber
.find(*VItA
)->second
;
508 unsigned OperValB
= B
.IRSC
.ValueToNumber
.find(*VItB
)->second
;
510 // Attempt to add a set with only the target value. If there is no mapping
511 // we can create it here.
513 // For an instruction like a subtraction:
514 // IRSimilarityCandidateA: IRSimilarityCandidateB:
515 // %resultA = sub %a, %b %resultB = sub %d, %e
517 // We map %a -> %d and %b -> %e.
519 // And check to see whether their mapping is consistent in
520 // checkNumberingAndReplace.
522 if (!checkNumberingAndReplace(A
.ValueNumberMapping
, OperValA
, OperValB
))
525 if (!checkNumberingAndReplace(B
.ValueNumberMapping
, OperValB
, OperValA
))
531 bool IRSimilarityCandidate::compareCommutativeOperandMapping(
532 OperandMapping A
, OperandMapping B
) {
533 DenseSet
<unsigned> ValueNumbersA
;
534 DenseSet
<unsigned> ValueNumbersB
;
536 ArrayRef
<Value
*>::iterator VItA
= A
.OperVals
.begin();
537 ArrayRef
<Value
*>::iterator VItB
= B
.OperVals
.begin();
538 unsigned OperandLength
= A
.OperVals
.size();
540 // Find the value number sets for the operands.
541 for (unsigned Idx
= 0; Idx
< OperandLength
;
542 Idx
++, VItA
++, VItB
++) {
543 ValueNumbersA
.insert(A
.IRSC
.ValueToNumber
.find(*VItA
)->second
);
544 ValueNumbersB
.insert(B
.IRSC
.ValueToNumber
.find(*VItB
)->second
);
547 // Iterate over the operands in the first IRSimilarityCandidate and make sure
548 // there exists a possible mapping with the operands in the second
549 // IRSimilarityCandidate.
550 if (!checkNumberingAndReplaceCommutative(A
.IRSC
.ValueToNumber
,
551 A
.ValueNumberMapping
, A
.OperVals
,
555 // Iterate over the operands in the second IRSimilarityCandidate and make sure
556 // there exists a possible mapping with the operands in the first
557 // IRSimilarityCandidate.
558 if (!checkNumberingAndReplaceCommutative(B
.IRSC
.ValueToNumber
,
559 B
.ValueNumberMapping
, B
.OperVals
,
566 bool IRSimilarityCandidate::compareStructure(const IRSimilarityCandidate
&A
,
567 const IRSimilarityCandidate
&B
) {
568 if (A
.getLength() != B
.getLength())
571 if (A
.ValueToNumber
.size() != B
.ValueToNumber
.size())
574 iterator ItA
= A
.begin();
575 iterator ItB
= B
.begin();
577 // These sets create a create a mapping between the values in one candidate
578 // to values in the other candidate. If we create a set with one element,
579 // and that same element maps to the original element in the candidate
580 // we have a good mapping.
581 DenseMap
<unsigned, DenseSet
<unsigned>> ValueNumberMappingA
;
582 DenseMap
<unsigned, DenseSet
<unsigned>> ValueNumberMappingB
;
583 DenseMap
<unsigned, DenseSet
<unsigned>>::iterator ValueMappingIt
;
587 // Iterate over the instructions contained in each candidate
588 unsigned SectionLength
= A
.getStartIdx() + A
.getLength();
589 for (unsigned Loc
= A
.getStartIdx(); Loc
< SectionLength
;
590 ItA
++, ItB
++, Loc
++) {
591 // Make sure the instructions are similar to one another.
592 if (!isClose(*ItA
, *ItB
))
595 Instruction
*IA
= ItA
->Inst
;
596 Instruction
*IB
= ItB
->Inst
;
598 if (!ItA
->Legal
|| !ItB
->Legal
)
601 // Get the operand sets for the instructions.
602 ArrayRef
<Value
*> OperValsA
= ItA
->OperVals
;
603 ArrayRef
<Value
*> OperValsB
= ItB
->OperVals
;
605 unsigned InstValA
= A
.ValueToNumber
.find(IA
)->second
;
606 unsigned InstValB
= B
.ValueToNumber
.find(IB
)->second
;
608 // Ensure that the mappings for the instructions exists.
609 std::tie(ValueMappingIt
, WasInserted
) = ValueNumberMappingA
.insert(
610 std::make_pair(InstValA
, DenseSet
<unsigned>({InstValB
})));
611 if (!WasInserted
&& !ValueMappingIt
->second
.contains(InstValB
))
614 std::tie(ValueMappingIt
, WasInserted
) = ValueNumberMappingB
.insert(
615 std::make_pair(InstValB
, DenseSet
<unsigned>({InstValA
})));
616 if (!WasInserted
&& !ValueMappingIt
->second
.contains(InstValA
))
619 // We have different paths for commutative instructions and non-commutative
620 // instructions since commutative instructions could allow multiple mappings
621 // to certain values.
622 if (IA
->isCommutative() && !isa
<FPMathOperator
>(IA
)) {
623 if (!compareCommutativeOperandMapping(
624 {A
, OperValsA
, ValueNumberMappingA
},
625 {B
, OperValsB
, ValueNumberMappingB
}))
630 // Handle the non-commutative cases.
631 if (!compareNonCommutativeOperandMapping(
632 {A
, OperValsA
, ValueNumberMappingA
},
633 {B
, OperValsB
, ValueNumberMappingB
}))
639 bool IRSimilarityCandidate::overlap(const IRSimilarityCandidate
&A
,
640 const IRSimilarityCandidate
&B
) {
641 auto DoesOverlap
= [](const IRSimilarityCandidate
&X
,
642 const IRSimilarityCandidate
&Y
) {
644 // XXXXXX X starts before Y ends
645 // YYYYYYY Y starts after X starts
646 return X
.StartIdx
<= Y
.getEndIdx() && Y
.StartIdx
>= X
.StartIdx
;
649 return DoesOverlap(A
, B
) || DoesOverlap(B
, A
);
652 void IRSimilarityIdentifier::populateMapper(
653 Module
&M
, std::vector
<IRInstructionData
*> &InstrList
,
654 std::vector
<unsigned> &IntegerMapping
) {
656 std::vector
<IRInstructionData
*> InstrListForModule
;
657 std::vector
<unsigned> IntegerMappingForModule
;
658 // Iterate over the functions in the module to map each Instruction in each
659 // BasicBlock to an unsigned integer.
660 for (Function
&F
: M
) {
665 for (BasicBlock
&BB
: F
) {
667 if (BB
.sizeWithoutDebug() < 2)
670 // BB has potential to have similarity since it has a size greater than 2
671 // and can therefore match other regions greater than 2. Map it to a list
672 // of unsigned integers.
673 Mapper
.convertToUnsignedVec(BB
, InstrListForModule
,
674 IntegerMappingForModule
);
678 // Insert the InstrListForModule at the end of the overall InstrList so that
679 // we can have a long InstrList for the entire set of Modules being analyzed.
680 llvm::append_range(InstrList
, InstrListForModule
);
681 // Do the same as above, but for IntegerMapping.
682 llvm::append_range(IntegerMapping
, IntegerMappingForModule
);
685 void IRSimilarityIdentifier::populateMapper(
686 ArrayRef
<std::unique_ptr
<Module
>> &Modules
,
687 std::vector
<IRInstructionData
*> &InstrList
,
688 std::vector
<unsigned> &IntegerMapping
) {
690 // Iterate over, and map the instructions in each module.
691 for (const std::unique_ptr
<Module
> &M
: Modules
)
692 populateMapper(*M
, InstrList
, IntegerMapping
);
695 /// From a repeated subsequence, find all the different instances of the
696 /// subsequence from the \p InstrList, and create an IRSimilarityCandidate from
697 /// the IRInstructionData in subsequence.
699 /// \param [in] Mapper - The instruction mapper for sanity checks.
700 /// \param [in] InstrList - The vector that holds the instruction data.
701 /// \param [in] IntegerMapping - The vector that holds the mapped integers.
702 /// \param [out] CandsForRepSubstring - The vector to store the generated
703 /// IRSimilarityCandidates.
704 static void createCandidatesFromSuffixTree(
705 const IRInstructionMapper
& Mapper
, std::vector
<IRInstructionData
*> &InstrList
,
706 std::vector
<unsigned> &IntegerMapping
, SuffixTree::RepeatedSubstring
&RS
,
707 std::vector
<IRSimilarityCandidate
> &CandsForRepSubstring
) {
709 unsigned StringLen
= RS
.Length
;
711 // Create an IRSimilarityCandidate for instance of this subsequence \p RS.
712 for (const unsigned &StartIdx
: RS
.StartIndices
) {
713 unsigned EndIdx
= StartIdx
+ StringLen
- 1;
715 // Check that this subsequence does not contain an illegal instruction.
716 bool ContainsIllegal
= false;
717 for (unsigned CurrIdx
= StartIdx
; CurrIdx
<= EndIdx
; CurrIdx
++) {
718 unsigned Key
= IntegerMapping
[CurrIdx
];
719 if (Key
> Mapper
.IllegalInstrNumber
) {
720 ContainsIllegal
= true;
725 // If we have an illegal instruction, we should not create an
726 // IRSimilarityCandidate for this region.
730 // We are getting iterators to the instructions in this region of code
731 // by advancing the start and end indices from the start of the
733 std::vector
<IRInstructionData
*>::iterator StartIt
= InstrList
.begin();
734 std::advance(StartIt
, StartIdx
);
735 std::vector
<IRInstructionData
*>::iterator EndIt
= InstrList
.begin();
736 std::advance(EndIt
, EndIdx
);
738 CandsForRepSubstring
.emplace_back(StartIdx
, StringLen
, *StartIt
, *EndIt
);
742 /// From the list of IRSimilarityCandidates, perform a comparison between each
743 /// IRSimilarityCandidate to determine if there are overlapping
744 /// IRInstructionData, or if they do not have the same structure.
746 /// \param [in] CandsForRepSubstring - The vector containing the
747 /// IRSimilarityCandidates.
748 /// \param [out] StructuralGroups - the mapping of unsigned integers to vector
749 /// of IRSimilarityCandidates where each of the IRSimilarityCandidates in the
750 /// vector are structurally similar to one another.
751 static void findCandidateStructures(
752 std::vector
<IRSimilarityCandidate
> &CandsForRepSubstring
,
753 DenseMap
<unsigned, SimilarityGroup
> &StructuralGroups
) {
754 std::vector
<IRSimilarityCandidate
>::iterator CandIt
, CandEndIt
, InnerCandIt
,
757 // IRSimilarityCandidates each have a structure for operand use. It is
758 // possible that two instances of the same subsequences have different
759 // structure. Each type of structure found is assigned a number. This
760 // DenseMap maps an IRSimilarityCandidate to which type of similarity
761 // discovered it fits within.
762 DenseMap
<IRSimilarityCandidate
*, unsigned> CandToGroup
;
764 // Find the compatibility from each candidate to the others to determine
765 // which candidates overlap and which have the same structure by mapping
766 // each structure to a different group.
769 unsigned CurrentGroupNum
= 0;
770 unsigned OuterGroupNum
;
771 DenseMap
<IRSimilarityCandidate
*, unsigned>::iterator CandToGroupIt
;
772 DenseMap
<IRSimilarityCandidate
*, unsigned>::iterator CandToGroupItInner
;
773 DenseMap
<unsigned, SimilarityGroup
>::iterator CurrentGroupPair
;
775 // Iterate over the candidates to determine its structural and overlapping
776 // compatibility with other instructions
777 for (CandIt
= CandsForRepSubstring
.begin(),
778 CandEndIt
= CandsForRepSubstring
.end();
779 CandIt
!= CandEndIt
; CandIt
++) {
781 // Determine if it has an assigned structural group already.
782 CandToGroupIt
= CandToGroup
.find(&*CandIt
);
783 if (CandToGroupIt
== CandToGroup
.end()) {
784 // If not, we assign it one, and add it to our mapping.
785 std::tie(CandToGroupIt
, Inserted
) =
786 CandToGroup
.insert(std::make_pair(&*CandIt
, CurrentGroupNum
++));
789 // Get the structural group number from the iterator.
790 OuterGroupNum
= CandToGroupIt
->second
;
792 // Check if we already have a list of IRSimilarityCandidates for the current
793 // structural group. Create one if one does not exist.
794 CurrentGroupPair
= StructuralGroups
.find(OuterGroupNum
);
795 if (CurrentGroupPair
== StructuralGroups
.end())
796 std::tie(CurrentGroupPair
, Inserted
) = StructuralGroups
.insert(
797 std::make_pair(OuterGroupNum
, SimilarityGroup({*CandIt
})));
799 // Iterate over the IRSimilarityCandidates following the current
800 // IRSimilarityCandidate in the list to determine whether the two
801 // IRSimilarityCandidates are compatible. This is so we do not repeat pairs
802 // of IRSimilarityCandidates.
803 for (InnerCandIt
= std::next(CandIt
),
804 InnerCandEndIt
= CandsForRepSubstring
.end();
805 InnerCandIt
!= InnerCandEndIt
; InnerCandIt
++) {
807 // We check if the inner item has a group already, if it does, we skip it.
808 CandToGroupItInner
= CandToGroup
.find(&*InnerCandIt
);
809 if (CandToGroupItInner
!= CandToGroup
.end())
812 // Otherwise we determine if they have the same structure and add it to
813 // vector if they match.
815 IRSimilarityCandidate::compareStructure(*CandIt
, *InnerCandIt
);
819 CandToGroup
.insert(std::make_pair(&*InnerCandIt
, OuterGroupNum
));
820 CurrentGroupPair
->second
.push_back(*InnerCandIt
);
825 void IRSimilarityIdentifier::findCandidates(
826 std::vector
<IRInstructionData
*> &InstrList
,
827 std::vector
<unsigned> &IntegerMapping
) {
828 SuffixTree
ST(IntegerMapping
);
830 std::vector
<IRSimilarityCandidate
> CandsForRepSubstring
;
831 std::vector
<SimilarityGroup
> NewCandidateGroups
;
833 DenseMap
<unsigned, SimilarityGroup
> StructuralGroups
;
835 // Iterate over the subsequences found by the Suffix Tree to create
836 // IRSimilarityCandidates for each repeated subsequence and determine which
837 // instances are structurally similar to one another.
838 for (SuffixTree::RepeatedSubstring
&RS
: ST
) {
839 createCandidatesFromSuffixTree(Mapper
, InstrList
, IntegerMapping
, RS
,
840 CandsForRepSubstring
);
842 if (CandsForRepSubstring
.size() < 2)
845 findCandidateStructures(CandsForRepSubstring
, StructuralGroups
);
846 for (std::pair
<unsigned, SimilarityGroup
> &Group
: StructuralGroups
)
847 // We only add the group if it contains more than one
848 // IRSimilarityCandidate. If there is only one, that means there is no
849 // other repeated subsequence with the same structure.
850 if (Group
.second
.size() > 1)
851 SimilarityCandidates
->push_back(Group
.second
);
853 CandsForRepSubstring
.clear();
854 StructuralGroups
.clear();
855 NewCandidateGroups
.clear();
859 SimilarityGroupList
&IRSimilarityIdentifier::findSimilarity(
860 ArrayRef
<std::unique_ptr
<Module
>> Modules
) {
861 resetSimilarityCandidates();
863 std::vector
<IRInstructionData
*> InstrList
;
864 std::vector
<unsigned> IntegerMapping
;
866 populateMapper(Modules
, InstrList
, IntegerMapping
);
867 findCandidates(InstrList
, IntegerMapping
);
869 return SimilarityCandidates
.getValue();
872 SimilarityGroupList
&IRSimilarityIdentifier::findSimilarity(Module
&M
) {
873 resetSimilarityCandidates();
875 std::vector
<IRInstructionData
*> InstrList
;
876 std::vector
<unsigned> IntegerMapping
;
878 populateMapper(M
, InstrList
, IntegerMapping
);
879 findCandidates(InstrList
, IntegerMapping
);
881 return SimilarityCandidates
.getValue();
884 INITIALIZE_PASS(IRSimilarityIdentifierWrapperPass
, "ir-similarity-identifier",
885 "ir-similarity-identifier", false, true)
887 IRSimilarityIdentifierWrapperPass::IRSimilarityIdentifierWrapperPass()
889 initializeIRSimilarityIdentifierWrapperPassPass(
890 *PassRegistry::getPassRegistry());
893 bool IRSimilarityIdentifierWrapperPass::doInitialization(Module
&M
) {
894 IRSI
.reset(new IRSimilarityIdentifier());
898 bool IRSimilarityIdentifierWrapperPass::doFinalization(Module
&M
) {
903 bool IRSimilarityIdentifierWrapperPass::runOnModule(Module
&M
) {
904 IRSI
->findSimilarity(M
);
908 AnalysisKey
IRSimilarityAnalysis::Key
;
909 IRSimilarityIdentifier
IRSimilarityAnalysis::run(Module
&M
,
910 ModuleAnalysisManager
&) {
912 auto IRSI
= IRSimilarityIdentifier();
913 IRSI
.findSimilarity(M
);
918 IRSimilarityAnalysisPrinterPass::run(Module
&M
, ModuleAnalysisManager
&AM
) {
919 IRSimilarityIdentifier
&IRSI
= AM
.getResult
<IRSimilarityAnalysis
>(M
);
920 Optional
<SimilarityGroupList
> &SimilarityCandidatesOpt
= IRSI
.getSimilarity();
922 for (std::vector
<IRSimilarityCandidate
> &CandVec
: *SimilarityCandidatesOpt
) {
923 OS
<< CandVec
.size() << " candidates of length "
924 << CandVec
.begin()->getLength() << ". Found in: \n";
925 for (IRSimilarityCandidate
&Cand
: CandVec
) {
926 OS
<< " Function: " << Cand
.front()->Inst
->getFunction()->getName().str()
927 << ", Basic Block: ";
928 if (Cand
.front()->Inst
->getParent()->getName().str() == "")
931 OS
<< Cand
.front()->Inst
->getParent()->getName().str();
932 OS
<< "\n Start Instruction: ";
933 Cand
.frontInstruction()->print(OS
);
934 OS
<< "\n End Instruction: ";
935 Cand
.backInstruction()->print(OS
);
940 return PreservedAnalyses::all();
943 char IRSimilarityIdentifierWrapperPass::ID
= 0;