1 //===-- llvm-sim.cpp - Find similar sections of programs -------*- C++ -*-===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This program finds similar sections of a Module, and exports them as a JSON
// file.
12 // To find similarities contained across multiple modules, please use llvm-link
13 // first to merge the modules.
15 //===----------------------------------------------------------------------===//
17 #include "llvm/Analysis/IRSimilarityIdentifier.h"
18 #include "llvm/IRReader/IRReader.h"
19 #include "llvm/Support/CommandLine.h"
20 #include "llvm/Support/FileSystem.h"
21 #include "llvm/Support/InitLLVM.h"
22 #include "llvm/Support/JSON.h"
23 #include "llvm/Support/SourceMgr.h"
24 #include "llvm/Support/ToolOutputFile.h"
27 using namespace IRSimilarity
;
// Command-line option "-o": the filename that main passes to exportToFile as
// the output path.
29 static cl::opt
<std::string
> OutputFilename("o", cl::desc("Output Filename"),
31 cl::value_desc("filename"));
// Positional command-line argument: the source file that main hands to
// parseIRFile.
33 static cl::opt
<std::string
> InputSourceFile(cl::Positional
,
34 cl::desc("<Source file>"),
36 cl::value_desc("filename"));
38 /// Retrieve the unique number that \p LLVMInstNum associates with \p I.
///
/// Asserts that \p I is non-null, then looks \p I up in \p LLVMInstNum.
///
40 /// \param I - The Instruction to find the instruction number for.
41 /// \param LLVMInstNum - The mapping of Instructions to their location in the
42 /// module represented by an unsigned integer.
43 /// \returns The instruction number for \p I if it exists.
// NOTE(review): the return-type line of this definition is not present in this
// listing; the callers in exportToFile store the result in Optional<unsigned>.
45 getPositionInModule(const Instruction
*I
,
46 const DenseMap
<Instruction
*, unsigned> &LLVMInstNum
) {
47 assert(I
&& "Instruction is nullptr!");
// Look I up in the map; It compares equal to end() when I has no entry.
48 DenseMap
<Instruction
*, unsigned>::const_iterator It
= LLVMInstNum
.find(I
);
49 if (It
== LLVMInstNum
.end())
// NOTE(review): the statements for the not-found and found cases, and the
// closing brace of this function, are missing from this listing.
54 /// Exports the given SimilarityGroups to a JSON file at \p FilePath.
///
/// For every group in \p SimSections an attribute named after a counter is
/// opened, and for every candidate in the group the instruction numbers of its
/// first and last instructions are written as "start" and "end" attributes.
///
56 /// \param FilePath - The path to the output location.
57 /// \param SimSections - The similarity groups to process.
58 /// \param LLVMInstNum - The mapping of Instructions to their location in the
59 /// module represented by an unsigned integer.
60 /// \returns A nonzero error code if there was a failure creating the file.
// NOTE(review): the return-type line of this definition is not present in this
// listing.
62 exportToFile(const StringRef FilePath
,
63 const SimilarityGroupList
&SimSections
,
64 const DenseMap
<Instruction
*, unsigned> &LLVMInstNum
) {
// Create the output file at FilePath. EC is handed to the ToolOutputFile
// constructor.
// NOTE(review): the declaration of EC and any check of it are not visible in
// this listing.
66 std::unique_ptr
<ToolOutputFile
> Out(
67 new ToolOutputFile(FilePath
, EC
, sys::fs::OF_None
));
// JSON writer on top of the output file's stream.
// NOTE(review): the second argument (1) is presumably the indentation width --
// confirm against json::OStream.
71 json::OStream
J(Out
->os(), 1);
// Counter whose decimal string names the attribute opened for each group.
// NOTE(review): no increment of SimOption is visible in this listing.
74 unsigned SimOption
= 1;
75 // Process each list of SimilarityGroups organized by the Module.
76 for (const SimilarityGroup
&G
: SimSections
) {
77 std::string SimOptionStr
= std::to_string(SimOption
);
78 J
.attributeBegin(SimOptionStr
);
80 // For each candidate in the group, look up the instruction numbers of its
// first and last instructions.
82 for (const IRSimilarityCandidate
&C
: G
) {
83 Optional
<unsigned> Start
=
84 getPositionInModule((*C
.front()).Inst
, LLVMInstNum
);
85 Optional
<unsigned> End
=
86 getPositionInModule((*C
.back()).Inst
, LLVMInstNum
);
// NOTE(review): the line that opens the assertion for Start is missing from
// this listing; only its message string remains on the next line.
89 "Could not find instruction number for first instruction");
90 assert(End
&& "Could not find instruction number for last instruction");
// Write the candidate's range as "start" and "end" attributes.
93 J
.attribute("start", Start
.value());
94 J
.attribute("end", End
.value());
// NOTE(review): the remainder of both loops and of the function (closing
// braces, any matching end calls on J, and the return statement) is missing
// from this listing.
/// Tool entry point.
///
/// Parses the command line, loads the module named by InputSourceFile, assigns
/// a sequential number to each instruction, runs IRSimilarityIdentifier over
/// the module, and passes the resulting similarity groups to exportToFile.
108 int main(int argc
, const char *argv
[]) {
109 InitLLVM
X(argc
, argv
);
111 cl::ParseCommandLineOptions(argc
, argv
, "LLVM IR Similarity Visualizer\n");
113 LLVMContext CurrContext
;
// Read the input file into a Module owned by ModuleToAnalyze.
// NOTE(review): the declaration of Err is not visible in this listing.
115 std::unique_ptr
<Module
> ModuleToAnalyze
=
116 parseIRFile(InputSourceFile
, Err
, CurrContext
);
// A null module means the file could not be read; print Err to errs().
// NOTE(review): the rest of this branch (its return statement and closing
// brace) is missing from this listing.
118 if (!ModuleToAnalyze
) {
119 Err
.print(argv
[0], errs());
123 // Mapping from an Instruction pointer to its occurrence in a sequential
124 // list of all the Instructions in a Module.
125 DenseMap
<Instruction
*, unsigned> LLVMInstNum
;
127 // We give each instruction a number, which gives us a start and end value
128 // for the beginning and end of each IRSimilarityCandidate.
// Numbering starts at 1 and follows function, block, then instruction order.
// NOTE(review): instructionsWithoutDebug() presumably skips debug
// instructions, going by its name -- confirm.
129 unsigned InstructionNumber
= 1;
130 for (Function
&F
: *ModuleToAnalyze
)
131 for (BasicBlock
&BB
: F
)
132 for (Instruction
&I
: BB
.instructionsWithoutDebug())
133 LLVMInstNum
[&I
]= InstructionNumber
++;
135 // The similarity identifier we will use to find the similar sections.
136 IRSimilarityIdentifier SimIdent
;
137 SimilarityGroupList SimilaritySections
=
138 SimIdent
.findSimilarity(*ModuleToAnalyze
);
// Write the groups to the file named by the -o option.
// NOTE(review): the declaration that receives exportToFile's result (E) and
// the check guarding the error message below are missing from this listing.
141 exportToFile(OutputFilename
, SimilaritySections
, LLVMInstNum
);
// Report the error as "<program name>: <message>".
143 errs() << argv
[0] << ": " << E
.message() << '\n';
// NOTE(review): the return statements and the closing brace of main are
// missing from this listing.