1 //===--- llvm-mc-fuzzer.cpp - Fuzzer for the MC layer ---------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file is distributed under the University of Illinois Open Source
6 // License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 //===----------------------------------------------------------------------===//
12 #include "llvm-c/Target.h"
13 #include "llvm/MC/SubtargetFeature.h"
14 #include "llvm/MC/MCAsmBackend.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCCodeEmitter.h"
17 #include "llvm/MC/MCContext.h"
18 #include "llvm/MC/MCInstPrinter.h"
19 #include "llvm/MC/MCInstrInfo.h"
20 #include "llvm/MC/MCObjectFileInfo.h"
21 #include "llvm/MC/MCObjectWriter.h"
22 #include "llvm/MC/MCParser/AsmLexer.h"
23 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
24 #include "llvm/MC/MCRegisterInfo.h"
25 #include "llvm/MC/MCSectionMachO.h"
26 #include "llvm/MC/MCStreamer.h"
27 #include "llvm/MC/MCSubtargetInfo.h"
28 #include "llvm/MC/MCTargetOptionsCommandFlags.inc"
29 #include "llvm/Support/MemoryBuffer.h"
30 #include "llvm/Support/CommandLine.h"
31 #include "llvm/Support/FileUtilities.h"
32 #include "llvm/Support/raw_ostream.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Support/TargetRegistry.h"
36 #include "llvm/Support/ToolOutputFile.h"
// Command-line option "-triple": the target triple to assemble for.
// No default is given here; LLVMFuzzerInitialize replaces an empty value
// with sys::getDefaultTargetTriple().
40 static cl::opt
<std::string
>
41 TripleName("triple", cl::desc("Target triple to assemble for, "
42 "see -version for available targets"));
// String option selecting the target CPU; defaults to the empty string.
// NOTE(review): the line that names this option is not visible in this
// excerpt. It is presumably the `MCPU` variable that AssembleOneInput passes
// to createMCSubtargetInfo -- confirm.
44 static cl::opt
<std::string
>
46 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
47 cl::value_desc("cpu-name"), cl::init(""));
// Unsigned option capping the number of instructions to process; the default
// of 0 means "no limit" according to its description.
// NOTE(review): InsnLimit is not read by any code visible in this excerpt --
// confirm whether it is still used.
49 // This is useful for variable-length instruction sets.
50 static cl::opt
<unsigned> InsnLimit(
52 cl::desc("Limit the number of instructions to process (0 for no limit)"),
53 cl::value_desc("count"), cl::init(0));
// Command-line option "-mattr": a comma-separated list of target-specific
// attributes. LLVMFuzzerInitialize feeds each entry to a SubtargetFeatures
// object and stores the resulting string in FeaturesStr.
55 static cl::list
<std::string
>
56 MAttrs("mattr", cl::CommaSeparated
,
57 cl::desc("Target specific attributes (-mattr=help for details)"),
58 cl::value_desc("a1,+a2,-a3,..."));
59 // The feature string derived from -mattr's values.
// Written once in LLVMFuzzerInitialize and read by AssembleOneInput when it
// creates the subtarget info.
60 std::string FeaturesStr
;
// Positional option "fuzzer-args": collects the arguments meant for the
// fuzzer driver rather than for this tool (zero or more, and it eats the
// arguments that follow it).
62 static cl::list
<std::string
>
63 FuzzerArgs("fuzzer-args", cl::Positional
,
64 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore
,
65 cl::PositionalEatsArgs
);
// Storage for the rebuilt argv. LLVMFuzzerInitialize points *argv at this
// vector's data, so it is a file-level static rather than a local.
66 static std::vector
<char *> ModifiedArgv
;
// Command-line option "-filetype": selects what kind of output is produced.
// Accepted values are "asm" (the default, OFT_AssemblyFile), "null"
// (OFT_Null) and "obj" (OFT_ObjectFile).
// NOTE(review): the OutputFileType enum itself is not visible in this excerpt.
// In the visible code AssembleOneInput tests for OFT_AssemblyFile and
// otherwise asserts OFT_ObjectFile, so "null" appears to trip that assert --
// confirm.
73 static cl::opt
<OutputFileType
>
74 FileType("filetype", cl::init(OFT_AssemblyFile
),
75 cl::desc("Choose an output file type:"),
77 clEnumValN(OFT_AssemblyFile
, "asm",
78 "Emit an assembly ('.s') file"),
79 clEnumValN(OFT_Null
, "null",
80 "Don't emit anything (for timing purposes)"),
81 clEnumValN(OFT_ObjectFile
, "obj",
82 "Emit a native object ('.o') file")));
// MemoryBuffer subclass that exposes the raw bytes handed over by the fuzzer.
// The constructor reinterprets the uint8_t pointer as const char* and calls
// init() with the [Data, Data+Size) range.
// NOTE(review): the `false` passed to init() is presumably MemoryBuffer's
// "requires null terminator" flag, since fuzzer inputs are not
// NUL-terminated -- confirm against MemoryBuffer::init. The member
// declarations and access specifiers are not visible in this excerpt.
85 class LLVMFuzzerInputBuffer
: public MemoryBuffer
88 LLVMFuzzerInputBuffer(const uint8_t *data_
, size_t size_
)
89 : Data(reinterpret_cast<const char *>(data_
)),
91 init(Data
, Data
+Size
, false);
// Reports the buffer kind as MemoryBuffer_Malloc; the original author's
// reasoning is in the trailing comment below.
95 virtual BufferKind
getBufferKind() const {
96 return MemoryBuffer_Malloc
; // it's not disk-backed so I think that's
97 // the intent ... though AFAIK it
98 // probably came from an mmap or sbrk
// Runs the assembly parser over the source already registered in SrcMgr.
//
// Creates a generic MCAsmParser from (SrcMgr, Ctx, Str, MAI), then asks
// TheTarget for a target-specific parser built from (STI, *Parser, MCII,
// MCOptions), attaches the target parser to the generic one, and returns the
// result of Parser->Run().
//
// ProgName is used as the prefix of the error message printed when the target
// does not support assembly parsing.
// NOTE(review): the condition guarding that message and the value returned on
// that path are not visible in this excerpt.
106 static int AssembleInput(const char *ProgName
, const Target
*TheTarget
,
107 SourceMgr
&SrcMgr
, MCContext
&Ctx
, MCStreamer
&Str
,
108 MCAsmInfo
&MAI
, MCSubtargetInfo
&STI
,
109 MCInstrInfo
&MCII
, MCTargetOptions
&MCOptions
) {
// Passed to Parser->Run() below.
110 static const bool NoInitialTextSection
= false;
112 std::unique_ptr
<MCAsmParser
> Parser(
113 createMCAsmParser(SrcMgr
, Ctx
, Str
, MAI
));
115 std::unique_ptr
<MCTargetAsmParser
> TAP(
116 TheTarget
->createMCAsmParser(STI
, *Parser
, MCII
, MCOptions
));
120 << ": error: this target '" << TripleName
121 << "', does not support assembly parsing.\n";
125 Parser
->setTargetParser(*TAP
);
127 return Parser
->Run(NoInitialTextSection
);
// Assembles a single fuzzer-provided input for the target selected on the
// command line.
//
// Data/Size describe the input bytes. They are wrapped in an
// LLVMFuzzerInputBuffer and registered with the source manager. The function
// then builds the MC objects it needs (register info, asm info, object file
// info, context, instruction info, instruction printer, subtarget info),
// creates either an assembly streamer or an object streamer depending on
// FileType, and calls AssembleInput.
//
// NOTE(review): several lines of this function are not visible in this
// excerpt, including the declarations of SrcMgr and Error, the conditions and
// returns of the error paths, and the final return statement.
131 int AssembleOneInput(const uint8_t *Data
, size_t Size
) {
// Fixed settings that are later passed to createAsmStreamer.
132 const bool ShowInst
= false;
133 const bool AsmVerbose
= false;
134 const bool UseDwarfDirectory
= true;
// Normalized form of the -triple value; used for the target lookup, the
// object file info and the object streamer.
136 Triple
TheTriple(Triple::normalize(TripleName
));
// Wrap the raw fuzzer bytes so they can be handed to the source manager.
140 std::unique_ptr
<MemoryBuffer
> BufferPtr(new LLVMFuzzerInputBuffer(Data
, Size
));
142 // Tell SrcMgr about this buffer, which is what the parser will pick up.
143 SrcMgr
.AddNewSourceBuffer(std::move(BufferPtr
), SMLoc());
145 static const std::vector
<std::string
> NoIncludeDirs
;
146 SrcMgr
.setIncludeDirs(NoIncludeDirs
);
// ArchName is never assigned in the visible code, so the lookup below is
// given an empty architecture name.
148 static std::string ArchName
;
150 const Target
*TheTarget
= TargetRegistry::lookupTarget(ArchName
, TheTriple
,
153 errs() << "error: this target '" << TheTriple
.normalize()
154 << "/" << ArchName
<< "', was not found: '" << Error
<< "'\n";
// Target register description; the message below reports a failure to
// create it.
159 std::unique_ptr
<MCRegisterInfo
> MRI(TheTarget
->createMCRegInfo(TripleName
));
161 errs() << "Unable to create target register info!";
// Target assembly description; the message below reports a failure to
// create it.
165 std::unique_ptr
<MCAsmInfo
> MAI(TheTarget
->createMCAsmInfo(*MRI
, TripleName
));
167 errs() << "Unable to create target asm info!";
// MOFI is passed to the context by address before InitMCObjectFileInfo is
// called on it with that same context.
172 MCObjectFileInfo MOFI
;
173 MCContext
Ctx(MAI
.get(), MRI
.get(), &MOFI
, &SrcMgr
);
175 static const bool UsePIC
= false;
176 MOFI
.InitMCObjectFileInfo(TheTriple
, UsePIC
, Ctx
);
178 const unsigned OutputAsmVariant
= 0;
// NOTE(review): IP is a raw pointer and, in the visible code, is only handed
// to createAsmStreamer. Confirm who owns it when the object-file path is
// taken.
179 std::unique_ptr
<MCInstrInfo
> MCII(TheTarget
->createMCInstrInfo());
180 MCInstPrinter
*IP
= TheTarget
->createMCInstPrinter(Triple(TripleName
), OutputAsmVariant
,
184 << "error: unable to create instruction printer for target triple '"
185 << TheTriple
.normalize() << "' with assembly variant "
186 << OutputAsmVariant
<< ".\n";
// ProgName is passed to AssembleInput for its diagnostics. The subtarget is
// built from the triple, the CPU option and the feature string that
// LLVMFuzzerInitialize derived from -mattr.
191 const char *ProgName
= "llvm-mc-fuzzer";
192 std::unique_ptr
<MCSubtargetInfo
> STI(
193 TheTarget
->createMCSubtargetInfo(TripleName
, MCPU
, FeaturesStr
));
194 std::unique_ptr
<MCCodeEmitter
> CE
= nullptr;
195 std::unique_ptr
<MCAsmBackend
> MAB
= nullptr;
197 MCTargetOptions MCOptions
= InitMCTargetOptionsFromFlags();
// Assembly output is accumulated in an in-memory string instead of a file.
199 std::string OutputString
;
200 raw_string_ostream
Out(OutputString
);
201 auto FOut
= llvm::make_unique
<formatted_raw_ostream
>(Out
);
203 std::unique_ptr
<MCStreamer
> Str
;
// Assembly-file path: the streamer writes through FOut, and the (null) code
// emitter and asm backend declared above are moved into it.
205 if (FileType
== OFT_AssemblyFile
) {
206 Str
.reset(TheTarget
->createAsmStreamer(Ctx
, std::move(FOut
), AsmVerbose
,
207 UseDwarfDirectory
, IP
, std::move(CE
),
208 std::move(MAB
), ShowInst
));
// Object-file path (the `else` line itself is not visible in this excerpt).
// Output goes to a ToolOutputFile named "-".
// NOTE(review): `Out` is used with `->os()` below, so a declaration that is
// not visible here presumably shadows the raw_string_ostream above -- confirm.
210 assert(FileType
== OFT_ObjectFile
&& "Invalid file type!");
213 const std::string OutputFilename
= "-";
215 llvm::make_unique
<ToolOutputFile
>(OutputFilename
, EC
, sys::fs::F_None
);
217 errs() << EC
.message() << '\n';
221 // Don't waste memory on names of temp labels.
222 Ctx
.setUseNamesOnTempLabels(false);
// If the output stream cannot seek, a buffer_ostream is created around it.
// NOTE(review): the line that redirects OS to that buffer is not visible
// here.
224 std::unique_ptr
<buffer_ostream
> BOS
;
225 raw_pwrite_stream
*OS
= &Out
->os();
226 if (!Out
->os().supportsSeeking()) {
227 BOS
= make_unique
<buffer_ostream
>(Out
->os());
// These raw pointers are wrapped in unique_ptrs when passed to
// createMCObjectStreamer below. They shadow the outer CE/MAB unique_ptrs.
231 MCCodeEmitter
*CE
= TheTarget
->createMCCodeEmitter(*MCII
, *MRI
, Ctx
);
232 MCAsmBackend
*MAB
= TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCOptions
);
233 Str
.reset(TheTarget
->createMCObjectStreamer(
234 TheTriple
, Ctx
, std::unique_ptr
<MCAsmBackend
>(MAB
),
235 MAB
->createObjectWriter(*OS
), std::unique_ptr
<MCCodeEmitter
>(CE
), *STI
,
236 MCOptions
.MCRelaxAll
, MCOptions
.MCIncrementalLinkerCompatible
,
237 /*DWARFMustBeAtTheEnd*/ false));
// Run the parser over the registered input using the streamer chosen above.
239 const int Res
= AssembleInput(ProgName
, TheTarget
, SrcMgr
, Ctx
, *Str
, *MAI
, *STI
,
// Per-input fuzzer entry point with C linkage: forwards the input bytes to
// AssembleOneInput and returns its result unchanged.
247 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data
, size_t Size
) {
248 return AssembleOneInput(Data
, Size
);
// Fuzzer initialization entry point with C linkage.
//
// Initializes all target infos, target MCs and asm parsers, parses this
// tool's own command-line options, and then rewrites *argc / *argv so that
// only the program name and the values collected by -fuzzer-args remain for
// the fuzzer driver. It also derives FeaturesStr from -mattr and fills in
// TripleName with the default target triple when none was given.
//
// NOTE(review): the remainder of the parameter list and the return statement
// are not visible in this excerpt.
251 extern "C" LLVM_ATTRIBUTE_USED
int LLVMFuzzerInitialize(int *argc
,
253 // The command line is unusual compared to other fuzzers due to the need to
254 // specify the target. Options like -triple, -mcpu, and -mattr work like
255 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
// NOTE(review): the examples below mention a disassembler and a -disassemble
// flag. The code visible here initializes asm parsers and assembles its
// input, and no "disassemble" option is declared in the visible lines.
// Confirm and update the examples.
260 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
261 // 4-bytes each and use the contents of ./corpus as the test corpus:
262 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
263 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
265 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
266 // feature enabled using up to 64-byte inputs:
267 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
268 // -disassemble -fuzzer-args ./corpus
270 // If your aim is to find instructions that are not tested, then it is
271 // advisable to constrain the maximum input size to a single instruction
272 // using -max_len as in the first example. This results in a test corpus of
273 // individual instructions that test unique paths. Without this constraint,
274 // there will be considerable redundancy in the corpus.
// Keep the caller's argv; its string pointers are reused when the argument
// list is rebuilt below.
276 char **OriginalArgv
= *argv
;
278 LLVMInitializeAllTargetInfos();
279 LLVMInitializeAllTargetMCs();
280 LLVMInitializeAllAsmParsers();
282 cl::ParseCommandLineOptions(*argc
, OriginalArgv
);
284 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
285 // the driver can parse its arguments.
287 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
288 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
289 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
// NOTE(review): matching is by string equality against every argv entry, so
// each FuzzerArg pushes every argv slot that has the same text. A value that
// appears more than once (or that equals one of this tool's own arguments)
// is pushed multiple times. Confirm this is acceptable.
290 ModifiedArgv
.push_back(OriginalArgv
[0]);
291 for (const auto &FuzzerArg
: FuzzerArgs
) {
292 for (int i
= 1; i
< *argc
; ++i
) {
293 if (FuzzerArg
== OriginalArgv
[i
])
294 ModifiedArgv
.push_back(OriginalArgv
[i
]);
// Hand the rebuilt list back to the caller. *argv now points into
// ModifiedArgv's storage.
297 *argc
= ModifiedArgv
.size();
298 *argv
= ModifiedArgv
.data();
300 // Package up features to be passed to target/subtarget
301 // We have to pass it via a global since the callback doesn't
302 // permit any user data.
304 SubtargetFeatures Features
;
305 for (unsigned i
= 0; i
!= MAttrs
.size(); ++i
)
306 Features
.AddFeature(MAttrs
[i
]);
307 FeaturesStr
= Features
.getString();
// Fall back to the default target triple when -triple was not given.
310 if (TripleName
.empty())
311 TripleName
= sys::getDefaultTargetTriple();