1 //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #include "llvm-c/Target.h"
12 #include "llvm/MC/SubtargetFeature.h"
13 #include "llvm/MC/MCAsmBackend.h"
14 #include "llvm/MC/MCAsmInfo.h"
15 #include "llvm/MC/MCCodeEmitter.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCInstPrinter.h"
18 #include "llvm/MC/MCInstrInfo.h"
19 #include "llvm/MC/MCObjectFileInfo.h"
20 #include "llvm/MC/MCObjectWriter.h"
21 #include "llvm/MC/MCParser/AsmLexer.h"
22 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
23 #include "llvm/MC/MCRegisterInfo.h"
24 #include "llvm/MC/MCSectionMachO.h"
25 #include "llvm/MC/MCStreamer.h"
26 #include "llvm/MC/MCSubtargetInfo.h"
27 #include "llvm/MC/MCTargetOptionsCommandFlags.inc"
28 #include "llvm/Support/MemoryBuffer.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/FileUtilities.h"
31 #include "llvm/Support/raw_ostream.h"
32 #include "llvm/Support/SourceMgr.h"
33 #include "llvm/Support/TargetSelect.h"
34 #include "llvm/Support/TargetRegistry.h"
35 #include "llvm/Support/ToolOutputFile.h"
39 static cl::opt
<std::string
>
40 TripleName("triple", cl::desc("Target triple to assemble for, "
41 "see -version for available targets"));
43 static cl::opt
<std::string
>
45 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
46 cl::value_desc("cpu-name"), cl::init(""));
48 // This is useful for variable-length instruction sets.
49 static cl::opt
<unsigned> InsnLimit(
51 cl::desc("Limit the number of instructions to process (0 for no limit)"),
52 cl::value_desc("count"), cl::init(0));
54 static cl::list
<std::string
>
55 MAttrs("mattr", cl::CommaSeparated
,
56 cl::desc("Target specific attributes (-mattr=help for details)"),
57 cl::value_desc("a1,+a2,-a3,..."));
58 // The feature string derived from -mattr's values.
59 std::string FeaturesStr
;
61 static cl::list
<std::string
>
62 FuzzerArgs("fuzzer-args", cl::Positional
,
63 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore
,
64 cl::PositionalEatsArgs
);
65 static std::vector
<char *> ModifiedArgv
;
72 static cl::opt
<OutputFileType
>
73 FileType("filetype", cl::init(OFT_AssemblyFile
),
74 cl::desc("Choose an output file type:"),
76 clEnumValN(OFT_AssemblyFile
, "asm",
77 "Emit an assembly ('.s') file"),
78 clEnumValN(OFT_Null
, "null",
79 "Don't emit anything (for timing purposes)"),
80 clEnumValN(OFT_ObjectFile
, "obj",
81 "Emit a native object ('.o') file")));
84 class LLVMFuzzerInputBuffer
: public MemoryBuffer
87 LLVMFuzzerInputBuffer(const uint8_t *data_
, size_t size_
)
88 : Data(reinterpret_cast<const char *>(data_
)),
90 init(Data
, Data
+Size
, false);
94 virtual BufferKind
getBufferKind() const {
95 return MemoryBuffer_Malloc
; // it's not disk-backed so I think that's
96 // the intent ... though AFAIK it
97 // probably came from an mmap or sbrk
105 static int AssembleInput(const char *ProgName
, const Target
*TheTarget
,
106 SourceMgr
&SrcMgr
, MCContext
&Ctx
, MCStreamer
&Str
,
107 MCAsmInfo
&MAI
, MCSubtargetInfo
&STI
,
108 MCInstrInfo
&MCII
, MCTargetOptions
&MCOptions
) {
109 static const bool NoInitialTextSection
= false;
111 std::unique_ptr
<MCAsmParser
> Parser(
112 createMCAsmParser(SrcMgr
, Ctx
, Str
, MAI
));
114 std::unique_ptr
<MCTargetAsmParser
> TAP(
115 TheTarget
->createMCAsmParser(STI
, *Parser
, MCII
, MCOptions
));
119 << ": error: this target '" << TripleName
120 << "', does not support assembly parsing.\n";
124 Parser
->setTargetParser(*TAP
);
126 return Parser
->Run(NoInitialTextSection
);
130 int AssembleOneInput(const uint8_t *Data
, size_t Size
) {
131 const bool ShowInst
= false;
132 const bool AsmVerbose
= false;
133 const bool UseDwarfDirectory
= true;
135 Triple
TheTriple(Triple::normalize(TripleName
));
139 std::unique_ptr
<MemoryBuffer
> BufferPtr(new LLVMFuzzerInputBuffer(Data
, Size
));
141 // Tell SrcMgr about this buffer, which is what the parser will pick up.
142 SrcMgr
.AddNewSourceBuffer(std::move(BufferPtr
), SMLoc());
144 static const std::vector
<std::string
> NoIncludeDirs
;
145 SrcMgr
.setIncludeDirs(NoIncludeDirs
);
147 static std::string ArchName
;
149 const Target
*TheTarget
= TargetRegistry::lookupTarget(ArchName
, TheTriple
,
152 errs() << "error: this target '" << TheTriple
.normalize()
153 << "/" << ArchName
<< "', was not found: '" << Error
<< "'\n";
158 std::unique_ptr
<MCRegisterInfo
> MRI(TheTarget
->createMCRegInfo(TripleName
));
160 errs() << "Unable to create target register info!";
164 std::unique_ptr
<MCAsmInfo
> MAI(TheTarget
->createMCAsmInfo(*MRI
, TripleName
));
166 errs() << "Unable to create target asm info!";
171 MCObjectFileInfo MOFI
;
172 MCContext
Ctx(MAI
.get(), MRI
.get(), &MOFI
, &SrcMgr
);
174 static const bool UsePIC
= false;
175 MOFI
.InitMCObjectFileInfo(TheTriple
, UsePIC
, Ctx
);
177 const unsigned OutputAsmVariant
= 0;
178 std::unique_ptr
<MCInstrInfo
> MCII(TheTarget
->createMCInstrInfo());
179 MCInstPrinter
*IP
= TheTarget
->createMCInstPrinter(Triple(TripleName
), OutputAsmVariant
,
183 << "error: unable to create instruction printer for target triple '"
184 << TheTriple
.normalize() << "' with assembly variant "
185 << OutputAsmVariant
<< ".\n";
190 const char *ProgName
= "llvm-mc-fuzzer";
191 std::unique_ptr
<MCSubtargetInfo
> STI(
192 TheTarget
->createMCSubtargetInfo(TripleName
, MCPU
, FeaturesStr
));
193 std::unique_ptr
<MCCodeEmitter
> CE
= nullptr;
194 std::unique_ptr
<MCAsmBackend
> MAB
= nullptr;
196 MCTargetOptions MCOptions
= InitMCTargetOptionsFromFlags();
198 std::string OutputString
;
199 raw_string_ostream
Out(OutputString
);
200 auto FOut
= std::make_unique
<formatted_raw_ostream
>(Out
);
202 std::unique_ptr
<MCStreamer
> Str
;
204 if (FileType
== OFT_AssemblyFile
) {
205 Str
.reset(TheTarget
->createAsmStreamer(Ctx
, std::move(FOut
), AsmVerbose
,
206 UseDwarfDirectory
, IP
, std::move(CE
),
207 std::move(MAB
), ShowInst
));
209 assert(FileType
== OFT_ObjectFile
&& "Invalid file type!");
212 const std::string OutputFilename
= "-";
214 std::make_unique
<ToolOutputFile
>(OutputFilename
, EC
, sys::fs::OF_None
);
216 errs() << EC
.message() << '\n';
220 // Don't waste memory on names of temp labels.
221 Ctx
.setUseNamesOnTempLabels(false);
223 std::unique_ptr
<buffer_ostream
> BOS
;
224 raw_pwrite_stream
*OS
= &Out
->os();
225 if (!Out
->os().supportsSeeking()) {
226 BOS
= std::make_unique
<buffer_ostream
>(Out
->os());
230 MCCodeEmitter
*CE
= TheTarget
->createMCCodeEmitter(*MCII
, *MRI
, Ctx
);
231 MCAsmBackend
*MAB
= TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCOptions
);
232 Str
.reset(TheTarget
->createMCObjectStreamer(
233 TheTriple
, Ctx
, std::unique_ptr
<MCAsmBackend
>(MAB
),
234 MAB
->createObjectWriter(*OS
), std::unique_ptr
<MCCodeEmitter
>(CE
), *STI
,
235 MCOptions
.MCRelaxAll
, MCOptions
.MCIncrementalLinkerCompatible
,
236 /*DWARFMustBeAtTheEnd*/ false));
238 const int Res
= AssembleInput(ProgName
, TheTarget
, SrcMgr
, Ctx
, *Str
, *MAI
, *STI
,
246 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data
, size_t Size
) {
247 return AssembleOneInput(Data
, Size
);
250 extern "C" LLVM_ATTRIBUTE_USED
int LLVMFuzzerInitialize(int *argc
,
  // The command line is unusual compared to other fuzzers due to the need to
  // specify the target. Options like -triple, -mcpu, and -mattr work like
  // their counterparts in llvm-mc, while -fuzzer-args collects options for the
  // fuzzer itself.
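  //
  // NOTE(review): the examples below talk about the disassembler and pass
  // -disassemble, but this file drives the assembler and no -disassemble
  // option is declared in the visible source. They look inherited from the
  // disassembler fuzzer -- confirm and update.
  //
  // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
  // 4 bytes each and use the contents of ./corpus as the test corpus:
  //   llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
  //       -fuzzer-args -max_len=4 -runs=100000 ./corpus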
264 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
265 // feature enabled using up to 64-byte inputs:
266 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
267 // -disassemble -fuzzer-args ./corpus
269 // If your aim is to find instructions that are not tested, then it is
270 // advisable to constrain the maximum input size to a single instruction
271 // using -max_len as in the first example. This results in a test corpus of
272 // individual instructions that test unique paths. Without this constraint,
273 // there will be considerable redundancy in the corpus.
275 char **OriginalArgv
= *argv
;
277 LLVMInitializeAllTargetInfos();
278 LLVMInitializeAllTargetMCs();
279 LLVMInitializeAllAsmParsers();
281 cl::ParseCommandLineOptions(*argc
, OriginalArgv
);
283 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
284 // the driver can parse its arguments.
286 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
287 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
288 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
289 ModifiedArgv
.push_back(OriginalArgv
[0]);
290 for (const auto &FuzzerArg
: FuzzerArgs
) {
291 for (int i
= 1; i
< *argc
; ++i
) {
292 if (FuzzerArg
== OriginalArgv
[i
])
293 ModifiedArgv
.push_back(OriginalArgv
[i
]);
296 *argc
= ModifiedArgv
.size();
297 *argv
= ModifiedArgv
.data();
299 // Package up features to be passed to target/subtarget
300 // We have to pass it via a global since the callback doesn't
301 // permit any user data.
303 SubtargetFeatures Features
;
304 for (unsigned i
= 0; i
!= MAttrs
.size(); ++i
)
305 Features
.AddFeature(MAttrs
[i
]);
306 FeaturesStr
= Features
.getString();
309 if (TripleName
.empty())
310 TripleName
= sys::getDefaultTargetTriple();