1 //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #include "llvm-c/Target.h"
12 #include "llvm/MC/MCAsmBackend.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCCodeEmitter.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCInstPrinter.h"
17 #include "llvm/MC/MCInstrInfo.h"
18 #include "llvm/MC/MCObjectFileInfo.h"
19 #include "llvm/MC/MCObjectWriter.h"
20 #include "llvm/MC/MCParser/AsmLexer.h"
21 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSectionMachO.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
27 #include "llvm/MC/SubtargetFeature.h"
28 #include "llvm/MC/TargetRegistry.h"
29 #include "llvm/Support/CommandLine.h"
30 #include "llvm/Support/FileUtilities.h"
31 #include "llvm/Support/Host.h"
32 #include "llvm/Support/MemoryBuffer.h"
33 #include "llvm/Support/SourceMgr.h"
34 #include "llvm/Support/TargetSelect.h"
35 #include "llvm/Support/ToolOutputFile.h"
36 #include "llvm/Support/raw_ostream.h"
// Command-line flag object for the MC target options; the parsed values are
// read back through mc::InitMCTargetOptionsFromFlags() in AssembleOneInput().
40 static mc::RegisterMCTargetOptionsFlags MOF
;
// -triple: target triple to assemble for. LLVMFuzzerInitialize() replaces an
// empty value with sys::getDefaultTargetTriple().
42 static cl::opt
<std::string
>
43 TripleName("triple", cl::desc("Target triple to assemble for, "
44 "see -version for available targets"));
// String-valued CPU option, defaulting to "".
// NOTE(review): the line naming this option is not visible here; presumably it
// is the MCPU value that AssembleOneInput() passes to createMCSubtargetInfo().
46 static cl::opt
<std::string
>
48 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
49 cl::value_desc("cpu-name"), cl::init(""));
51 // This is useful for variable-length instruction sets.
52 static cl::opt
<unsigned> InsnLimit(
54 cl::desc("Limit the number of instructions to process (0 for no limit)"),
55 cl::value_desc("count"), cl::init(0));
// -mattr: comma-separated target attributes. LLVMFuzzerInitialize() feeds each
// entry to a SubtargetFeatures object and stores the result in FeaturesStr.
57 static cl::list
<std::string
>
58 MAttrs("mattr", cl::CommaSeparated
,
59 cl::desc("Target specific attributes (-mattr=help for details)"),
60 cl::value_desc("a1,+a2,-a3,..."));
61 // The feature string derived from -mattr's values.
62 std::string FeaturesStr
;
// -fuzzer-args: positional list collecting the options that are meant for the
// fuzzer rather than for this tool.
64 static cl::list
<std::string
>
65 FuzzerArgs("fuzzer-args", cl::Positional
,
66 cl::desc("Options to pass to the fuzzer"), cl::ZeroOrMore
,
67 cl::PositionalEatsArgs
);
// Rebuilt argument vector. LLVMFuzzerInitialize() points *argv at this
// vector's storage, which is why it has static storage duration.
68 static std::vector
<char *> ModifiedArgv
;
// -filetype: kind of output to produce ("asm", "null" or "obj"); the default is
// OFT_AssemblyFile.
75 static cl::opt
<OutputFileType
>
76 FileType("filetype", cl::init(OFT_AssemblyFile
),
77 cl::desc("Choose an output file type:"),
79 clEnumValN(OFT_AssemblyFile
, "asm",
80 "Emit an assembly ('.s') file"),
81 clEnumValN(OFT_Null
, "null",
82 "Don't emit anything (for timing purposes)"),
83 clEnumValN(OFT_ObjectFile
, "obj",
84 "Emit a native object ('.o') file")));
// MemoryBuffer subclass over the raw byte range handed in by the fuzzer.
//
// The constructor reinterprets the uint8_t pointer as const char * and calls
// init() with the range [Data, Data + Size) and a trailing `false`.
// NOTE(review): the `false` is presumably the "requires null terminator" flag
// (fuzzer input carries no terminating NUL) -- confirm against MemoryBuffer.
// getBufferKind() reports MemoryBuffer_Malloc; the existing comment on that
// return explains the choice.
87 class LLVMFuzzerInputBuffer
: public MemoryBuffer
90 LLVMFuzzerInputBuffer(const uint8_t *data_
, size_t size_
)
91 : Data(reinterpret_cast<const char *>(data_
)),
93 init(Data
, Data
+Size
, false);
97 virtual BufferKind
getBufferKind() const {
98 return MemoryBuffer_Malloc
; // it's not disk-backed so I think that's
99 // the intent ... though AFAIK it
100 // probably came from an mmap or sbrk
// Runs the assembly parser over the buffers registered with SrcMgr.
//
// Creates a generic MCAsmParser from (SrcMgr, Ctx, Str, MAI), then asks
// TheTarget for its target-specific parser built from (STI, *Parser, MCII,
// MCOptions). The target parser is attached with setTargetParser() and the
// function returns whatever Parser->Run() returns.
//
// An error message naming TripleName is emitted for targets that do not
// support assembly parsing.
// NOTE(review): the condition guarding that message, and what happens after it
// is printed, are on lines not visible here; ProgName is presumably used as the
// message prefix -- confirm.
108 static int AssembleInput(const char *ProgName
, const Target
*TheTarget
,
109 SourceMgr
&SrcMgr
, MCContext
&Ctx
, MCStreamer
&Str
,
110 MCAsmInfo
&MAI
, MCSubtargetInfo
&STI
,
111 MCInstrInfo
&MCII
, MCTargetOptions
&MCOptions
) {
// Passed to Run() below; false, so the initial text section is not suppressed.
112 static const bool NoInitialTextSection
= false;
114 std::unique_ptr
<MCAsmParser
> Parser(
115 createMCAsmParser(SrcMgr
, Ctx
, Str
, MAI
));
117 std::unique_ptr
<MCTargetAsmParser
> TAP(
118 TheTarget
->createMCAsmParser(STI
, *Parser
, MCII
, MCOptions
));
122 << ": error: this target '" << TripleName
123 << "', does not support assembly parsing.\n";
127 Parser
->setTargetParser(*TAP
);
129 return Parser
->Run(NoInitialTextSection
);
// Assembles one fuzzer input held in [Data, Data + Size).
//
// Wraps the bytes in an LLVMFuzzerInputBuffer and registers it with the source
// manager, looks up the target for the normalized TripleName, and builds the
// MC objects the parser needs: register info, asm info, subtarget info (from
// MCPU and FeaturesStr), context, object-file info, instruction info and
// instruction printer. Depending on -filetype it then creates either an
// assembly streamer or an object streamer and hands everything to
// AssembleInput().
//
// NOTE(review): several lines of this function are not visible in this
// excerpt, including the checks that guard each error message and the
// function's return, so failure behaviour is deliberately not described here.
133 int AssembleOneInput(const uint8_t *Data
, size_t Size
) {
134 const bool ShowInst
= false;
135 const bool AsmVerbose
= false;
136 const bool UseDwarfDirectory
= true;
138 Triple
TheTriple(Triple::normalize(TripleName
));
// The buffer is heap-allocated here and ownership moves into SrcMgr below.
142 std::unique_ptr
<MemoryBuffer
> BufferPtr(new LLVMFuzzerInputBuffer(Data
, Size
));
144 // Tell SrcMgr about this buffer, which is what the parser will pick up.
145 SrcMgr
.AddNewSourceBuffer(std::move(BufferPtr
), SMLoc());
147 static const std::vector
<std::string
> NoIncludeDirs
;
148 SrcMgr
.setIncludeDirs(NoIncludeDirs
);
// ArchName is never assigned in the visible code, so the lookup below is
// driven by TheTriple.
150 static std::string ArchName
;
152 const Target
*TheTarget
= TargetRegistry::lookupTarget(ArchName
, TheTriple
,
155 errs() << "error: this target '" << TheTriple
.normalize()
156 << "/" << ArchName
<< "', was not found: '" << Error
<< "'\n";
// NOTE(review): the factory calls below pass the raw TripleName string while
// the target lookup above used the normalized TheTriple -- confirm this mix is
// intended.
161 std::unique_ptr
<MCRegisterInfo
> MRI(TheTarget
->createMCRegInfo(TripleName
));
163 errs() << "Unable to create target register info!";
167 MCTargetOptions MCOptions
= mc::InitMCTargetOptionsFromFlags();
168 std::unique_ptr
<MCAsmInfo
> MAI(
169 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCOptions
));
171 errs() << "Unable to create target asm info!";
175 std::unique_ptr
<MCSubtargetInfo
> STI(
176 TheTarget
->createMCSubtargetInfo(TripleName
, MCPU
, FeaturesStr
));
178 MCContext
Ctx(TheTriple
, MAI
.get(), MRI
.get(), STI
.get(), &SrcMgr
);
179 std::unique_ptr
<MCObjectFileInfo
> MOFI(
180 TheTarget
->createMCObjectFileInfo(Ctx
, /*PIC=*/false));
181 Ctx
.setObjectFileInfo(MOFI
.get());
183 const unsigned OutputAsmVariant
= 0;
184 std::unique_ptr
<MCInstrInfo
> MCII(TheTarget
->createMCInstrInfo());
185 MCInstPrinter
*IP
= TheTarget
->createMCInstPrinter(Triple(TripleName
), OutputAsmVariant
,
189 << "error: unable to create instruction printer for target triple '"
190 << TheTriple
.normalize() << "' with assembly variant "
191 << OutputAsmVariant
<< ".\n";
196 const char *ProgName
= "llvm-mc-fuzzer";
// The code emitter and backend stay null here; the assembly-streamer call
// below receives them as null unique_ptrs.
197 std::unique_ptr
<MCCodeEmitter
> CE
= nullptr;
198 std::unique_ptr
<MCAsmBackend
> MAB
= nullptr;
// Assembly output is written into the in-memory OutputString, not to a file.
200 std::string OutputString
;
201 raw_string_ostream
Out(OutputString
);
202 auto FOut
= std::make_unique
<formatted_raw_ostream
>(Out
);
204 std::unique_ptr
<MCStreamer
> Str
;
206 if (FileType
== OFT_AssemblyFile
) {
207 Str
.reset(TheTarget
->createAsmStreamer(Ctx
, std::move(FOut
), AsmVerbose
,
208 UseDwarfDirectory
, IP
, std::move(CE
),
209 std::move(MAB
), ShowInst
));
211 assert(FileType
== OFT_ObjectFile
&& "Invalid file type!");
214 const std::string OutputFilename
= "-";
216 std::make_unique
<ToolOutputFile
>(OutputFilename
, EC
, sys::fs::OF_None
);
218 errs() << EC
.message() << '\n';
222 // Don't waste memory on names of temp labels.
223 Ctx
.setUseNamesOnTempLabels(false);
225 std::unique_ptr
<buffer_ostream
> BOS
;
226 raw_pwrite_stream
*OS
= &Out
->os();
227 if (!Out
->os().supportsSeeking()) {
228 BOS
= std::make_unique
<buffer_ostream
>(Out
->os());
// NOTE(review): these raw CE/MAB pointers reuse the names of the unique_ptrs
// declared earlier (presumably shadowing them in a nested scope). Ownership
// passes to the streamer through the unique_ptr temporaries built in the
// call below.
232 MCCodeEmitter
*CE
= TheTarget
->createMCCodeEmitter(*MCII
, *MRI
, Ctx
);
233 MCAsmBackend
*MAB
= TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCOptions
);
234 Str
.reset(TheTarget
->createMCObjectStreamer(
235 TheTriple
, Ctx
, std::unique_ptr
<MCAsmBackend
>(MAB
),
236 MAB
->createObjectWriter(*OS
), std::unique_ptr
<MCCodeEmitter
>(CE
), *STI
,
237 MCOptions
.MCRelaxAll
, MCOptions
.MCIncrementalLinkerCompatible
,
238 /*DWARFMustBeAtTheEnd*/ false));
240 const int Res
= AssembleInput(ProgName
, TheTarget
, SrcMgr
, Ctx
, *Str
, *MAI
, *STI
,
// Per-input fuzzer entry point with C linkage: forwards the byte range
// unchanged to AssembleOneInput() and returns its result.
248 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data
, size_t Size
) {
249 return AssembleOneInput(Data
, Size
);
252 extern "C" LLVM_ATTRIBUTE_USED
int LLVMFuzzerInitialize(int *argc
,
254 // The command line is unusual compared to other fuzzers due to the need to
255 // specify the target. Options like -triple, -mcpu, and -mattr work like
256 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
261 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
262 // 4-bytes each and use the contents of ./corpus as the test corpus:
263 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
264 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
266 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
267 // feature enabled using up to 64-byte inputs:
268 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
269 // -disassemble -fuzzer-args ./corpus
271 // If your aim is to find instructions that are not tested, then it is
272 // advisable to constrain the maximum input size to a single instruction
273 // using -max_len as in the first example. This results in a test corpus of
274 // individual instructions that test unique paths. Without this constraint,
275 // there will be considerable redundancy in the corpus.
277 char **OriginalArgv
= *argv
;
279 LLVMInitializeAllTargetInfos();
280 LLVMInitializeAllTargetMCs();
281 LLVMInitializeAllAsmParsers();
283 cl::ParseCommandLineOptions(*argc
, OriginalArgv
);
285 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
286 // the driver can parse its arguments.
288 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
289 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
290 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
291 ModifiedArgv
.push_back(OriginalArgv
[0]);
292 for (const auto &FuzzerArg
: FuzzerArgs
) {
293 for (int i
= 1; i
< *argc
; ++i
) {
294 if (FuzzerArg
== OriginalArgv
[i
])
295 ModifiedArgv
.push_back(OriginalArgv
[i
]);
298 *argc
= ModifiedArgv
.size();
299 *argv
= ModifiedArgv
.data();
301 // Package up features to be passed to target/subtarget
302 // We have to pass it via a global since the callback doesn't
303 // permit any user data.
305 SubtargetFeatures Features
;
306 for (unsigned i
= 0; i
!= MAttrs
.size(); ++i
)
307 Features
.AddFeature(MAttrs
[i
]);
308 FeaturesStr
= Features
.getString();
311 if (TripleName
.empty())
312 TripleName
= sys::getDefaultTargetTriple();