1 //===-- llvm-mc-assemble-fuzzer.cpp - Fuzzer for the MC layer -------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 //===----------------------------------------------------------------------===//
11 #include "llvm-c/Target.h"
12 #include "llvm/MC/MCAsmBackend.h"
13 #include "llvm/MC/MCAsmInfo.h"
14 #include "llvm/MC/MCCodeEmitter.h"
15 #include "llvm/MC/MCContext.h"
16 #include "llvm/MC/MCInstPrinter.h"
17 #include "llvm/MC/MCInstrInfo.h"
18 #include "llvm/MC/MCObjectFileInfo.h"
19 #include "llvm/MC/MCObjectWriter.h"
20 #include "llvm/MC/MCParser/AsmLexer.h"
21 #include "llvm/MC/MCParser/MCTargetAsmParser.h"
22 #include "llvm/MC/MCRegisterInfo.h"
23 #include "llvm/MC/MCSectionMachO.h"
24 #include "llvm/MC/MCStreamer.h"
25 #include "llvm/MC/MCSubtargetInfo.h"
26 #include "llvm/MC/MCTargetOptionsCommandFlags.h"
27 #include "llvm/MC/TargetRegistry.h"
28 #include "llvm/Support/CommandLine.h"
29 #include "llvm/Support/FileUtilities.h"
30 #include "llvm/Support/MemoryBuffer.h"
31 #include "llvm/Support/SourceMgr.h"
32 #include "llvm/Support/TargetSelect.h"
33 #include "llvm/Support/ToolOutputFile.h"
34 #include "llvm/Support/raw_ostream.h"
35 #include "llvm/TargetParser/Host.h"
36 #include "llvm/TargetParser/SubtargetFeature.h"
// Static registration object for the MC target option flags. The flags are
// read back in AssembleOneInput through mc::InitMCTargetOptionsFromFlags().
// NOTE(review): presumably constructing this object is what registers the
// flags with the command-line parser — confirm against
// MCTargetOptionsCommandFlags.h.
40 static mc::RegisterMCTargetOptionsFlags MOF
;
// -triple: the target triple to assemble for, as a string.
// AssembleOneInput normalizes it into a Triple; LLVMFuzzerInitialize replaces
// an empty value with sys::getDefaultTargetTriple().
42 static cl::opt
<std::string
>
43 TripleName("triple", cl::desc("Target triple to assemble for, "
44 "see -version for available targets"));
// String option selecting the CPU type; defaults to the empty string.
// NOTE(review): presumably this is the `MCPU` value that AssembleOneInput
// passes to createMCSubtargetInfo — confirm against the full declaration.
46 static cl::opt
<std::string
>
48 cl::desc("Target a specific cpu type (-mcpu=help for details)"),
49 cl::value_desc("cpu-name"), cl::init(""));
// Unsigned option limiting how many instructions are processed; the default
// of 0 means "no limit" (per the option description).
// NOTE(review): no reader of InsnLimit is visible in this part of the file —
// confirm it is still consumed somewhere.
51 // This is useful for variable-length instruction sets.
52 static cl::opt
<unsigned> InsnLimit(
54 cl::desc("Limit the number of instructions to process (0 for no limit)"),
55 cl::value_desc("count"), cl::init(0));
// -mattr: comma-separated list of target-specific attributes.
// LLVMFuzzerInitialize feeds each entry to SubtargetFeatures::AddFeature and
// stores the resulting string in FeaturesStr.
57 static cl::list
<std::string
>
58 MAttrs("mattr", cl::CommaSeparated
,
59 cl::desc("Target specific attributes (-mattr=help for details)"),
60 cl::value_desc("a1,+a2,-a3,..."));
61 // The feature string derived from -mattr's values.
// Assigned in LLVMFuzzerInitialize and read in AssembleOneInput when the
// MCSubtargetInfo is created. It is a global because the fuzzer callback
// carries no user data.
62 std::string FeaturesStr
;
// -fuzzer-args: positional option that swallows the remaining arguments
// (cl::PositionalEatsArgs). LLVMFuzzerInitialize uses the collected strings
// to rebuild the argv that is handed back to the fuzzer driver.
64 static cl::list
<std::string
>
65 FuzzerArgs("fuzzer-args", cl::Positional
,
66 cl::desc("Options to pass to the fuzzer"),
67 cl::PositionalEatsArgs
);
// Backing storage for the rewritten argv. LLVMFuzzerInitialize points *argv
// at ModifiedArgv.data(), so the storage has to stay alive after that
// function returns; hence file scope rather than a local.
68 static std::vector
<char *> ModifiedArgv
;
// -filetype: selects the kind of output. Accepted spellings are "asm" (the
// default, OFT_AssemblyFile), "null" (OFT_Null) and "obj" (OFT_ObjectFile).
// AssembleOneInput branches on this value to create either an assembly
// streamer or an object streamer.
// NOTE(review): the non-assembly path in AssembleOneInput asserts
// FileType == OFT_ObjectFile, so "null" looks unsupported there — confirm.
75 static cl::opt
<OutputFileType
>
76 FileType("filetype", cl::init(OFT_AssemblyFile
),
77 cl::desc("Choose an output file type:"),
79 clEnumValN(OFT_AssemblyFile
, "asm",
80 "Emit an assembly ('.s') file"),
81 clEnumValN(OFT_Null
, "null",
82 "Don't emit anything (for timing purposes)"),
83 clEnumValN(OFT_ObjectFile
, "obj",
84 "Emit a native object ('.o') file")));
// MemoryBuffer subclass that exposes the raw bytes supplied by the fuzzer.
// The constructor reinterprets the incoming uint8_t pointer as const char *
// and registers the byte range with the MemoryBuffer base via init().
// getBufferKind() reports MemoryBuffer_Malloc.
// NOTE(review): nothing visible here copies the bytes, so the buffer appears
// to alias the caller's memory — confirm the lifetime expectations.
87 class LLVMFuzzerInputBuffer
: public MemoryBuffer
90 LLVMFuzzerInputBuffer(const uint8_t *data_
, size_t size_
)
91 : Data(reinterpret_cast<const char *>(data_
)),
// Register [Data, Data+Size) as the buffer contents. NOTE(review): the
// trailing `false` is presumably RequiresNullTerminator — confirm against
// MemoryBuffer::init.
93 init(Data
, Data
+Size
, false);
97 virtual BufferKind
getBufferKind() const {
98 return MemoryBuffer_Malloc
; // it's not disk-backed so I think that's
99 // the intent ... though AFAIK it
100 // probably came from an mmap or sbrk
// Runs the assembly parser over the buffers already registered in SrcMgr.
//
// Creates a generic MCAsmParser from (SrcMgr, Ctx, Str, MAI), asks TheTarget
// for the matching target-specific parser built from (STI, Parser, MCII,
// MCOptions), attaches it with setTargetParser(), and returns the result of
// Parser->Run(NoInitialTextSection).
//
// An error message naming TripleName is emitted when the target does not
// support assembly parsing. NOTE(review): the guarding condition and the
// value returned on that path are not visible here — presumably a null check
// on TAP; confirm.
108 static int AssembleInput(const char *ProgName
, const Target
*TheTarget
,
109 SourceMgr
&SrcMgr
, MCContext
&Ctx
, MCStreamer
&Str
,
110 MCAsmInfo
&MAI
, MCSubtargetInfo
&STI
,
111 MCInstrInfo
&MCII
, MCTargetOptions
&MCOptions
) {
// Passed to Parser->Run() below; false here.
112 static const bool NoInitialTextSection
= false;
114 std::unique_ptr
<MCAsmParser
> Parser(
115 createMCAsmParser(SrcMgr
, Ctx
, Str
, MAI
));
117 std::unique_ptr
<MCTargetAsmParser
> TAP(
118 TheTarget
->createMCAsmParser(STI
, *Parser
, MCII
, MCOptions
));
122 << ": error: this target '" << TripleName
123 << "', does not support assembly parsing.\n";
127 Parser
->setTargetParser(*TAP
);
129 return Parser
->Run(NoInitialTextSection
);
// Assembles one fuzzer input (Data, Size) for the target selected by the
// global command-line options.
//
// Visible steps, in order:
//   1. Normalize TripleName into TheTriple.
//   2. Wrap the input bytes in an LLVMFuzzerInputBuffer and register it with
//      SrcMgr; set an empty include-directory list.
//   3. Look up the Target via TargetRegistry::lookupTarget.
//   4. Create MCRegisterInfo, MCAsmInfo, MCSubtargetInfo, MCContext,
//      MCObjectFileInfo, MCInstrInfo and an MCInstPrinter.
//   5. Build an assembly streamer (FileType == OFT_AssemblyFile) or an object
//      streamer (asserted to be OFT_ObjectFile).
//   6. Call AssembleInput with the pieces created above.
//
// Failures along the way print a message to errs().
// NOTE(review): the guarding conditions and the values returned on each path
// are not visible here — confirm.
133 int AssembleOneInput(const uint8_t *Data
, size_t Size
) {
134 Triple
TheTriple(Triple::normalize(TripleName
));
138 std::unique_ptr
<MemoryBuffer
> BufferPtr(new LLVMFuzzerInputBuffer(Data
, Size
));
140 // Tell SrcMgr about this buffer, which is what the parser will pick up.
141 SrcMgr
.AddNewSourceBuffer(std::move(BufferPtr
), SMLoc());
// No include directories are configured: the list below is empty.
143 static const std::vector
<std::string
> NoIncludeDirs
;
144 SrcMgr
.setIncludeDirs(NoIncludeDirs
);
// ArchName is never assigned in the visible code, so the lookup is driven by
// TheTriple. NOTE(review): confirm no assignment exists elsewhere.
146 static std::string ArchName
;
148 const Target
*TheTarget
= TargetRegistry::lookupTarget(ArchName
, TheTriple
,
151 errs() << "error: this target '" << TheTriple
.normalize()
152 << "/" << ArchName
<< "', was not found: '" << Error
<< "'\n";
// NOTE(review): the raw TripleName string (not the normalized TheTriple) is
// what gets passed to the create* factories below — confirm that is intended.
157 std::unique_ptr
<MCRegisterInfo
> MRI(TheTarget
->createMCRegInfo(TripleName
));
159 errs() << "Unable to create target register info!";
163 MCTargetOptions MCOptions
= mc::InitMCTargetOptionsFromFlags();
164 std::unique_ptr
<MCAsmInfo
> MAI(
165 TheTarget
->createMCAsmInfo(*MRI
, TripleName
, MCOptions
));
167 errs() << "Unable to create target asm info!";
171 std::unique_ptr
<MCSubtargetInfo
> STI(
172 TheTarget
->createMCSubtargetInfo(TripleName
, MCPU
, FeaturesStr
));
// The context borrows MAI, MRI and STI through raw pointers obtained with
// get(); the unique_ptrs above keep ownership.
174 MCContext
Ctx(TheTriple
, MAI
.get(), MRI
.get(), STI
.get(), &SrcMgr
);
175 std::unique_ptr
<MCObjectFileInfo
> MOFI(
176 TheTarget
->createMCObjectFileInfo(Ctx
, /*PIC=*/false));
177 Ctx
.setObjectFileInfo(MOFI
.get());
179 const unsigned OutputAsmVariant
= 0;
180 std::unique_ptr
<MCInstrInfo
> MCII(TheTarget
->createMCInstrInfo());
181 MCInstPrinter
*IP
= TheTarget
->createMCInstPrinter(Triple(TripleName
), OutputAsmVariant
,
185 << "error: unable to create instruction printer for target triple '"
186 << TheTriple
.normalize() << "' with assembly variant "
187 << OutputAsmVariant
<< ".\n";
192 const char *ProgName
= "llvm-mc-fuzzer";
// Both start out null and are moved, still null, into the assembly streamer.
193 std::unique_ptr
<MCCodeEmitter
> CE
= nullptr;
194 std::unique_ptr
<MCAsmBackend
> MAB
= nullptr;
// Assembly output is written into the in-memory OutputString through a
// formatted_raw_ostream layered on a raw_string_ostream.
196 std::string OutputString
;
197 raw_string_ostream
Out(OutputString
);
198 auto FOut
= std::make_unique
<formatted_raw_ostream
>(Out
);
200 std::unique_ptr
<MCStreamer
> Str
;
202 if (FileType
== OFT_AssemblyFile
) {
203 Str
.reset(TheTarget
->createAsmStreamer(Ctx
, std::move(FOut
), IP
,
204 std::move(CE
), std::move(MAB
)));
// Any other file type must be OFT_ObjectFile.
206 assert(FileType
== OFT_ObjectFile
&& "Invalid file type!");
// NOTE(review): "-" is presumably interpreted as stdout by ToolOutputFile —
// confirm.
209 const std::string OutputFilename
= "-";
211 std::make_unique
<ToolOutputFile
>(OutputFilename
, EC
, sys::fs::OF_None
);
213 errs() << EC
.message() << '\n';
217 // Don't waste memory on names of temp labels.
218 Ctx
.setUseNamesOnTempLabels(false);
// The object writer targets OS. When the underlying stream cannot seek, a
// buffer_ostream is created on top of it. NOTE(review): the line that
// re-points OS at BOS is not visible here — confirm.
220 std::unique_ptr
<buffer_ostream
> BOS
;
221 raw_pwrite_stream
*OS
= &Out
->os();
222 if (!Out
->os().supportsSeeking()) {
223 BOS
= std::make_unique
<buffer_ostream
>(Out
->os());
// Raw pointers here; each is wrapped in a unique_ptr when handed to
// createMCObjectStreamer below.
227 MCCodeEmitter
*CE
= TheTarget
->createMCCodeEmitter(*MCII
, Ctx
);
228 MCAsmBackend
*MAB
= TheTarget
->createMCAsmBackend(*STI
, *MRI
, MCOptions
);
229 Str
.reset(TheTarget
->createMCObjectStreamer(
230 TheTriple
, Ctx
, std::unique_ptr
<MCAsmBackend
>(MAB
),
231 MAB
->createObjectWriter(*OS
), std::unique_ptr
<MCCodeEmitter
>(CE
),
// Drive the parser with everything assembled above.
234 const int Res
= AssembleInput(ProgName
, TheTarget
, SrcMgr
, Ctx
, *Str
, *MAI
, *STI
,
// Fuzzer entry point (C linkage): forwards the input bytes unchanged to
// AssembleOneInput and returns its result.
242 extern "C" int LLVMFuzzerTestOneInput(const uint8_t *Data
, size_t Size
) {
243 return AssembleOneInput(Data
, Size
);
// Fuzzer initialization hook (C linkage).
//
// Visible steps, in order:
//   1. Initialize all target infos, target MCs and asm parsers.
//   2. Parse the full command line with cl::ParseCommandLineOptions.
//   3. Rewrite *argc / *argv so that only argv[0] plus the arguments collected
//      in FuzzerArgs remain, re-using the original argv strings.
//   4. Build FeaturesStr from the -mattr values.
//   5. Default TripleName to sys::getDefaultTargetTriple() when it is empty.
//
// NOTE(review): the return statement is not visible here — confirm.
246 extern "C" LLVM_ATTRIBUTE_USED
int LLVMFuzzerInitialize(int *argc
,
248 // The command line is unusual compared to other fuzzers due to the need to
249 // specify the target. Options like -triple, -mcpu, and -mattr work like
250 // their counterparts in llvm-mc, while -fuzzer-args collects options for the
// NOTE(review): the examples below talk about the *disassembler* and pass
// -disassemble, but this file initializes the asm parsers and drives the
// assembler — they look copied from the disassemble fuzzer; confirm and
// update.
255 // Fuzz the big-endian MIPS32R6 disassembler using 100,000 inputs of up to
256 // 4-bytes each and use the contents of ./corpus as the test corpus:
257 // llvm-mc-fuzzer -triple mips-linux-gnu -mcpu=mips32r6 -disassemble \
258 // -fuzzer-args -max_len=4 -runs=100000 ./corpus
260 // Infinitely fuzz the little-endian MIPS64R2 disassembler with the MSA
261 // feature enabled using up to 64-byte inputs:
262 // llvm-mc-fuzzer -triple mipsel-linux-gnu -mcpu=mips64r2 -mattr=msa \
263 // -disassemble -fuzzer-args ./corpus
265 // If your aim is to find instructions that are not tested, then it is
266 // advisable to constrain the maximum input size to a single instruction
267 // using -max_len as in the first example. This results in a test corpus of
268 // individual instructions that test unique paths. Without this constraint,
269 // there will be considerable redundancy in the corpus.
271 char **OriginalArgv
= *argv
;
273 LLVMInitializeAllTargetInfos();
274 LLVMInitializeAllTargetMCs();
275 LLVMInitializeAllAsmParsers();
277 cl::ParseCommandLineOptions(*argc
, OriginalArgv
);
279 // Rebuild the argv without the arguments llvm-mc-fuzzer consumed so that
280 // the driver can parse its arguments.
282 // FuzzerArgs cannot provide the non-const pointer that OriginalArgv needs.
283 // Re-use the strings from OriginalArgv instead of copying FuzzerArg to a
284 // non-const buffer to avoid the need to clean up when the fuzzer terminates.
285 ModifiedArgv
.push_back(OriginalArgv
[0]);
// Match each collected fuzzer argument against argv[1..argc) by string value
// and keep the original pointer.
// NOTE(review): no break after a match is visible, so a string that occurs
// more than once in argv is pushed once per match — confirm this is intended.
286 for (const auto &FuzzerArg
: FuzzerArgs
) {
287 for (int i
= 1; i
< *argc
; ++i
) {
288 if (FuzzerArg
== OriginalArgv
[i
])
289 ModifiedArgv
.push_back(OriginalArgv
[i
]);
// Hand the filtered argument vector back to the caller; ModifiedArgv is a
// file-scope static, so the data() pointer stays valid after return.
292 *argc
= ModifiedArgv
.size();
293 *argv
= ModifiedArgv
.data();
295 // Package up features to be passed to target/subtarget
296 // We have to pass it via a global since the callback doesn't
297 // permit any user data.
299 SubtargetFeatures Features
;
300 for (unsigned i
= 0; i
!= MAttrs
.size(); ++i
)
301 Features
.AddFeature(MAttrs
[i
]);
302 FeaturesStr
= Features
.getString();
// Fall back to the default target triple when -triple was not given.
305 if (TripleName
.empty())
306 TripleName
= sys::getDefaultTargetTriple();