1 //===- Disassembler.cpp - Disassembler for hex strings --------------------===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This class implements the disassembler of strings of bytes written in
10 // hexadecimal, from standard input or from a file.
12 //===----------------------------------------------------------------------===//
14 #include "Disassembler.h"
15 #include "llvm/MC/MCAsmInfo.h"
16 #include "llvm/MC/MCContext.h"
17 #include "llvm/MC/MCDisassembler/MCDisassembler.h"
18 #include "llvm/MC/MCInst.h"
19 #include "llvm/MC/MCRegisterInfo.h"
20 #include "llvm/MC/MCStreamer.h"
21 #include "llvm/MC/MCSubtargetInfo.h"
22 #include "llvm/MC/TargetRegistry.h"
23 #include "llvm/Support/MemoryBuffer.h"
24 #include "llvm/Support/SourceMgr.h"
25 #include "llvm/Support/raw_ostream.h"
26 #include "llvm/TargetParser/Triple.h"
/// Parsed input for one disassembly run: `first` holds the raw byte values and
/// `second` holds, for the byte at the same index, a pointer to the start of
/// the input token it was parsed from (used to locate diagnostics).
using ByteArrayTy =
    std::pair<std::vector<unsigned char>, std::vector<const char *>>;
33 static bool PrintInsts(const MCDisassembler
&DisAsm
, const ByteArrayTy
&Bytes
,
34 SourceMgr
&SM
, raw_ostream
&Out
, MCStreamer
&Streamer
,
35 bool InAtomicBlock
, const MCSubtargetInfo
&STI
) {
36 ArrayRef
<uint8_t> Data(Bytes
.first
.data(), Bytes
.first
.size());
38 // Disassemble it to strings.
42 for (Index
= 0; Index
< Bytes
.first
.size(); Index
+= Size
) {
45 MCDisassembler::DecodeStatus S
;
46 S
= DisAsm
.getInstruction(Inst
, Size
, Data
.slice(Index
), Index
, nulls());
48 case MCDisassembler::Fail
:
49 SM
.PrintMessage(SMLoc::getFromPointer(Bytes
.second
[Index
]),
50 SourceMgr::DK_Warning
, "invalid instruction encoding");
51 // Don't try to resynchronise the stream in a block
56 Size
= 1; // skip illegible bytes
60 case MCDisassembler::SoftFail
:
61 SM
.PrintMessage(SMLoc::getFromPointer(Bytes
.second
[Index
]),
62 SourceMgr::DK_Warning
,
63 "potentially undefined instruction encoding");
66 case MCDisassembler::Success
:
67 Streamer
.emitInstruction(Inst
, STI
);
75 static bool SkipToToken(StringRef
&Str
) {
80 // Strip horizontal whitespace and commas.
81 if (size_t Pos
= Str
.find_first_not_of(" \t\r\n,")) {
82 Str
= Str
.substr(Pos
);
86 // If this is the start of a comment, remove the rest of the line.
88 Str
= Str
.substr(Str
.find_first_of('\n'));
95 static bool ByteArrayFromString(ByteArrayTy
&ByteArray
, StringRef
&Str
,
97 while (SkipToToken(Str
)) {
98 // Handled by higher level
99 if (Str
[0] == '[' || Str
[0] == ']')
102 // Get the current token.
103 size_t Next
= Str
.find_first_of(" \t\n\r,#[]");
104 StringRef Value
= Str
.substr(0, Next
);
106 // Convert to a byte and add to the byte vector.
108 if (Value
.getAsInteger(0, ByteVal
) || ByteVal
> 255) {
109 // If we have an error, print it and skip to the end of line.
110 SM
.PrintMessage(SMLoc::getFromPointer(Value
.data()), SourceMgr::DK_Error
,
111 "invalid input token");
112 Str
= Str
.substr(Str
.find('\n'));
113 ByteArray
.first
.clear();
114 ByteArray
.second
.clear();
118 ByteArray
.first
.push_back(ByteVal
);
119 ByteArray
.second
.push_back(Value
.data());
120 Str
= Str
.substr(Next
);
126 int Disassembler::disassemble(const Target
&T
, const std::string
&TripleName
,
127 MCSubtargetInfo
&STI
, MCStreamer
&Streamer
,
128 MemoryBuffer
&Buffer
, SourceMgr
&SM
,
130 std::unique_ptr
<const MCRegisterInfo
> MRI(T
.createMCRegInfo(TripleName
));
132 errs() << "error: no register info for target " << TripleName
<< "\n";
136 MCTargetOptions MCOptions
;
137 std::unique_ptr
<const MCAsmInfo
> MAI(
138 T
.createMCAsmInfo(*MRI
, TripleName
, MCOptions
));
140 errs() << "error: no assembly info for target " << TripleName
<< "\n";
144 // Set up the MCContext for creating symbols and MCExpr's.
145 MCContext
Ctx(Triple(TripleName
), MAI
.get(), MRI
.get(), &STI
);
147 std::unique_ptr
<const MCDisassembler
> DisAsm(
148 T
.createMCDisassembler(STI
, Ctx
));
150 errs() << "error: no disassembler for target " << TripleName
<< "\n";
154 // Set up initial section manually here
155 Streamer
.initSections(false, STI
);
157 bool ErrorOccurred
= false;
159 // Convert the input to a vector for disassembly.
160 ByteArrayTy ByteArray
;
161 StringRef Str
= Buffer
.getBuffer();
162 bool InAtomicBlock
= false;
164 while (SkipToToken(Str
)) {
165 ByteArray
.first
.clear();
166 ByteArray
.second
.clear();
170 SM
.PrintMessage(SMLoc::getFromPointer(Str
.data()), SourceMgr::DK_Error
,
171 "nested atomic blocks make no sense");
172 ErrorOccurred
= true;
174 InAtomicBlock
= true;
175 Str
= Str
.drop_front();
177 } else if (Str
[0] == ']') {
178 if (!InAtomicBlock
) {
179 SM
.PrintMessage(SMLoc::getFromPointer(Str
.data()), SourceMgr::DK_Error
,
180 "attempt to close atomic block without opening");
181 ErrorOccurred
= true;
183 InAtomicBlock
= false;
184 Str
= Str
.drop_front();
188 // It's a real token, get the bytes and emit them
189 ErrorOccurred
|= ByteArrayFromString(ByteArray
, Str
, SM
);
191 if (!ByteArray
.first
.empty())
193 PrintInsts(*DisAsm
, ByteArray
, SM
, Out
, Streamer
, InAtomicBlock
, STI
);
197 SM
.PrintMessage(SMLoc::getFromPointer(Str
.data()), SourceMgr::DK_Error
,
198 "unclosed atomic block");
199 ErrorOccurred
= true;
202 return ErrorOccurred
;