1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
14 #include "base/logging.h"
16 #include "courgette/courgette.h"
17 #include "courgette/encoded_program.h"
21 // Opcodes of simple assembly language
23 ORIGIN
, // ORIGIN <rva> - set current address for assembly.
24 MAKERELOCS
, // Generates a base relocation table.
25 DEFBYTE
, // DEFBYTE <value> - emit a byte literal.
26 REL32
, // REL32 <label> - emit a rel32 encoded reference to 'label'.
27 ABS32
, // REL32 <label> - emit am abs32 encoded reference to 'label'.
31 // Base class for instructions. Because we have so many instructions we want to
32 // keep them as small as possible. For this reason we avoid virtual functions.
36 OP
op() const { return static_cast<OP
>(op_
); }
39 explicit Instruction(OP op
) : op_(op
), info_(0) {}
40 Instruction(OP op
, unsigned int info
) : op_(op
), info_(info
) {}
42 uint32 op_
: 4; // A few bits to store the OP code.
43 uint32 info_
: 28; // Remaining bits in first word available to subclass.
46 DISALLOW_COPY_AND_ASSIGN(Instruction
);
51 // Sets the current address for the emitting instructions.
52 class OriginInstruction
: public Instruction
{
54 explicit OriginInstruction(RVA rva
) : Instruction(ORIGIN
, 0), rva_(rva
) {}
55 RVA
origin_rva() const { return rva_
; }
60 // Emits an entire base relocation table.
61 class MakeRelocsInstruction
: public Instruction
{
63 MakeRelocsInstruction() : Instruction(MAKERELOCS
) {}
66 // Emits a single byte.
67 class ByteInstruction
: public Instruction
{
69 explicit ByteInstruction(uint8 value
) : Instruction(DEFBYTE
, value
) {}
70 uint8
byte_value() const { return info_
; }
73 // A ABS32 to REL32 instruction emits a reference to a label's address.
74 class InstructionWithLabel
: public Instruction
{
76 InstructionWithLabel(OP op
, Label
* label
)
77 : Instruction(op
, 0), label_(label
) {
78 if (label
== NULL
) NOTREACHED();
80 Label
* label() const { return label_
; }
87 AssemblyProgram::AssemblyProgram()
88 : byte_instruction_cache_(NULL
),
92 static void DeleteContainedLabels(const RVAToLabel
& labels
) {
93 for (RVAToLabel::const_iterator p
= labels
.begin(); p
!= labels
.end(); ++p
)
97 AssemblyProgram::~AssemblyProgram() {
98 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
99 Instruction
* instruction
= instructions_
[i
];
100 if (instruction
->op() != DEFBYTE
) // Will be in byte_instruction_cache_.
103 if (byte_instruction_cache_
) {
104 for (size_t i
= 0; i
< 256; ++i
)
105 delete byte_instruction_cache_
[i
];
106 delete[] byte_instruction_cache_
;
108 DeleteContainedLabels(rel32_labels_
);
109 DeleteContainedLabels(abs32_labels_
);
112 void AssemblyProgram::EmitMakeRelocsInstruction() {
113 Emit(new MakeRelocsInstruction());
116 void AssemblyProgram::EmitOriginInstruction(RVA rva
) {
117 Emit(new OriginInstruction(rva
));
120 void AssemblyProgram::EmitByteInstruction(uint8 byte
) {
121 Emit(GetByteInstruction(byte
));
124 void AssemblyProgram::EmitRel32(Label
* label
) {
125 Emit(new InstructionWithLabel(REL32
, label
));
128 void AssemblyProgram::EmitAbs32(Label
* label
) {
129 Emit(new InstructionWithLabel(ABS32
, label
));
132 Label
* AssemblyProgram::FindOrMakeAbs32Label(RVA rva
) {
133 return FindLabel(rva
, &abs32_labels_
);
136 Label
* AssemblyProgram::FindOrMakeRel32Label(RVA rva
) {
137 return FindLabel(rva
, &rel32_labels_
);
140 void AssemblyProgram::DefaultAssignIndexes() {
141 DefaultAssignIndexes(&abs32_labels_
);
142 DefaultAssignIndexes(&rel32_labels_
);
145 void AssemblyProgram::UnassignIndexes() {
146 UnassignIndexes(&abs32_labels_
);
147 UnassignIndexes(&rel32_labels_
);
150 void AssemblyProgram::AssignRemainingIndexes() {
151 AssignRemainingIndexes(&abs32_labels_
);
152 AssignRemainingIndexes(&rel32_labels_
);
155 Label
* AssemblyProgram::InstructionAbs32Label(
156 const Instruction
* instruction
) const {
157 if (instruction
->op() == ABS32
)
158 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
162 Label
* AssemblyProgram::InstructionRel32Label(
163 const Instruction
* instruction
) const {
164 if (instruction
->op() == REL32
)
165 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
169 Label
* AssemblyProgram::FindLabel(RVA rva
, RVAToLabel
* labels
) {
170 Label
*& slot
= (*labels
)[rva
];
172 slot
= new Label(rva
);
177 void AssemblyProgram::UnassignIndexes(RVAToLabel
* labels
) {
178 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
179 Label
* current
= p
->second
;
180 current
->index_
= Label::kNoIndex
;
184 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
187 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel
* labels
) {
189 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
190 Label
* current
= p
->second
;
191 if (current
->index_
!= Label::kNoIndex
)
193 current
->index_
= index
;
198 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
199 // yet assigned an index.
201 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel
* labels
) {
202 // An address table compresses best when each index is associated with an
203 // address that is slight larger than the previous index.
205 // First see which indexes have not been used. The 'available' vector could
206 // grow even bigger, but the number of addresses is a better starting size
208 std::vector
<bool> available(labels
->size(), true);
211 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
212 int index
= p
->second
->index_
;
213 if (index
!= Label::kNoIndex
) {
214 while (static_cast<size_t>(index
) >= available
.size())
215 available
.push_back(true);
216 available
.at(index
) = false;
221 LOG(INFO
) << used
<< " of " << labels
->size() << " labels pre-assigned";
223 // Are there any unused labels that happen to be adjacent following a used
226 int fill_forward_count
= 0;
228 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
229 Label
* current
= p
->second
;
230 if (current
->index_
== Label::kNoIndex
) {
232 if (prev
&& prev
->index_
!= Label::kNoIndex
)
233 index
= prev
->index_
+ 1;
234 if (index
< available
.size() && available
.at(index
)) {
235 current
->index_
= index
;
236 available
.at(index
) = false;
237 ++fill_forward_count
;
243 // Are there any unused labels that happen to be adjacent preceeding a used
246 int fill_backward_count
= 0;
248 for (RVAToLabel::reverse_iterator p
= labels
->rbegin();
251 Label
* current
= p
->second
;
252 if (current
->index_
== Label::kNoIndex
) {
255 prev_index
= prev
->index_
;
257 prev_index
= available
.size();
258 if (prev_index
!= 0 &&
259 prev_index
!= Label::kNoIndex
&&
260 available
.at(prev_index
- 1)) {
261 current
->index_
= prev_index
- 1;
262 available
.at(current
->index_
) = false;
263 ++fill_backward_count
;
269 // Fill in any remaining indexes
270 int fill_infill_count
= 0;
272 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
273 Label
* current
= p
->second
;
274 if (current
->index_
== Label::kNoIndex
) {
275 while (!available
.at(index
)) {
278 current
->index_
= index
;
279 available
.at(index
) = false;
286 << " forward " << fill_forward_count
<< " "
287 << " backward " << fill_backward_count
<< " "
288 << " infill " << fill_infill_count
;
291 typedef void (EncodedProgram::*DefineLabelMethod
)(int index
, RVA value
);
293 static void DefineLabels(const RVAToLabel
& labels
,
294 EncodedProgram
* encoded_format
,
295 DefineLabelMethod define_label
) {
296 for (RVAToLabel::const_iterator p
= labels
.begin(); p
!= labels
.end(); ++p
) {
297 Label
* label
= p
->second
;
298 (encoded_format
->*define_label
)(label
->index_
, label
->rva_
);
302 EncodedProgram
* AssemblyProgram::Encode() const {
303 EncodedProgram
* encoded
= new EncodedProgram();
305 encoded
->set_image_base(image_base_
);
306 DefineLabels(abs32_labels_
, encoded
, &EncodedProgram::DefineAbs32Label
);
307 DefineLabels(rel32_labels_
, encoded
, &EncodedProgram::DefineRel32Label
);
308 encoded
->EndLabels();
310 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
311 Instruction
* instruction
= instructions_
[i
];
313 switch (instruction
->op()) {
315 OriginInstruction
* org
= static_cast<OriginInstruction
*>(instruction
);
316 encoded
->AddOrigin(org
->origin_rva());
320 uint8 b
= static_cast<ByteInstruction
*>(instruction
)->byte_value();
321 encoded
->AddCopy(1, &b
);
325 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
326 encoded
->AddRel32(label
->index_
);
330 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
331 encoded
->AddAbs32(label
->index_
);
335 encoded
->AddMakeRelocs();
339 NOTREACHED() << "Unknown Insn OP kind";
347 Instruction
* AssemblyProgram::GetByteInstruction(uint8 byte
) {
348 if (!byte_instruction_cache_
) {
349 byte_instruction_cache_
= new Instruction
*[256];
350 for (int i
= 0; i
< 256; ++i
) {
351 byte_instruction_cache_
[i
] = new ByteInstruction(static_cast<uint8
>(i
));
355 return byte_instruction_cache_
[byte
];
358 ////////////////////////////////////////////////////////////////////////////////
360 Status
Encode(AssemblyProgram
* program
, EncodedProgram
** output
) {
362 EncodedProgram
*encoded
= program
->Encode();
367 return C_GENERAL_ERROR
;
371 } // namespace courgette