1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
22 // Opcodes of simple assembly language
24 ORIGIN
, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS
, // Generates a base relocation table.
26 MAKEELFRELOCS
, // Generates a base relocation table.
27 DEFBYTE
, // DEFBYTE <value> - emit a byte literal.
28 REL32
, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32
, // REL32 <label> - emit am abs32 encoded reference to 'label'.
30 REL32ARM
, // REL32ARM <c_op> <label> - arm-specific rel32 reference
31 MAKEELFARMRELOCS
, // Generates a base relocation table.
32 DEFBYTES
, // Emits any number of byte literals
36 // Base class for instructions. Because we have so many instructions we want to
37 // keep them as small as possible. For this reason we avoid virtual functions.
41 OP
op() const { return static_cast<OP
>(op_
); }
44 explicit Instruction(OP op
) : op_(op
), info_(0) {}
45 Instruction(OP op
, unsigned int info
) : op_(op
), info_(info
) {}
47 uint32 op_
: 4; // A few bits to store the OP code.
48 uint32 info_
: 28; // Remaining bits in first word available to subclass.
51 DISALLOW_COPY_AND_ASSIGN(Instruction
);
56 // Sets the current address for the emitting instructions.
57 class OriginInstruction
: public Instruction
{
59 explicit OriginInstruction(RVA rva
) : Instruction(ORIGIN
, 0), rva_(rva
) {}
60 RVA
origin_rva() const { return rva_
; }
65 // Emits an entire PE base relocation table.
66 class PeRelocsInstruction
: public Instruction
{
68 PeRelocsInstruction() : Instruction(MAKEPERELOCS
) {}
71 // Emits an ELF relocation table.
72 class ElfRelocsInstruction
: public Instruction
{
74 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS
) {}
77 // Emits an ELF ARM relocation table.
78 class ElfARMRelocsInstruction
: public Instruction
{
80 ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS
) {}
83 // Emits a single byte.
84 class ByteInstruction
: public Instruction
{
86 explicit ByteInstruction(uint8 value
) : Instruction(DEFBYTE
, value
) {}
87 uint8
byte_value() const { return info_
; }
90 // Emits a single byte.
91 class BytesInstruction
: public Instruction
{
93 BytesInstruction(const uint8
* values
, size_t len
)
94 : Instruction(DEFBYTES
, 0),
97 const uint8
* byte_values() const { return values_
; }
98 size_t len() const { return len_
; }
101 const uint8
* values_
;
105 // A ABS32 to REL32 instruction emits a reference to a label's address.
106 class InstructionWithLabel
: public Instruction
{
108 InstructionWithLabel(OP op
, Label
* label
)
109 : Instruction(op
, 0), label_(label
) {
110 if (label
== NULL
) NOTREACHED();
112 Label
* label() const { return label_
; }
117 // An ARM REL32 instruction emits a reference to a label's address and
118 // a specially-compressed ARM op.
119 class InstructionWithLabelARM
: public InstructionWithLabel
{
121 InstructionWithLabelARM(OP op
, uint16 compressed_op
, Label
* label
,
122 const uint8
* arm_op
, uint16 op_size
)
123 : InstructionWithLabel(op
, label
), compressed_op_(compressed_op
),
124 arm_op_(arm_op
), op_size_(op_size
) {
125 if (label
== NULL
) NOTREACHED();
127 uint16
compressed_op() const { return compressed_op_
; }
128 const uint8
* arm_op() const { return arm_op_
; }
129 uint16
op_size() const { return op_size_
; }
131 uint16 compressed_op_
;
132 const uint8
* arm_op_
;
138 AssemblyProgram::AssemblyProgram(ExecutableType kind
)
139 : kind_(kind
), image_base_(0) {
142 static void DeleteContainedLabels(const RVAToLabel
& labels
) {
143 for (RVAToLabel::const_iterator p
= labels
.begin(); p
!= labels
.end(); ++p
)
147 AssemblyProgram::~AssemblyProgram() {
148 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
149 Instruction
* instruction
= instructions_
[i
];
150 if (instruction
->op() != DEFBYTE
) // Will be in byte_instruction_cache_.
153 if (byte_instruction_cache_
.get()) {
154 for (size_t i
= 0; i
< 256; ++i
)
155 delete byte_instruction_cache_
[i
];
157 DeleteContainedLabels(rel32_labels_
);
158 DeleteContainedLabels(abs32_labels_
);
161 CheckBool
AssemblyProgram::EmitPeRelocsInstruction() {
162 return Emit(new(std::nothrow
) PeRelocsInstruction());
165 CheckBool
AssemblyProgram::EmitElfRelocationInstruction() {
166 return Emit(new(std::nothrow
) ElfRelocsInstruction());
169 CheckBool
AssemblyProgram::EmitElfARMRelocationInstruction() {
170 return Emit(new(std::nothrow
) ElfARMRelocsInstruction());
173 CheckBool
AssemblyProgram::EmitOriginInstruction(RVA rva
) {
174 return Emit(new(std::nothrow
) OriginInstruction(rva
));
177 CheckBool
AssemblyProgram::EmitByteInstruction(uint8 byte
) {
178 return Emit(GetByteInstruction(byte
));
181 CheckBool
AssemblyProgram::EmitBytesInstruction(const uint8
* values
,
183 return Emit(new(std::nothrow
) BytesInstruction(values
, len
));
186 CheckBool
AssemblyProgram::EmitRel32(Label
* label
) {
187 return Emit(new(std::nothrow
) InstructionWithLabel(REL32
, label
));
190 CheckBool
AssemblyProgram::EmitRel32ARM(uint16 op
, Label
* label
,
191 const uint8
* arm_op
, uint16 op_size
) {
192 return Emit(new(std::nothrow
) InstructionWithLabelARM(REL32ARM
, op
, label
,
196 CheckBool
AssemblyProgram::EmitAbs32(Label
* label
) {
197 return Emit(new(std::nothrow
) InstructionWithLabel(ABS32
, label
));
200 Label
* AssemblyProgram::FindOrMakeAbs32Label(RVA rva
) {
201 return FindLabel(rva
, &abs32_labels_
);
204 Label
* AssemblyProgram::FindOrMakeRel32Label(RVA rva
) {
205 return FindLabel(rva
, &rel32_labels_
);
208 void AssemblyProgram::DefaultAssignIndexes() {
209 DefaultAssignIndexes(&abs32_labels_
);
210 DefaultAssignIndexes(&rel32_labels_
);
213 void AssemblyProgram::UnassignIndexes() {
214 UnassignIndexes(&abs32_labels_
);
215 UnassignIndexes(&rel32_labels_
);
218 void AssemblyProgram::AssignRemainingIndexes() {
219 AssignRemainingIndexes(&abs32_labels_
);
220 AssignRemainingIndexes(&rel32_labels_
);
223 Label
* AssemblyProgram::InstructionAbs32Label(
224 const Instruction
* instruction
) const {
225 if (instruction
->op() == ABS32
)
226 return static_cast<const InstructionWithLabel
*>(instruction
)->label();
230 Label
* AssemblyProgram::InstructionRel32Label(
231 const Instruction
* instruction
) const {
232 if (instruction
->op() == REL32
|| instruction
->op() == REL32ARM
) {
234 static_cast<const InstructionWithLabel
*>(instruction
)->label();
240 CheckBool
AssemblyProgram::Emit(Instruction
* instruction
) {
243 bool ok
= instructions_
.push_back(instruction
);
249 Label
* AssemblyProgram::FindLabel(RVA rva
, RVAToLabel
* labels
) {
250 Label
*& slot
= (*labels
)[rva
];
252 slot
= new(std::nothrow
) Label(rva
);
258 void AssemblyProgram::UnassignIndexes(RVAToLabel
* labels
) {
259 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
260 Label
* current
= p
->second
;
261 current
->index_
= Label::kNoIndex
;
265 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
268 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel
* labels
) {
270 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
271 Label
* current
= p
->second
;
272 if (current
->index_
!= Label::kNoIndex
)
274 current
->index_
= index
;
279 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
280 // yet assigned an index.
282 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel
* labels
) {
283 // An address table compresses best when each index is associated with an
284 // address that is slight larger than the previous index.
286 // First see which indexes have not been used. The 'available' vector could
287 // grow even bigger, but the number of addresses is a better starting size
289 std::vector
<bool> available(labels
->size(), true);
292 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
293 int index
= p
->second
->index_
;
294 if (index
!= Label::kNoIndex
) {
295 while (static_cast<size_t>(index
) >= available
.size())
296 available
.push_back(true);
297 available
.at(index
) = false;
302 VLOG(1) << used
<< " of " << labels
->size() << " labels pre-assigned";
304 // Are there any unused labels that happen to be adjacent following a used
307 int fill_forward_count
= 0;
309 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
310 Label
* current
= p
->second
;
311 if (current
->index_
== Label::kNoIndex
) {
313 if (prev
&& prev
->index_
!= Label::kNoIndex
)
314 index
= prev
->index_
+ 1;
315 if (index
< static_cast<int>(available
.size()) && available
.at(index
)) {
316 current
->index_
= index
;
317 available
.at(index
) = false;
318 ++fill_forward_count
;
324 // Are there any unused labels that happen to be adjacent preceeding a used
327 int fill_backward_count
= 0;
329 for (RVAToLabel::reverse_iterator p
= labels
->rbegin();
332 Label
* current
= p
->second
;
333 if (current
->index_
== Label::kNoIndex
) {
336 prev_index
= prev
->index_
;
338 prev_index
= static_cast<uint32
>(available
.size());
339 if (prev_index
!= 0 &&
340 prev_index
!= Label::kNoIndex
&&
341 available
.at(prev_index
- 1)) {
342 current
->index_
= prev_index
- 1;
343 available
.at(current
->index_
) = false;
344 ++fill_backward_count
;
350 // Fill in any remaining indexes
351 int fill_infill_count
= 0;
353 for (RVAToLabel::iterator p
= labels
->begin(); p
!= labels
->end(); ++p
) {
354 Label
* current
= p
->second
;
355 if (current
->index_
== Label::kNoIndex
) {
356 while (!available
.at(index
)) {
359 current
->index_
= index
;
360 available
.at(index
) = false;
366 VLOG(1) << " fill forward " << fill_forward_count
367 << " backward " << fill_backward_count
368 << " infill " << fill_infill_count
;
371 typedef CheckBool (EncodedProgram::*DefineLabelMethod
)(int index
, RVA value
);
376 static CheckBool
DefineLabels(const RVAToLabel
& labels
,
377 EncodedProgram
* encoded_format
,
378 DefineLabelMethod define_label
) {
380 for (RVAToLabel::const_iterator p
= labels
.begin();
381 ok
&& p
!= labels
.end();
383 Label
* label
= p
->second
;
384 ok
= (encoded_format
->*define_label
)(label
->index_
, label
->rva_
);
389 EncodedProgram
* AssemblyProgram::Encode() const {
390 scoped_ptr
<EncodedProgram
> encoded(new(std::nothrow
) EncodedProgram());
394 encoded
->set_image_base(image_base_
);
396 if (!DefineLabels(abs32_labels_
, encoded
.get(),
397 &EncodedProgram::DefineAbs32Label
) ||
398 !DefineLabels(rel32_labels_
, encoded
.get(),
399 &EncodedProgram::DefineRel32Label
)) {
403 encoded
->EndLabels();
405 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
406 Instruction
* instruction
= instructions_
[i
];
408 switch (instruction
->op()) {
410 OriginInstruction
* org
= static_cast<OriginInstruction
*>(instruction
);
411 if (!encoded
->AddOrigin(org
->origin_rva()))
416 uint8 b
= static_cast<ByteInstruction
*>(instruction
)->byte_value();
417 if (!encoded
->AddCopy(1, &b
))
422 const uint8
* byte_values
=
423 static_cast<BytesInstruction
*>(instruction
)->byte_values();
424 size_t len
= static_cast<BytesInstruction
*>(instruction
)->len();
426 if (!encoded
->AddCopy(len
, byte_values
))
431 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
432 if (!encoded
->AddRel32(label
->index_
))
438 static_cast<InstructionWithLabelARM
*>(instruction
)->label();
439 uint16 compressed_op
=
440 static_cast<InstructionWithLabelARM
*>(instruction
)->
442 if (!encoded
->AddRel32ARM(compressed_op
, label
->index_
))
447 Label
* label
= static_cast<InstructionWithLabel
*>(instruction
)->label();
448 if (!encoded
->AddAbs32(label
->index_
))
453 if (!encoded
->AddPeMakeRelocs(kind_
))
457 case MAKEELFRELOCS
: {
458 if (!encoded
->AddElfMakeRelocs())
462 case MAKEELFARMRELOCS
: {
463 if (!encoded
->AddElfARMMakeRelocs())
468 NOTREACHED() << "Unknown Insn OP kind";
473 return encoded
.release();
476 Instruction
* AssemblyProgram::GetByteInstruction(uint8 byte
) {
477 if (!byte_instruction_cache_
.get()) {
478 byte_instruction_cache_
.reset(new(std::nothrow
) Instruction
*[256]);
479 if (!byte_instruction_cache_
.get())
482 for (int i
= 0; i
< 256; ++i
) {
483 byte_instruction_cache_
[i
] =
484 new(std::nothrow
) ByteInstruction(static_cast<uint8
>(i
));
485 if (!byte_instruction_cache_
[i
]) {
486 for (int j
= 0; j
< i
; ++j
)
487 delete byte_instruction_cache_
[j
];
488 byte_instruction_cache_
.reset();
494 return byte_instruction_cache_
[byte
];
497 // Chosen empirically to give the best reduction in payload size for
498 // an update from daisy_3701.98.0 to daisy_4206.0.0.
499 const int AssemblyProgram::kLabelLowerLimit
= 5;
501 CheckBool
AssemblyProgram::TrimLabels() {
502 // For now only trim for ARM binaries
503 if (kind() != EXE_ELF_32_ARM
)
506 int lower_limit
= kLabelLowerLimit
;
508 VLOG(1) << "TrimLabels: threshold " << lower_limit
;
510 // Remove underused labels from the list of labels
511 RVAToLabel::iterator it
= rel32_labels_
.begin();
512 while (it
!= rel32_labels_
.end()) {
513 if (it
->second
->count_
<= lower_limit
) {
514 rel32_labels_
.erase(it
++);
520 // Walk through the list of instructions, replacing trimmed labels
521 // with the original machine instruction
522 for (size_t i
= 0; i
< instructions_
.size(); ++i
) {
523 Instruction
* instruction
= instructions_
[i
];
524 switch (instruction
->op()) {
527 static_cast<InstructionWithLabelARM
*>(instruction
)->label();
528 if (label
->count_
<= lower_limit
) {
529 const uint8
* arm_op
=
530 static_cast<InstructionWithLabelARM
*>(instruction
)->arm_op();
532 static_cast<InstructionWithLabelARM
*>(instruction
)->op_size();
536 BytesInstruction
* new_instruction
=
537 new(std::nothrow
) BytesInstruction(arm_op
, op_size
);
538 instructions_
[i
] = new_instruction
;
550 ////////////////////////////////////////////////////////////////////////////////
552 Status
TrimLabels(AssemblyProgram
* program
) {
553 if (program
->TrimLabels())
556 return C_TRIM_FAILED
;
559 Status
Encode(AssemblyProgram
* program
, EncodedProgram
** output
) {
561 EncodedProgram
*encoded
= program
->Encode();
566 return C_GENERAL_ERROR
;
570 } // namespace courgette