Include all dupe types (event when value is zero) in scan stats.
[chromium-blink-merge.git] / courgette / assembly_program.cc
blob965f457878b38bb98e289547e8189d4ab2ee42c0
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
7 #include <memory.h>
8 #include <algorithm>
9 #include <map>
10 #include <set>
11 #include <sstream>
12 #include <vector>
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
20 namespace courgette {
22 // Opcodes of simple assembly language
23 enum OP {
24 ORIGIN, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS, // Generates a base relocation table.
26 MAKEELFRELOCS, // Generates a base relocation table.
27 DEFBYTE, // DEFBYTE <value> - emit a byte literal.
28 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32, // ABS32 <label> - emit an abs32 encoded reference to 'label'.
30 REL32ARM, // REL32ARM <c_op> <label> - arm-specific rel32 reference
31 MAKEELFARMRELOCS, // Generates a base relocation table.
32 DEFBYTES, // Emits any number of byte literals
33 ABS64, // ABS64 <label> - emit an abs64 encoded reference to 'label'.
34 LAST_OP
37 // Base class for instructions. Because we have so many instructions we want to
38 // keep them as small as possible. For this reason we avoid virtual functions.
40 class Instruction {
41 public:
42 OP op() const { return static_cast<OP>(op_); }
44 protected:
45 explicit Instruction(OP op) : op_(op), info_(0) {}
46 Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
48 uint32 op_ : 4; // A few bits to store the OP code.
49 uint32 info_ : 28; // Remaining bits in first word available to subclass.
51 private:
52 DISALLOW_COPY_AND_ASSIGN(Instruction);
55 namespace {
57 // Sets the current address for the emitting instructions.
58 class OriginInstruction : public Instruction {
59 public:
60 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
61 RVA origin_rva() const { return rva_; }
62 private:
63 RVA rva_;
66 // Emits an entire PE base relocation table.
67 class PeRelocsInstruction : public Instruction {
68 public:
69 PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
72 // Emits an ELF relocation table.
73 class ElfRelocsInstruction : public Instruction {
74 public:
75 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
78 // Emits an ELF ARM relocation table.
79 class ElfARMRelocsInstruction : public Instruction {
80 public:
81 ElfARMRelocsInstruction() : Instruction(MAKEELFARMRELOCS) {}
84 // Emits a single byte.
85 class ByteInstruction : public Instruction {
86 public:
87 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
88 uint8 byte_value() const { return info_; }
91 // Emits a single byte.
92 class BytesInstruction : public Instruction {
93 public:
94 BytesInstruction(const uint8* values, size_t len)
95 : Instruction(DEFBYTES, 0),
96 values_(values),
97 len_(len) {}
98 const uint8* byte_values() const { return values_; }
99 size_t len() const { return len_; }
101 private:
102 const uint8* values_;
103 size_t len_;
106 // A ABS32 to REL32 instruction emits a reference to a label's address.
107 class InstructionWithLabel : public Instruction {
108 public:
109 InstructionWithLabel(OP op, Label* label)
110 : Instruction(op, 0), label_(label) {
111 if (label == NULL) NOTREACHED();
113 Label* label() const { return label_; }
114 protected:
115 Label* label_;
118 // An ARM REL32 instruction emits a reference to a label's address and
119 // a specially-compressed ARM op.
120 class InstructionWithLabelARM : public InstructionWithLabel {
121 public:
122 InstructionWithLabelARM(OP op, uint16 compressed_op, Label* label,
123 const uint8* arm_op, uint16 op_size)
124 : InstructionWithLabel(op, label), compressed_op_(compressed_op),
125 arm_op_(arm_op), op_size_(op_size) {
126 if (label == NULL) NOTREACHED();
128 uint16 compressed_op() const { return compressed_op_; }
129 const uint8* arm_op() const { return arm_op_; }
130 uint16 op_size() const { return op_size_; }
131 private:
132 uint16 compressed_op_;
133 const uint8* arm_op_;
134 uint16 op_size_;
137 } // namespace
139 AssemblyProgram::AssemblyProgram(ExecutableType kind)
140 : kind_(kind), image_base_(0) {
143 static void DeleteContainedLabels(const RVAToLabel& labels) {
144 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
145 delete p->second;
148 AssemblyProgram::~AssemblyProgram() {
149 for (size_t i = 0; i < instructions_.size(); ++i) {
150 Instruction* instruction = instructions_[i];
151 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
152 delete instruction;
154 if (byte_instruction_cache_.get()) {
155 for (size_t i = 0; i < 256; ++i)
156 delete byte_instruction_cache_[i];
158 DeleteContainedLabels(rel32_labels_);
159 DeleteContainedLabels(abs32_labels_);
162 CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
163 return Emit(new(std::nothrow) PeRelocsInstruction());
166 CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
167 return Emit(new(std::nothrow) ElfRelocsInstruction());
170 CheckBool AssemblyProgram::EmitElfARMRelocationInstruction() {
171 return Emit(new(std::nothrow) ElfARMRelocsInstruction());
174 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
175 return Emit(new(std::nothrow) OriginInstruction(rva));
178 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
179 return Emit(GetByteInstruction(byte));
182 CheckBool AssemblyProgram::EmitBytesInstruction(const uint8* values,
183 size_t len) {
184 return Emit(new(std::nothrow) BytesInstruction(values, len));
187 CheckBool AssemblyProgram::EmitRel32(Label* label) {
188 return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
191 CheckBool AssemblyProgram::EmitRel32ARM(uint16 op, Label* label,
192 const uint8* arm_op, uint16 op_size) {
193 return Emit(new(std::nothrow) InstructionWithLabelARM(REL32ARM, op, label,
194 arm_op, op_size));
197 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
198 return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
201 CheckBool AssemblyProgram::EmitAbs64(Label* label) {
202 return Emit(new (std::nothrow) InstructionWithLabel(ABS64, label));
205 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
206 return FindLabel(rva, &abs32_labels_);
209 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
210 return FindLabel(rva, &rel32_labels_);
213 void AssemblyProgram::DefaultAssignIndexes() {
214 DefaultAssignIndexes(&abs32_labels_);
215 DefaultAssignIndexes(&rel32_labels_);
218 void AssemblyProgram::UnassignIndexes() {
219 UnassignIndexes(&abs32_labels_);
220 UnassignIndexes(&rel32_labels_);
223 void AssemblyProgram::AssignRemainingIndexes() {
224 AssignRemainingIndexes(&abs32_labels_);
225 AssignRemainingIndexes(&rel32_labels_);
228 Label* AssemblyProgram::InstructionAbs32Label(
229 const Instruction* instruction) const {
230 if (instruction->op() == ABS32)
231 return static_cast<const InstructionWithLabel*>(instruction)->label();
232 return NULL;
235 Label* AssemblyProgram::InstructionAbs64Label(
236 const Instruction* instruction) const {
237 if (instruction->op() == ABS64)
238 return static_cast<const InstructionWithLabel*>(instruction)->label();
239 return NULL;
242 Label* AssemblyProgram::InstructionRel32Label(
243 const Instruction* instruction) const {
244 if (instruction->op() == REL32 || instruction->op() == REL32ARM) {
245 Label* label =
246 static_cast<const InstructionWithLabel*>(instruction)->label();
247 return label;
249 return NULL;
252 CheckBool AssemblyProgram::Emit(Instruction* instruction) {
253 if (!instruction)
254 return false;
255 bool ok = instructions_.push_back(instruction);
256 if (!ok)
257 delete instruction;
258 return ok;
261 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
262 Label*& slot = (*labels)[rva];
263 if (slot == NULL) {
264 slot = new(std::nothrow) Label(rva);
265 if (slot == NULL)
266 return NULL;
268 slot->count_++;
269 return slot;
272 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
273 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
274 Label* current = p->second;
275 current->index_ = Label::kNoIndex;
279 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
280 // address order.
282 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
283 int index = 0;
284 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
285 Label* current = p->second;
286 if (current->index_ != Label::kNoIndex)
287 NOTREACHED();
288 current->index_ = index;
289 ++index;
293 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
294 // yet assigned an index.
296 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
297 // An address table compresses best when each index is associated with an
298 // address that is slight larger than the previous index.
300 // First see which indexes have not been used. The 'available' vector could
301 // grow even bigger, but the number of addresses is a better starting size
302 // than empty.
303 std::vector<bool> available(labels->size(), true);
304 int used = 0;
306 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
307 int index = p->second->index_;
308 if (index != Label::kNoIndex) {
309 while (static_cast<size_t>(index) >= available.size())
310 available.push_back(true);
311 available.at(index) = false;
312 ++used;
316 VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
318 // Are there any unused labels that happen to be adjacent following a used
319 // label?
321 int fill_forward_count = 0;
322 Label* prev = 0;
323 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
324 Label* current = p->second;
325 if (current->index_ == Label::kNoIndex) {
326 int index = 0;
327 if (prev && prev->index_ != Label::kNoIndex)
328 index = prev->index_ + 1;
329 if (index < static_cast<int>(available.size()) && available.at(index)) {
330 current->index_ = index;
331 available.at(index) = false;
332 ++fill_forward_count;
335 prev = current;
338 // Are there any unused labels that happen to be adjacent preceeding a used
339 // label?
341 int fill_backward_count = 0;
342 prev = 0;
343 for (RVAToLabel::reverse_iterator p = labels->rbegin();
344 p != labels->rend();
345 ++p) {
346 Label* current = p->second;
347 if (current->index_ == Label::kNoIndex) {
348 int prev_index;
349 if (prev)
350 prev_index = prev->index_;
351 else
352 prev_index = static_cast<uint32>(available.size());
353 if (prev_index != 0 &&
354 prev_index != Label::kNoIndex &&
355 available.at(prev_index - 1)) {
356 current->index_ = prev_index - 1;
357 available.at(current->index_) = false;
358 ++fill_backward_count;
361 prev = current;
364 // Fill in any remaining indexes
365 int fill_infill_count = 0;
366 int index = 0;
367 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
368 Label* current = p->second;
369 if (current->index_ == Label::kNoIndex) {
370 while (!available.at(index)) {
371 ++index;
373 current->index_ = index;
374 available.at(index) = false;
375 ++index;
376 ++fill_infill_count;
380 VLOG(1) << " fill forward " << fill_forward_count
381 << " backward " << fill_backward_count
382 << " infill " << fill_infill_count;
385 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
387 #if defined(OS_WIN)
388 __declspec(noinline)
389 #endif
390 static CheckBool DefineLabels(const RVAToLabel& labels,
391 EncodedProgram* encoded_format,
392 DefineLabelMethod define_label) {
393 bool ok = true;
394 for (RVAToLabel::const_iterator p = labels.begin();
395 ok && p != labels.end();
396 ++p) {
397 Label* label = p->second;
398 ok = (encoded_format->*define_label)(label->index_, label->rva_);
400 return ok;
403 EncodedProgram* AssemblyProgram::Encode() const {
404 scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
405 if (!encoded.get())
406 return NULL;
408 encoded->set_image_base(image_base_);
410 if (!DefineLabels(abs32_labels_, encoded.get(),
411 &EncodedProgram::DefineAbs32Label) ||
412 !DefineLabels(rel32_labels_, encoded.get(),
413 &EncodedProgram::DefineRel32Label)) {
414 return NULL;
417 encoded->EndLabels();
419 for (size_t i = 0; i < instructions_.size(); ++i) {
420 Instruction* instruction = instructions_[i];
422 switch (instruction->op()) {
423 case ORIGIN: {
424 OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
425 if (!encoded->AddOrigin(org->origin_rva()))
426 return NULL;
427 break;
429 case DEFBYTE: {
430 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
431 if (!encoded->AddCopy(1, &b))
432 return NULL;
433 break;
435 case DEFBYTES: {
436 const uint8* byte_values =
437 static_cast<BytesInstruction*>(instruction)->byte_values();
438 size_t len = static_cast<BytesInstruction*>(instruction)->len();
440 if (!encoded->AddCopy(len, byte_values))
441 return NULL;
442 break;
444 case REL32: {
445 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
446 if (!encoded->AddRel32(label->index_))
447 return NULL;
448 break;
450 case REL32ARM: {
451 Label* label =
452 static_cast<InstructionWithLabelARM*>(instruction)->label();
453 uint16 compressed_op =
454 static_cast<InstructionWithLabelARM*>(instruction)->
455 compressed_op();
456 if (!encoded->AddRel32ARM(compressed_op, label->index_))
457 return NULL;
458 break;
460 case ABS32: {
461 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
462 if (!encoded->AddAbs32(label->index_))
463 return NULL;
464 break;
466 case ABS64: {
467 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
468 if (!encoded->AddAbs64(label->index_))
469 return NULL;
470 break;
472 case MAKEPERELOCS: {
473 if (!encoded->AddPeMakeRelocs(kind_))
474 return NULL;
475 break;
477 case MAKEELFRELOCS: {
478 if (!encoded->AddElfMakeRelocs())
479 return NULL;
480 break;
482 case MAKEELFARMRELOCS: {
483 if (!encoded->AddElfARMMakeRelocs())
484 return NULL;
485 break;
487 default: {
488 NOTREACHED() << "Unknown Insn OP kind";
493 return encoded.release();
496 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
497 if (!byte_instruction_cache_.get()) {
498 byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
499 if (!byte_instruction_cache_.get())
500 return NULL;
502 for (int i = 0; i < 256; ++i) {
503 byte_instruction_cache_[i] =
504 new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
505 if (!byte_instruction_cache_[i]) {
506 for (int j = 0; j < i; ++j)
507 delete byte_instruction_cache_[j];
508 byte_instruction_cache_.reset();
509 return NULL;
514 return byte_instruction_cache_[byte];
517 // Chosen empirically to give the best reduction in payload size for
518 // an update from daisy_3701.98.0 to daisy_4206.0.0.
519 const int AssemblyProgram::kLabelLowerLimit = 5;
521 CheckBool AssemblyProgram::TrimLabels() {
522 // For now only trim for ARM binaries
523 if (kind() != EXE_ELF_32_ARM)
524 return true;
526 int lower_limit = kLabelLowerLimit;
528 VLOG(1) << "TrimLabels: threshold " << lower_limit;
530 // Remove underused labels from the list of labels
531 RVAToLabel::iterator it = rel32_labels_.begin();
532 while (it != rel32_labels_.end()) {
533 if (it->second->count_ <= lower_limit) {
534 rel32_labels_.erase(it++);
535 } else {
536 ++it;
540 // Walk through the list of instructions, replacing trimmed labels
541 // with the original machine instruction
542 for (size_t i = 0; i < instructions_.size(); ++i) {
543 Instruction* instruction = instructions_[i];
544 switch (instruction->op()) {
545 case REL32ARM: {
546 Label* label =
547 static_cast<InstructionWithLabelARM*>(instruction)->label();
548 if (label->count_ <= lower_limit) {
549 const uint8* arm_op =
550 static_cast<InstructionWithLabelARM*>(instruction)->arm_op();
551 uint16 op_size =
552 static_cast<InstructionWithLabelARM*>(instruction)->op_size();
554 if (op_size < 1)
555 return false;
556 BytesInstruction* new_instruction =
557 new(std::nothrow) BytesInstruction(arm_op, op_size);
558 instructions_[i] = new_instruction;
559 delete instruction;
561 break;
563 default:
564 break;
568 return true;
571 ////////////////////////////////////////////////////////////////////////////////
573 Status TrimLabels(AssemblyProgram* program) {
574 if (program->TrimLabels())
575 return C_OK;
576 else
577 return C_TRIM_FAILED;
580 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
581 *output = NULL;
582 EncodedProgram *encoded = program->Encode();
583 if (encoded) {
584 *output = encoded;
585 return C_OK;
586 } else {
587 return C_GENERAL_ERROR;
591 } // namespace courgette