Separate Simple Backend creation from initialization.
[chromium-blink-merge.git] / courgette / assembly_program.cc
blob9225578488791391e92a961941c9a0661d5ba749
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/assembly_program.h"
7 #include <memory.h>
8 #include <algorithm>
9 #include <map>
10 #include <set>
11 #include <sstream>
12 #include <vector>
14 #include "base/logging.h"
15 #include "base/memory/scoped_ptr.h"
17 #include "courgette/courgette.h"
18 #include "courgette/encoded_program.h"
20 namespace courgette {
22 // Opcodes of simple assembly language
23 enum OP {
24 ORIGIN, // ORIGIN <rva> - set current address for assembly.
25 MAKEPERELOCS, // Generates a base relocation table.
26 MAKEELFRELOCS, // Generates a base relocation table.
27 DEFBYTE, // DEFBYTE <value> - emit a byte literal.
28 REL32, // REL32 <label> - emit a rel32 encoded reference to 'label'.
29 ABS32, // REL32 <label> - emit am abs32 encoded reference to 'label'.
30 LAST_OP
33 // Base class for instructions. Because we have so many instructions we want to
34 // keep them as small as possible. For this reason we avoid virtual functions.
36 class Instruction {
37 public:
38 OP op() const { return static_cast<OP>(op_); }
40 protected:
41 explicit Instruction(OP op) : op_(op), info_(0) {}
42 Instruction(OP op, unsigned int info) : op_(op), info_(info) {}
44 uint32 op_ : 4; // A few bits to store the OP code.
45 uint32 info_ : 28; // Remaining bits in first word available to subclass.
47 private:
48 DISALLOW_COPY_AND_ASSIGN(Instruction);
51 namespace {
53 // Sets the current address for the emitting instructions.
54 class OriginInstruction : public Instruction {
55 public:
56 explicit OriginInstruction(RVA rva) : Instruction(ORIGIN, 0), rva_(rva) {}
57 RVA origin_rva() const { return rva_; }
58 private:
59 RVA rva_;
62 // Emits an entire PE base relocation table.
63 class PeRelocsInstruction : public Instruction {
64 public:
65 PeRelocsInstruction() : Instruction(MAKEPERELOCS) {}
68 // Emits an ELF relocation table.
69 class ElfRelocsInstruction : public Instruction {
70 public:
71 ElfRelocsInstruction() : Instruction(MAKEELFRELOCS) {}
74 // Emits a single byte.
75 class ByteInstruction : public Instruction {
76 public:
77 explicit ByteInstruction(uint8 value) : Instruction(DEFBYTE, value) {}
78 uint8 byte_value() const { return info_; }
81 // A ABS32 to REL32 instruction emits a reference to a label's address.
82 class InstructionWithLabel : public Instruction {
83 public:
84 InstructionWithLabel(OP op, Label* label)
85 : Instruction(op, 0), label_(label) {
86 if (label == NULL) NOTREACHED();
88 Label* label() const { return label_; }
89 private:
90 Label* label_;
93 } // namespace
95 AssemblyProgram::AssemblyProgram()
96 : image_base_(0) {
99 static void DeleteContainedLabels(const RVAToLabel& labels) {
100 for (RVAToLabel::const_iterator p = labels.begin(); p != labels.end(); ++p)
101 delete p->second;
104 AssemblyProgram::~AssemblyProgram() {
105 for (size_t i = 0; i < instructions_.size(); ++i) {
106 Instruction* instruction = instructions_[i];
107 if (instruction->op() != DEFBYTE) // Will be in byte_instruction_cache_.
108 delete instruction;
110 if (byte_instruction_cache_.get()) {
111 for (size_t i = 0; i < 256; ++i)
112 delete byte_instruction_cache_[i];
114 DeleteContainedLabels(rel32_labels_);
115 DeleteContainedLabels(abs32_labels_);
118 CheckBool AssemblyProgram::EmitPeRelocsInstruction() {
119 return Emit(new(std::nothrow) PeRelocsInstruction());
122 CheckBool AssemblyProgram::EmitElfRelocationInstruction() {
123 return Emit(new(std::nothrow) ElfRelocsInstruction());
126 CheckBool AssemblyProgram::EmitOriginInstruction(RVA rva) {
127 return Emit(new(std::nothrow) OriginInstruction(rva));
130 CheckBool AssemblyProgram::EmitByteInstruction(uint8 byte) {
131 return Emit(GetByteInstruction(byte));
134 CheckBool AssemblyProgram::EmitRel32(Label* label) {
135 return Emit(new(std::nothrow) InstructionWithLabel(REL32, label));
138 CheckBool AssemblyProgram::EmitAbs32(Label* label) {
139 return Emit(new(std::nothrow) InstructionWithLabel(ABS32, label));
142 Label* AssemblyProgram::FindOrMakeAbs32Label(RVA rva) {
143 return FindLabel(rva, &abs32_labels_);
146 Label* AssemblyProgram::FindOrMakeRel32Label(RVA rva) {
147 return FindLabel(rva, &rel32_labels_);
150 void AssemblyProgram::DefaultAssignIndexes() {
151 DefaultAssignIndexes(&abs32_labels_);
152 DefaultAssignIndexes(&rel32_labels_);
155 void AssemblyProgram::UnassignIndexes() {
156 UnassignIndexes(&abs32_labels_);
157 UnassignIndexes(&rel32_labels_);
160 void AssemblyProgram::AssignRemainingIndexes() {
161 AssignRemainingIndexes(&abs32_labels_);
162 AssignRemainingIndexes(&rel32_labels_);
165 Label* AssemblyProgram::InstructionAbs32Label(
166 const Instruction* instruction) const {
167 if (instruction->op() == ABS32)
168 return static_cast<const InstructionWithLabel*>(instruction)->label();
169 return NULL;
172 Label* AssemblyProgram::InstructionRel32Label(
173 const Instruction* instruction) const {
174 if (instruction->op() == REL32)
175 return static_cast<const InstructionWithLabel*>(instruction)->label();
176 return NULL;
179 CheckBool AssemblyProgram::Emit(Instruction* instruction) {
180 if (!instruction)
181 return false;
182 bool ok = instructions_.push_back(instruction);
183 if (!ok)
184 delete instruction;
185 return ok;
188 Label* AssemblyProgram::FindLabel(RVA rva, RVAToLabel* labels) {
189 Label*& slot = (*labels)[rva];
190 if (slot == NULL) {
191 slot = new(std::nothrow) Label(rva);
193 return slot;
196 void AssemblyProgram::UnassignIndexes(RVAToLabel* labels) {
197 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
198 Label* current = p->second;
199 current->index_ = Label::kNoIndex;
203 // DefaultAssignIndexes takes a set of labels and assigns indexes in increasing
204 // address order.
206 void AssemblyProgram::DefaultAssignIndexes(RVAToLabel* labels) {
207 int index = 0;
208 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
209 Label* current = p->second;
210 if (current->index_ != Label::kNoIndex)
211 NOTREACHED();
212 current->index_ = index;
213 ++index;
217 // AssignRemainingIndexes assigns indexes to any addresses (labels) that are not
218 // yet assigned an index.
220 void AssemblyProgram::AssignRemainingIndexes(RVAToLabel* labels) {
221 // An address table compresses best when each index is associated with an
222 // address that is slight larger than the previous index.
224 // First see which indexes have not been used. The 'available' vector could
225 // grow even bigger, but the number of addresses is a better starting size
226 // than empty.
227 std::vector<bool> available(labels->size(), true);
228 int used = 0;
230 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
231 int index = p->second->index_;
232 if (index != Label::kNoIndex) {
233 while (static_cast<size_t>(index) >= available.size())
234 available.push_back(true);
235 available.at(index) = false;
236 ++used;
240 VLOG(1) << used << " of " << labels->size() << " labels pre-assigned";
242 // Are there any unused labels that happen to be adjacent following a used
243 // label?
245 int fill_forward_count = 0;
246 Label* prev = 0;
247 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
248 Label* current = p->second;
249 if (current->index_ == Label::kNoIndex) {
250 int index = 0;
251 if (prev && prev->index_ != Label::kNoIndex)
252 index = prev->index_ + 1;
253 if (index < static_cast<int>(available.size()) && available.at(index)) {
254 current->index_ = index;
255 available.at(index) = false;
256 ++fill_forward_count;
259 prev = current;
262 // Are there any unused labels that happen to be adjacent preceeding a used
263 // label?
265 int fill_backward_count = 0;
266 prev = 0;
267 for (RVAToLabel::reverse_iterator p = labels->rbegin();
268 p != labels->rend();
269 ++p) {
270 Label* current = p->second;
271 if (current->index_ == Label::kNoIndex) {
272 int prev_index;
273 if (prev)
274 prev_index = prev->index_;
275 else
276 prev_index = static_cast<uint32>(available.size());
277 if (prev_index != 0 &&
278 prev_index != Label::kNoIndex &&
279 available.at(prev_index - 1)) {
280 current->index_ = prev_index - 1;
281 available.at(current->index_) = false;
282 ++fill_backward_count;
285 prev = current;
288 // Fill in any remaining indexes
289 int fill_infill_count = 0;
290 int index = 0;
291 for (RVAToLabel::iterator p = labels->begin(); p != labels->end(); ++p) {
292 Label* current = p->second;
293 if (current->index_ == Label::kNoIndex) {
294 while (!available.at(index)) {
295 ++index;
297 current->index_ = index;
298 available.at(index) = false;
299 ++index;
300 ++fill_infill_count;
304 VLOG(1) << " fill forward " << fill_forward_count
305 << " backward " << fill_backward_count
306 << " infill " << fill_infill_count;
309 typedef CheckBool (EncodedProgram::*DefineLabelMethod)(int index, RVA value);
311 #if defined(OS_WIN)
312 __declspec(noinline)
313 #endif
314 static CheckBool DefineLabels(const RVAToLabel& labels,
315 EncodedProgram* encoded_format,
316 DefineLabelMethod define_label) {
317 bool ok = true;
318 for (RVAToLabel::const_iterator p = labels.begin();
319 ok && p != labels.end();
320 ++p) {
321 Label* label = p->second;
322 ok = (encoded_format->*define_label)(label->index_, label->rva_);
324 return ok;
327 EncodedProgram* AssemblyProgram::Encode() const {
328 scoped_ptr<EncodedProgram> encoded(new(std::nothrow) EncodedProgram());
329 if (!encoded.get())
330 return NULL;
332 encoded->set_image_base(image_base_);
334 if (!DefineLabels(abs32_labels_, encoded.get(),
335 &EncodedProgram::DefineAbs32Label) ||
336 !DefineLabels(rel32_labels_, encoded.get(),
337 &EncodedProgram::DefineRel32Label)) {
338 return NULL;
341 encoded->EndLabels();
343 for (size_t i = 0; i < instructions_.size(); ++i) {
344 Instruction* instruction = instructions_[i];
346 switch (instruction->op()) {
347 case ORIGIN: {
348 OriginInstruction* org = static_cast<OriginInstruction*>(instruction);
349 if (!encoded->AddOrigin(org->origin_rva()))
350 return NULL;
351 break;
353 case DEFBYTE: {
354 uint8 b = static_cast<ByteInstruction*>(instruction)->byte_value();
355 if (!encoded->AddCopy(1, &b))
356 return NULL;
357 break;
359 case REL32: {
360 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
361 if (!encoded->AddRel32(label->index_))
362 return NULL;
363 break;
365 case ABS32: {
366 Label* label = static_cast<InstructionWithLabel*>(instruction)->label();
367 if (!encoded->AddAbs32(label->index_))
368 return NULL;
369 break;
371 case MAKEPERELOCS: {
372 if (!encoded->AddPeMakeRelocs())
373 return NULL;
374 break;
376 case MAKEELFRELOCS: {
377 if (!encoded->AddElfMakeRelocs())
378 return NULL;
379 break;
381 default: {
382 NOTREACHED() << "Unknown Insn OP kind";
387 return encoded.release();
390 Instruction* AssemblyProgram::GetByteInstruction(uint8 byte) {
391 if (!byte_instruction_cache_.get()) {
392 byte_instruction_cache_.reset(new(std::nothrow) Instruction*[256]);
393 if (!byte_instruction_cache_.get())
394 return NULL;
396 for (int i = 0; i < 256; ++i) {
397 byte_instruction_cache_[i] =
398 new(std::nothrow) ByteInstruction(static_cast<uint8>(i));
399 if (!byte_instruction_cache_[i]) {
400 for (int j = 0; j < i; ++j)
401 delete byte_instruction_cache_[j];
402 byte_instruction_cache_.reset();
403 return NULL;
408 return byte_instruction_cache_[byte];
411 ////////////////////////////////////////////////////////////////////////////////
413 Status Encode(AssemblyProgram* program, EncodedProgram** output) {
414 *output = NULL;
415 EncodedProgram *encoded = program->Encode();
416 if (encoded) {
417 *output = encoded;
418 return C_OK;
419 } else {
420 return C_GENERAL_ERROR;
424 } // namespace courgette