Separate Simple Backend creation from initialization.
[chromium-blink-merge.git] / courgette / encoded_program.cc
blob3da94147671890b9b80bb53ea2e71a2b63cf403c
1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/encoded_program.h"
7 #include <algorithm>
8 #include <map>
9 #include <string>
10 #include <vector>
12 #include "base/environment.h"
13 #include "base/logging.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/string_util.h"
16 #include "base/utf_string_conversions.h"
17 #include "courgette/courgette.h"
18 #include "courgette/streams.h"
19 #include "courgette/types_elf.h"
21 namespace courgette {
23 // Stream indexes.
24 const int kStreamMisc = 0;
25 const int kStreamOps = 1;
26 const int kStreamBytes = 2;
27 const int kStreamAbs32Indexes = 3;
28 const int kStreamRel32Indexes = 4;
29 const int kStreamAbs32Addresses = 5;
30 const int kStreamRel32Addresses = 6;
31 const int kStreamCopyCounts = 7;
32 const int kStreamOriginAddresses = kStreamMisc;
34 const int kStreamLimit = 9;
36 // Constructor is here rather than in the header. Although the constructor
37 // appears to do nothing it is fact quite large because of the implicit calls to
38 // field constructors. Ditto for the destructor.
39 EncodedProgram::EncodedProgram() : image_base_(0) {}
40 EncodedProgram::~EncodedProgram() {}
42 // Serializes a vector of integral values using Varint32 coding.
43 template<typename V>
44 CheckBool WriteVector(const V& items, SinkStream* buffer) {
45 size_t count = items.size();
46 bool ok = buffer->WriteSizeVarint32(count);
47 for (size_t i = 0; ok && i < count; ++i) {
48 COMPILE_ASSERT(sizeof(items[0]) <= sizeof(uint32), // NOLINT
49 T_must_fit_in_uint32);
50 ok = buffer->WriteSizeVarint32(items[i]);
52 return ok;
55 template<typename V>
56 bool ReadVector(V* items, SourceStream* buffer) {
57 uint32 count;
58 if (!buffer->ReadVarint32(&count))
59 return false;
61 items->clear();
63 bool ok = items->reserve(count);
64 for (size_t i = 0; ok && i < count; ++i) {
65 uint32 item;
66 ok = buffer->ReadVarint32(&item);
67 if (ok)
68 ok = items->push_back(static_cast<typename V::value_type>(item));
71 return ok;
74 // Serializes a vector, using delta coding followed by Varint32 coding.
75 template<typename V>
76 CheckBool WriteU32Delta(const V& set, SinkStream* buffer) {
77 size_t count = set.size();
78 bool ok = buffer->WriteSizeVarint32(count);
79 uint32 prev = 0;
80 for (size_t i = 0; ok && i < count; ++i) {
81 uint32 current = set[i];
82 uint32 delta = current - prev;
83 ok = buffer->WriteVarint32(delta);
84 prev = current;
86 return ok;
89 template <typename V>
90 static CheckBool ReadU32Delta(V* set, SourceStream* buffer) {
91 uint32 count;
93 if (!buffer->ReadVarint32(&count))
94 return false;
96 set->clear();
97 bool ok = set->reserve(count);
98 uint32 prev = 0;
100 for (size_t i = 0; ok && i < count; ++i) {
101 uint32 delta;
102 ok = buffer->ReadVarint32(&delta);
103 if (ok) {
104 uint32 current = prev + delta;
105 ok = set->push_back(current);
106 prev = current;
110 return ok;
113 // Write a vector as the byte representation of the contents.
115 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise
116 // serialized representation is not endian-agnostic. But it is useful to keep
117 // the possibility of a greater size for experiments comparing Varint32 encoding
118 // of a vector of larger integrals vs a plain form.)
120 template<typename V>
121 CheckBool WriteVectorU8(const V& items, SinkStream* buffer) {
122 size_t count = items.size();
123 bool ok = buffer->WriteSizeVarint32(count);
124 if (count != 0 && ok) {
125 size_t byte_count = count * sizeof(typename V::value_type);
126 ok = buffer->Write(static_cast<const void*>(&items[0]), byte_count);
128 return ok;
131 template<typename V>
132 bool ReadVectorU8(V* items, SourceStream* buffer) {
133 uint32 count;
134 if (!buffer->ReadVarint32(&count))
135 return false;
137 items->clear();
138 bool ok = items->resize(count, 0);
139 if (ok && count != 0) {
140 size_t byte_count = count * sizeof(typename V::value_type);
141 return buffer->Read(static_cast<void*>(&((*items)[0])), byte_count);
143 return ok;
146 ////////////////////////////////////////////////////////////////////////////////
148 CheckBool EncodedProgram::DefineRel32Label(int index, RVA value) {
149 return DefineLabelCommon(&rel32_rva_, index, value);
152 CheckBool EncodedProgram::DefineAbs32Label(int index, RVA value) {
153 return DefineLabelCommon(&abs32_rva_, index, value);
156 static const RVA kUnassignedRVA = static_cast<RVA>(-1);
158 CheckBool EncodedProgram::DefineLabelCommon(RvaVector* rvas,
159 int index,
160 RVA rva) {
161 bool ok = true;
162 if (static_cast<int>(rvas->size()) <= index)
163 ok = rvas->resize(index + 1, kUnassignedRVA);
165 if (ok) {
166 DCHECK_EQ((*rvas)[index], kUnassignedRVA)
167 << "DefineLabel double assigned " << index;
168 (*rvas)[index] = rva;
171 return ok;
174 void EncodedProgram::EndLabels() {
175 FinishLabelsCommon(&abs32_rva_);
176 FinishLabelsCommon(&rel32_rva_);
179 void EncodedProgram::FinishLabelsCommon(RvaVector* rvas) {
180 // Replace all unassigned slots with the value at the previous index so they
181 // delta-encode to zero. (There might be better values than zero. The way to
182 // get that is have the higher level assembly program assign the unassigned
183 // slots.)
184 RVA previous = 0;
185 size_t size = rvas->size();
186 for (size_t i = 0; i < size; ++i) {
187 if ((*rvas)[i] == kUnassignedRVA)
188 (*rvas)[i] = previous;
189 else
190 previous = (*rvas)[i];
194 CheckBool EncodedProgram::AddOrigin(RVA origin) {
195 return ops_.push_back(ORIGIN) && origins_.push_back(origin);
198 CheckBool EncodedProgram::AddCopy(uint32 count, const void* bytes) {
199 const uint8* source = static_cast<const uint8*>(bytes);
201 bool ok = true;
203 // Fold adjacent COPY instructions into one. This nearly halves the size of
204 // an EncodedProgram with only COPY1 instructions since there are approx plain
205 // 16 bytes per reloc. This has a working-set benefit during decompression.
206 // For compression of files with large differences this makes a small (4%)
207 // improvement in size. For files with small differences this degrades the
208 // compressed size by 1.3%
209 if (!ops_.empty()) {
210 if (ops_.back() == COPY1) {
211 ops_.back() = COPY;
212 ok = copy_counts_.push_back(1);
214 if (ok && ops_.back() == COPY) {
215 copy_counts_.back() += count;
216 for (uint32 i = 0; ok && i < count; ++i) {
217 ok = copy_bytes_.push_back(source[i]);
219 return ok;
223 if (ok) {
224 if (count == 1) {
225 ok = ops_.push_back(COPY1) && copy_bytes_.push_back(source[0]);
226 } else {
227 ok = ops_.push_back(COPY) && copy_counts_.push_back(count);
228 for (uint32 i = 0; ok && i < count; ++i) {
229 ok = copy_bytes_.push_back(source[i]);
234 return ok;
237 CheckBool EncodedProgram::AddAbs32(int label_index) {
238 return ops_.push_back(ABS32) && abs32_ix_.push_back(label_index);
241 CheckBool EncodedProgram::AddRel32(int label_index) {
242 return ops_.push_back(REL32) && rel32_ix_.push_back(label_index);
245 CheckBool EncodedProgram::AddPeMakeRelocs() {
246 return ops_.push_back(MAKE_PE_RELOCATION_TABLE);
249 CheckBool EncodedProgram::AddElfMakeRelocs() {
250 return ops_.push_back(MAKE_ELF_RELOCATION_TABLE);
253 void EncodedProgram::DebuggingSummary() {
254 VLOG(1) << "EncodedProgram Summary"
255 << "\n image base " << image_base_
256 << "\n abs32 rvas " << abs32_rva_.size()
257 << "\n rel32 rvas " << rel32_rva_.size()
258 << "\n ops " << ops_.size()
259 << "\n origins " << origins_.size()
260 << "\n copy_counts " << copy_counts_.size()
261 << "\n copy_bytes " << copy_bytes_.size()
262 << "\n abs32_ix " << abs32_ix_.size()
263 << "\n rel32_ix " << rel32_ix_.size();
266 ////////////////////////////////////////////////////////////////////////////////
268 // For algorithm refinement purposes it is useful to write subsets of the file
269 // format. This gives us the ability to estimate the entropy of the
270 // differential compression of the individual streams, which can provide
271 // invaluable insights. The default, of course, is to include all the streams.
273 enum FieldSelect {
274 INCLUDE_ABS32_ADDRESSES = 0x0001,
275 INCLUDE_REL32_ADDRESSES = 0x0002,
276 INCLUDE_ABS32_INDEXES = 0x0010,
277 INCLUDE_REL32_INDEXES = 0x0020,
278 INCLUDE_OPS = 0x0100,
279 INCLUDE_BYTES = 0x0200,
280 INCLUDE_COPY_COUNTS = 0x0400,
281 INCLUDE_MISC = 0x1000
284 static FieldSelect GetFieldSelect() {
285 #if 1
286 // TODO(sra): Use better configuration.
287 scoped_ptr<base::Environment> env(base::Environment::Create());
288 std::string s;
289 env->GetVar("A_FIELDS", &s);
290 if (!s.empty()) {
291 return static_cast<FieldSelect>(wcstoul(ASCIIToWide(s).c_str(), 0, 0));
293 #endif
294 return static_cast<FieldSelect>(~0);
297 CheckBool EncodedProgram::WriteTo(SinkStreamSet* streams) {
298 FieldSelect select = GetFieldSelect();
300 // The order of fields must be consistent in WriteTo and ReadFrom, regardless
301 // of the streams used. The code can be configured with all kStreamXXX
302 // constants the same.
304 // If we change the code to pipeline reading with assembly (to avoid temporary
305 // storage vectors by consuming operands directly from the stream) then we
306 // need to read the base address and the random access address tables first,
307 // the rest can be interleaved.
309 if (select & INCLUDE_MISC) {
310 // TODO(sra): write 64 bits.
311 if (!streams->stream(kStreamMisc)->WriteVarint32(
312 static_cast<uint32>(image_base_))) {
313 return false;
317 bool success = true;
319 if (select & INCLUDE_ABS32_ADDRESSES) {
320 success &= WriteU32Delta(abs32_rva_,
321 streams->stream(kStreamAbs32Addresses));
324 if (select & INCLUDE_REL32_ADDRESSES) {
325 success &= WriteU32Delta(rel32_rva_,
326 streams->stream(kStreamRel32Addresses));
329 if (select & INCLUDE_MISC)
330 success &= WriteVector(origins_, streams->stream(kStreamOriginAddresses));
332 if (select & INCLUDE_OPS) {
333 // 5 for length.
334 success &= streams->stream(kStreamOps)->Reserve(ops_.size() + 5);
335 success &= WriteVector(ops_, streams->stream(kStreamOps));
338 if (select & INCLUDE_COPY_COUNTS)
339 success &= WriteVector(copy_counts_, streams->stream(kStreamCopyCounts));
341 if (select & INCLUDE_BYTES)
342 success &= WriteVectorU8(copy_bytes_, streams->stream(kStreamBytes));
344 if (select & INCLUDE_ABS32_INDEXES)
345 success &= WriteVector(abs32_ix_, streams->stream(kStreamAbs32Indexes));
347 if (select & INCLUDE_REL32_INDEXES)
348 success &= WriteVector(rel32_ix_, streams->stream(kStreamRel32Indexes));
350 return success;
353 bool EncodedProgram::ReadFrom(SourceStreamSet* streams) {
354 // TODO(sra): read 64 bits.
355 uint32 temp;
356 if (!streams->stream(kStreamMisc)->ReadVarint32(&temp))
357 return false;
358 image_base_ = temp;
360 if (!ReadU32Delta(&abs32_rva_, streams->stream(kStreamAbs32Addresses)))
361 return false;
362 if (!ReadU32Delta(&rel32_rva_, streams->stream(kStreamRel32Addresses)))
363 return false;
364 if (!ReadVector(&origins_, streams->stream(kStreamOriginAddresses)))
365 return false;
366 if (!ReadVector(&ops_, streams->stream(kStreamOps)))
367 return false;
368 if (!ReadVector(&copy_counts_, streams->stream(kStreamCopyCounts)))
369 return false;
370 if (!ReadVectorU8(&copy_bytes_, streams->stream(kStreamBytes)))
371 return false;
372 if (!ReadVector(&abs32_ix_, streams->stream(kStreamAbs32Indexes)))
373 return false;
374 if (!ReadVector(&rel32_ix_, streams->stream(kStreamRel32Indexes)))
375 return false;
377 // Check that streams have been completely consumed.
378 for (int i = 0; i < kStreamLimit; ++i) {
379 if (streams->stream(i)->Remaining() > 0)
380 return false;
383 return true;
386 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success,
387 // 'false' for out-of-bounds index error.
388 template<typename V, typename T>
389 bool VectorAt(const V& v, size_t index, T* output) {
390 if (index >= v.size())
391 return false;
392 *output = v[index];
393 return true;
396 CheckBool EncodedProgram::AssembleTo(SinkStream* final_buffer) {
397 // For the most part, the assembly process walks the various tables.
398 // ix_mumble is the index into the mumble table.
399 size_t ix_origins = 0;
400 size_t ix_copy_counts = 0;
401 size_t ix_copy_bytes = 0;
402 size_t ix_abs32_ix = 0;
403 size_t ix_rel32_ix = 0;
405 RVA current_rva = 0;
407 bool pending_pe_relocation_table = false;
408 bool pending_elf_relocation_table = false;
409 SinkStream bytes_following_relocation_table;
411 SinkStream* output = final_buffer;
413 for (size_t ix_ops = 0; ix_ops < ops_.size(); ++ix_ops) {
414 OP op = ops_[ix_ops];
416 switch (op) {
417 default:
418 return false;
420 case ORIGIN: {
421 RVA section_rva;
422 if (!VectorAt(origins_, ix_origins, &section_rva))
423 return false;
424 ++ix_origins;
425 current_rva = section_rva;
426 break;
429 case COPY: {
430 uint32 count;
431 if (!VectorAt(copy_counts_, ix_copy_counts, &count))
432 return false;
433 ++ix_copy_counts;
434 for (uint32 i = 0; i < count; ++i) {
435 uint8 b;
436 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
437 return false;
438 ++ix_copy_bytes;
439 if (!output->Write(&b, 1))
440 return false;
442 current_rva += count;
443 break;
446 case COPY1: {
447 uint8 b;
448 if (!VectorAt(copy_bytes_, ix_copy_bytes, &b))
449 return false;
450 ++ix_copy_bytes;
451 if (!output->Write(&b, 1))
452 return false;
453 current_rva += 1;
454 break;
457 case REL32: {
458 uint32 index;
459 if (!VectorAt(rel32_ix_, ix_rel32_ix, &index))
460 return false;
461 ++ix_rel32_ix;
462 RVA rva;
463 if (!VectorAt(rel32_rva_, index, &rva))
464 return false;
465 uint32 offset = (rva - (current_rva + 4));
466 if (!output->Write(&offset, 4))
467 return false;
468 current_rva += 4;
469 break;
472 case ABS32: {
473 uint32 index;
474 if (!VectorAt(abs32_ix_, ix_abs32_ix, &index))
475 return false;
476 ++ix_abs32_ix;
477 RVA rva;
478 if (!VectorAt(abs32_rva_, index, &rva))
479 return false;
480 uint32 abs32 = static_cast<uint32>(rva + image_base_);
481 if (!abs32_relocs_.push_back(current_rva) || !output->Write(&abs32, 4))
482 return false;
483 current_rva += 4;
484 break;
487 case MAKE_PE_RELOCATION_TABLE: {
488 // We can see the base relocation anywhere, but we only have the
489 // information to generate it at the very end. So we divert the bytes
490 // we are generating to a temporary stream.
491 if (pending_pe_relocation_table) // Can't have two base relocation
492 // tables.
493 return false;
495 pending_pe_relocation_table = true;
496 output = &bytes_following_relocation_table;
497 break;
498 // There is a potential problem *if* the instruction stream contains
499 // some REL32 relocations following the base relocation and in the same
500 // section. We don't know the size of the table, so 'current_rva' will
501 // be wrong, causing REL32 offsets to be miscalculated. This never
502 // happens; the base relocation table is usually in a section of its
503 // own, a data-only section, and following everything else in the
504 // executable except some padding zero bytes. We could fix this by
505 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
508 case MAKE_ELF_RELOCATION_TABLE: {
509 // We can see the base relocation anywhere, but we only have the
510 // information to generate it at the very end. So we divert the bytes
511 // we are generating to a temporary stream.
512 if (pending_elf_relocation_table) // Can't have two relocation
513 // tables.
514 return false;
516 pending_elf_relocation_table = true;
517 output = &bytes_following_relocation_table;
518 break;
523 if (pending_pe_relocation_table) {
524 if (!GeneratePeRelocations(final_buffer) ||
525 !final_buffer->Append(&bytes_following_relocation_table))
526 return false;
529 if (pending_elf_relocation_table) {
530 if (!GenerateElfRelocations(final_buffer) ||
531 !final_buffer->Append(&bytes_following_relocation_table))
532 return false;
535 // Final verification check: did we consume all lists?
536 if (ix_copy_counts != copy_counts_.size())
537 return false;
538 if (ix_copy_bytes != copy_bytes_.size())
539 return false;
540 if (ix_abs32_ix != abs32_ix_.size())
541 return false;
542 if (ix_rel32_ix != rel32_ix_.size())
543 return false;
545 return true;
548 // RelocBlock has the layout of a block of relocations in the base relocation
549 // table file format.
551 struct RelocBlockPOD {
552 uint32 page_rva;
553 uint32 block_size;
554 uint16 relocs[4096]; // Allow up to one relocation per byte of a 4k page.
557 COMPILE_ASSERT(offsetof(RelocBlockPOD, relocs) == 8, reloc_block_header_size);
559 class RelocBlock {
560 public:
561 RelocBlock() {
562 pod.page_rva = ~0;
563 pod.block_size = 8;
566 void Add(uint16 item) {
567 pod.relocs[(pod.block_size-8)/2] = item;
568 pod.block_size += 2;
571 CheckBool Flush(SinkStream* buffer) WARN_UNUSED_RESULT {
572 bool ok = true;
573 if (pod.block_size != 8) {
574 if (pod.block_size % 4 != 0) { // Pad to make size multiple of 4 bytes.
575 Add(0);
577 ok = buffer->Write(&pod, pod.block_size);
578 pod.block_size = 8;
580 return ok;
582 RelocBlockPOD pod;
585 CheckBool EncodedProgram::GeneratePeRelocations(SinkStream* buffer) {
586 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
588 RelocBlock block;
590 bool ok = true;
591 for (size_t i = 0; ok && i < abs32_relocs_.size(); ++i) {
592 uint32 rva = abs32_relocs_[i];
593 uint32 page_rva = rva & ~0xFFF;
594 if (page_rva != block.pod.page_rva) {
595 ok &= block.Flush(buffer);
596 block.pod.page_rva = page_rva;
598 if (ok)
599 block.Add(0x3000 | (rva & 0xFFF));
601 ok &= block.Flush(buffer);
602 return ok;
605 CheckBool EncodedProgram::GenerateElfRelocations(SinkStream* buffer) {
606 std::sort(abs32_relocs_.begin(), abs32_relocs_.end());
608 Elf32_Rel relocation_block;
610 // We only handle this specific type of relocation, so far.
611 relocation_block.r_info = R_386_RELATIVE;
613 bool ok = true;
614 for (size_t i = 0; ok && i < abs32_relocs_.size(); ++i) {
615 relocation_block.r_offset = abs32_relocs_[i];
616 ok = buffer->Write(&relocation_block, sizeof(Elf32_Rel));
619 return ok;
621 ////////////////////////////////////////////////////////////////////////////////
623 Status WriteEncodedProgram(EncodedProgram* encoded, SinkStreamSet* sink) {
624 if (!encoded->WriteTo(sink))
625 return C_STREAM_ERROR;
626 return C_OK;
629 Status ReadEncodedProgram(SourceStreamSet* streams, EncodedProgram** output) {
630 EncodedProgram* encoded = new EncodedProgram();
631 if (encoded->ReadFrom(streams)) {
632 *output = encoded;
633 return C_OK;
635 delete encoded;
636 return C_DESERIALIZATION_FAILED;
639 Status Assemble(EncodedProgram* encoded, SinkStream* buffer) {
640 bool assembled = encoded->AssembleTo(buffer);
641 if (assembled)
642 return C_OK;
643 return C_ASSEMBLY_FAILED;
646 void DeleteEncodedProgram(EncodedProgram* encoded) {
647 delete encoded;
650 } // end namespace