1 // Copyright (c) 2011 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/encoded_program.h"
12 #include "base/environment.h"
13 #include "base/logging.h"
14 #include "base/memory/scoped_ptr.h"
15 #include "base/numerics/safe_conversions.h"
16 #include "base/numerics/safe_math.h"
17 #include "base/strings/string_number_conversions.h"
18 #include "base/strings/string_util.h"
19 #include "courgette/courgette.h"
20 #include "courgette/disassembler_elf_32_arm.h"
21 #include "courgette/streams.h"
22 #include "courgette/types_elf.h"
26 // Constructor is here rather than in the header. Although the constructor
27 // appears to do nothing it is in fact quite large because of the implicit calls
28 // to field constructors. Ditto for the destructor.
// The only explicit initialization is image_base_, which starts at zero.
29 EncodedProgram::EncodedProgram() : image_base_(0) {}
30 EncodedProgram::~EncodedProgram() {}
32 // Serializes a vector of integral values using Varint32 coding.
// The element count is written first, followed by each element in index
// order; both go through SinkStream::WriteSizeVarint32(). The loop condition
// re-tests |ok|, so nothing more is written once a write reports failure.
34 CheckBool
WriteVector(const V
& items
, SinkStream
* buffer
) {
35 size_t count
= items
.size();
36 bool ok
= buffer
->WriteSizeVarint32(count
);
37 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
38 ok
= buffer
->WriteSizeVarint32(items
[i
]);
// Reads a vector in the layout produced by WriteVector(): a Varint32 element
// count, then that many Varint32 values. Each value is cast to V::value_type
// before being appended. reserve() and push_back() are used as bool-returning
// calls, so V is presumably one of the project's non-throwing vector types --
// TODO confirm.
// NOTE(review): |count| is taken from the stream and handed to reserve();
// confirm that an oversized count is rejected somewhere.
44 bool ReadVector(V
* items
, SourceStream
* buffer
) {
46 if (!buffer
->ReadVarint32(&count
))
51 bool ok
= items
->reserve(count
);
52 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
54 ok
= buffer
->ReadVarint32(&item
);
56 ok
= items
->push_back(static_cast<typename
V::value_type
>(item
));
62 // Serializes a vector, using delta coding followed by Varint32Signed coding.
// The element count is written with WriteSizeVarint32(); each element is then
// written as the int32 difference (current - prev) via WriteVarint32Signed().
// |prev| is presumably the previously written element, starting from zero --
// TODO confirm against its declaration and update.
64 CheckBool
WriteSigned32Delta(const V
& set
, SinkStream
* buffer
) {
65 size_t count
= set
.size();
66 bool ok
= buffer
->WriteSizeVarint32(count
);
68 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
69 uint32 current
= set
[i
];
70 int32 delta
= current
- prev
;
71 ok
= buffer
->WriteVarint32Signed(delta
);
// Inverse of WriteSigned32Delta(): reads a Varint32 element count, reserves
// that many slots in |set|, then for each element reads a signed delta and
// appends static_cast<uint32>(prev + delta). |prev| is presumably the value
// reconstructed on the previous iteration -- TODO confirm.
78 static CheckBool
ReadSigned32Delta(V
* set
, SourceStream
* buffer
) {
81 if (!buffer
->ReadVarint32(&count
))
85 bool ok
= set
->reserve(count
);
87 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
89 ok
= buffer
->ReadVarint32Signed(&delta
);
91 uint32 current
= static_cast<uint32
>(prev
+ delta
);
92 ok
= set
->push_back(current
);
99 // Write a vector as the byte representation of the contents.
101 // (This only really makes sense for a type T that has sizeof(T)==1, otherwise
102 // serialized representation is not endian-agnostic. But it is useful to keep
103 // the possibility of a greater size for experiments comparing Varint32 encoding
104 // of a vector of larger integrals vs a plain form.)
// Layout: the element count via WriteSizeVarint32(), then one raw Write() of
// count * sizeof(V::value_type) bytes starting at &items[0]. The count != 0
// test keeps &items[0] from being evaluated on an empty vector.
107 CheckBool
WriteVectorU8(const V
& items
, SinkStream
* buffer
) {
108 size_t count
= items
.size();
109 bool ok
= buffer
->WriteSizeVarint32(count
);
110 if (count
!= 0 && ok
) {
111 size_t byte_count
= count
* sizeof(typename
V::value_type
);
112 ok
= buffer
->Write(static_cast<const void*>(&items
[0]), byte_count
);
// Inverse of WriteVectorU8(): reads a Varint32 element count, resizes |items|
// to that many zero-filled elements, and, when the resize succeeded and the
// count is non-zero, fills the storage with a single raw Read() of
// count * sizeof(V::value_type) bytes. The result of that Read() is returned
// directly on this path.
118 bool ReadVectorU8(V
* items
, SourceStream
* buffer
) {
120 if (!buffer
->ReadVarint32(&count
))
124 bool ok
= items
->resize(count
, 0);
125 if (ok
&& count
!= 0) {
126 size_t byte_count
= count
* sizeof(typename
V::value_type
);
127 return buffer
->Read(static_cast<void*>(&((*items
)[0])), byte_count
);
132 ////////////////////////////////////////////////////////////////////////////////
// Records |value| as the RVA of rel32 label |index| by forwarding to
// DefineLabelCommon() with the rel32_rva_ table.
134 CheckBool
EncodedProgram::DefineRel32Label(int index
, RVA value
) {
135 return DefineLabelCommon(&rel32_rva_
, index
, value
);
// Records |value| as the RVA of abs32 label |index| by forwarding to
// DefineLabelCommon() with the abs32_rva_ table.
138 CheckBool
EncodedProgram::DefineAbs32Label(int index
, RVA value
) {
139 return DefineLabelCommon(&abs32_rva_
, index
, value
);
// Sentinel RVA (-1 cast to RVA) marking a label slot that has not been
// assigned yet. DefineLabelCommon() fills new slots with it and
// FinishLabelsCommon() replaces any that remain.
142 static const RVA kUnassignedRVA
= static_cast<RVA
>(-1);
// Shared implementation of DefineRel32Label()/DefineAbs32Label(). If |rvas|
// is too short to hold |index| it is resized to index + 1 entries, with new
// slots set to kUnassignedRVA. The slot is then DCHECKed to still hold the
// sentinel (a label must not be defined twice) and overwritten with |rva|.
// NOTE(review): the size test casts size() to int; a negative |index| would
// skip the resize and index out of range. Presumably callers only pass
// non-negative indices -- TODO confirm.
144 CheckBool
EncodedProgram::DefineLabelCommon(RvaVector
* rvas
,
148 if (static_cast<int>(rvas
->size()) <= index
)
149 ok
= rvas
->resize(index
+ 1, kUnassignedRVA
);
152 DCHECK_EQ((*rvas
)[index
], kUnassignedRVA
)
153 << "DefineLabel double assigned " << index
;
154 (*rvas
)[index
] = rva
;
// Finalizes both label tables by running FinishLabelsCommon() on abs32_rva_
// and then on rel32_rva_.
160 void EncodedProgram::EndLabels() {
161 FinishLabelsCommon(&abs32_rva_
);
162 FinishLabelsCommon(&rel32_rva_
);
// Walks |rvas| once from the front. Any slot still equal to kUnassignedRVA is
// overwritten with |previous|, and |previous| is then updated to the slot's
// (possibly just replaced) value so it carries forward to the next slot.
165 void EncodedProgram::FinishLabelsCommon(RvaVector
* rvas
) {
166 // Replace all unassigned slots with the value at the previous index so they
167 // delta-encode to zero. (There might be better values than zero. The way to
168 // get that is have the higher level assembly program assign the unassigned
171 size_t size
= rvas
->size();
172 for (size_t i
= 0; i
< size
; ++i
) {
173 if ((*rvas
)[i
] == kUnassignedRVA
)
174 (*rvas
)[i
] = previous
;
176 previous
= (*rvas
)[i
];
// Appends an ORIGIN op and its operand |origin| to origins_. The && means
// origins_ is only appended to when the ops_ push succeeded.
180 CheckBool
EncodedProgram::AddOrigin(RVA origin
) {
181 return ops_
.push_back(ORIGIN
) && origins_
.push_back(origin
);
// Appends |count| literal bytes read from |bytes| to the program.
// Paths that can be seen here:
//  - the last op is COPY1: a count entry of 1 is pushed for it, presumably
//    as part of promoting it to COPY -- TODO confirm;
//  - the last op is COPY: its count (copy_counts_.back()) is increased by
//    |count| and the bytes are appended to copy_bytes_;
//  - otherwise a new op is emitted: COPY1 stores only source[0] and no count
//    entry, COPY stores |count| plus all the bytes. COPY1 is presumably
//    chosen when count == 1 -- TODO confirm.
184 CheckBool
EncodedProgram::AddCopy(size_t count
, const void* bytes
) {
185 const uint8
* source
= static_cast<const uint8
*>(bytes
);
189 // Fold adjacent COPY instructions into one. This nearly halves the size of
190 // an EncodedProgram with only COPY1 instructions since there are approx plain
191 // 16 bytes per reloc. This has a working-set benefit during decompression.
192 // For compression of files with large differences this makes a small (4%)
193 // improvement in size. For files with small differences this degrades the
194 // compressed size by 1.3%
196 if (ops_
.back() == COPY1
) {
198 ok
= copy_counts_
.push_back(1);
200 if (ok
&& ops_
.back() == COPY
) {
201 copy_counts_
.back() += count
;
202 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
203 ok
= copy_bytes_
.push_back(source
[i
]);
211 ok
= ops_
.push_back(COPY1
) && copy_bytes_
.push_back(source
[0]);
213 ok
= ops_
.push_back(COPY
) && copy_counts_
.push_back(count
);
214 for (size_t i
= 0; ok
&& i
< count
; ++i
) {
215 ok
= copy_bytes_
.push_back(source
[i
]);
// Appends an ABS32 op and records |label_index| in abs32_ix_.
223 CheckBool
EncodedProgram::AddAbs32(int label_index
) {
224 return ops_
.push_back(ABS32
) && abs32_ix_
.push_back(label_index
);
// Appends an ABS64 op. The label index goes into abs32_ix_, the same table
// AddAbs32() uses; there is no separate 64-bit index table.
227 CheckBool
EncodedProgram::AddAbs64(int label_index
) {
228 return ops_
.push_back(ABS64
) && abs32_ix_
.push_back(label_index
);
// Appends a REL32 op and records |label_index| in rel32_ix_.
231 CheckBool
EncodedProgram::AddRel32(int label_index
) {
232 return ops_
.push_back(REL32
) && rel32_ix_
.push_back(label_index
);
// Appends an ARM relative op. |op| is a 16-bit value stored in ops_ after a
// cast to OP; |label_index| goes into rel32_ix_, shared with AddRel32().
235 CheckBool
EncodedProgram::AddRel32ARM(uint16 op
, int label_index
) {
236 return ops_
.push_back(static_cast<OP
>(op
)) &&
237 rel32_ix_
.push_back(label_index
);
// Appends the op that requests a PE base relocation table.
// EXE_WIN_32_X86 selects MAKE_PE_RELOCATION_TABLE; every other |kind| falls
// through to MAKE_PE64_RELOCATION_TABLE.
240 CheckBool
EncodedProgram::AddPeMakeRelocs(ExecutableType kind
) {
241 if (kind
== EXE_WIN_32_X86
)
242 return ops_
.push_back(MAKE_PE_RELOCATION_TABLE
);
243 return ops_
.push_back(MAKE_PE64_RELOCATION_TABLE
);
// Appends a MAKE_ELF_RELOCATION_TABLE op; it takes no operands.
246 CheckBool
EncodedProgram::AddElfMakeRelocs() {
247 return ops_
.push_back(MAKE_ELF_RELOCATION_TABLE
);
// Appends a MAKE_ELF_ARM_RELOCATION_TABLE op; it takes no operands.
250 CheckBool
EncodedProgram::AddElfARMMakeRelocs() {
251 return ops_
.push_back(MAKE_ELF_ARM_RELOCATION_TABLE
);
// Emits one VLOG(1) message containing image_base_ and the element count of
// each table held by this EncodedProgram. Does not modify any state.
254 void EncodedProgram::DebuggingSummary() {
255 VLOG(1) << "EncodedProgram Summary"
256 << "\n image base " << image_base_
257 << "\n abs32 rvas " << abs32_rva_
.size()
258 << "\n rel32 rvas " << rel32_rva_
.size()
259 << "\n ops " << ops_
.size()
260 << "\n origins " << origins_
.size()
261 << "\n copy_counts " << copy_counts_
.size()
262 << "\n copy_bytes " << copy_bytes_
.size()
263 << "\n abs32_ix " << abs32_ix_
.size()
264 << "\n rel32_ix " << rel32_ix_
.size();
267 ////////////////////////////////////////////////////////////////////////////////
269 // For algorithm refinement purposes it is useful to write subsets of the file
270 // format. This gives us the ability to estimate the entropy of the
271 // differential compression of the individual streams, which can provide
272 // invaluable insights. The default, of course, is to include all the streams.
// Each enumerator is a distinct bit, so a selection is a bitwise OR of these
// values and WriteTo() tests individual fields with '&'.
275 INCLUDE_ABS32_ADDRESSES
= 0x0001,
276 INCLUDE_REL32_ADDRESSES
= 0x0002,
277 INCLUDE_ABS32_INDEXES
= 0x0010,
278 INCLUDE_REL32_INDEXES
= 0x0020,
279 INCLUDE_OPS
= 0x0100,
280 INCLUDE_BYTES
= 0x0200,
281 INCLUDE_COPY_COUNTS
= 0x0400,
282 INCLUDE_MISC
= 0x1000
// Returns the set of fields WriteTo() should emit. The A_FIELDS environment
// variable is read and parsed with base::StringToUint64(); on success the
// parsed value is cast to FieldSelect. If parsing fails, ~0 is returned,
// which has every INCLUDE_* bit set.
// The result of GetVar() is not checked; an unset variable presumably leaves
// |s| empty so the parse fails and the all-fields default applies --
// TODO confirm.
285 static FieldSelect
GetFieldSelect() {
286 // TODO(sra): Use better configuration.
287 scoped_ptr
<base::Environment
> env(base::Environment::Create());
289 env
->GetVar("A_FIELDS", &s
);
291 if (!base::StringToUint64(s
, &fields
))
292 return static_cast<FieldSelect
>(~0);
293 return static_cast<FieldSelect
>(fields
);
// Serializes this program into |streams|, one table per stream, honouring
// the FieldSelect mask from GetFieldSelect(). Table-writer results are
// accumulated into |success| with '&=', so later tables are still attempted
// after an earlier failure.
296 CheckBool
EncodedProgram::WriteTo(SinkStreamSet
* streams
) {
297 FieldSelect select
= GetFieldSelect();
299 // The order of fields must be consistent in WriteTo and ReadFrom, regardless
300 // of the streams used. The code can be configured with all kStreamXXX
301 // constants the same.
303 // If we change the code to pipeline reading with assembly (to avoid temporary
304 // storage vectors by consuming operands directly from the stream) then we
305 // need to read the base address and the random access address tables first,
306 // the rest can be interleaved.
// The 64-bit image_base_ is split into two 32-bit halves and written to
// kStreamMisc as two Varint32 values, high half first.
308 if (select
& INCLUDE_MISC
) {
309 uint32 high
= static_cast<uint32
>(image_base_
>> 32);
310 uint32 low
= static_cast<uint32
>(image_base_
& 0xffffffffU
);
312 if (!streams
->stream(kStreamMisc
)->WriteVarint32(high
) ||
313 !streams
->stream(kStreamMisc
)->WriteVarint32(low
)) {
320 if (select
& INCLUDE_ABS32_ADDRESSES
) {
321 success
&= WriteSigned32Delta(abs32_rva_
,
322 streams
->stream(kStreamAbs32Addresses
));
325 if (select
& INCLUDE_REL32_ADDRESSES
) {
326 success
&= WriteSigned32Delta(rel32_rva_
,
327 streams
->stream(kStreamRel32Addresses
));
// origins_ is gated by INCLUDE_MISC too, but goes to its own stream.
330 if (select
& INCLUDE_MISC
)
331 success
&= WriteVector(origins_
, streams
->stream(kStreamOriginAddresses
));
// The extra 5 in the reservation is presumably room for the Varint32 length
// prefix -- TODO confirm.
333 if (select
& INCLUDE_OPS
) {
335 success
&= streams
->stream(kStreamOps
)->Reserve(ops_
.size() + 5);
336 success
&= WriteVector(ops_
, streams
->stream(kStreamOps
));
339 if (select
& INCLUDE_COPY_COUNTS
)
340 success
&= WriteVector(copy_counts_
, streams
->stream(kStreamCopyCounts
));
342 if (select
& INCLUDE_BYTES
)
343 success
&= WriteVectorU8(copy_bytes_
, streams
->stream(kStreamBytes
));
345 if (select
& INCLUDE_ABS32_INDEXES
)
346 success
&= WriteVector(abs32_ix_
, streams
->stream(kStreamAbs32Indexes
));
348 if (select
& INCLUDE_REL32_INDEXES
)
349 success
&= WriteVector(rel32_ix_
, streams
->stream(kStreamRel32Indexes
));
// Deserializes a program from |streams|, reading the tables in the same
// order WriteTo() writes them. No FieldSelect test appears on this side, so
// every table is expected to be present. After all tables are read, every
// stream index below kStreamLimit is checked for leftover bytes.
// NOTE(review): '©_counts_' and '©_bytes_' below look like '&copy_counts_'
// and '&copy_bytes_' after an HTML-entity mix-up ('&copy' turned into the
// copyright sign). Restore them before compiling.
354 bool EncodedProgram::ReadFrom(SourceStreamSet
* streams
) {
358 if (!streams
->stream(kStreamMisc
)->ReadVarint32(&high
) ||
359 !streams
->stream(kStreamMisc
)->ReadVarint32(&low
)) {
// Reassemble the 64-bit image base from the two 32-bit halves.
362 image_base_
= (static_cast<uint64
>(high
) << 32) | low
;
364 if (!ReadSigned32Delta(&abs32_rva_
, streams
->stream(kStreamAbs32Addresses
)))
366 if (!ReadSigned32Delta(&rel32_rva_
, streams
->stream(kStreamRel32Addresses
)))
368 if (!ReadVector(&origins_
, streams
->stream(kStreamOriginAddresses
)))
370 if (!ReadVector(&ops_
, streams
->stream(kStreamOps
)))
372 if (!ReadVector(©_counts_
, streams
->stream(kStreamCopyCounts
)))
374 if (!ReadVectorU8(©_bytes_
, streams
->stream(kStreamBytes
)))
376 if (!ReadVector(&abs32_ix_
, streams
->stream(kStreamAbs32Indexes
)))
378 if (!ReadVector(&rel32_ix_
, streams
->stream(kStreamRel32Indexes
)))
381 // Check that streams have been completely consumed.
382 for (int i
= 0; i
< kStreamLimit
; ++i
) {
383 if (streams
->stream(i
)->Remaining() > 0)
390 // Safe, non-throwing version of std::vector::at(). Returns 'true' for success,
391 // 'false' for out-of-bounds index error.
// The bounds test is index >= v.size(). The element is handed back through
// |output| rather than as the return value.
392 template<typename V
, typename T
>
393 bool VectorAt(const V
& v
, size_t index
, T
* output
) {
394 if (index
>= v
.size())
// Emits the instruction for one ARM relative op. The switch selects on bits
// 12-15 of |op| (op & 0x0000F000). Every case follows the same steps:
//   1. fetch the label index at |ix_rel32_ix| from rel32_ix_;
//   2. resolve that index to an RVA through rel32_rva_;
//   3. call DisassemblerElf32ARM::Decompress() with the case's ARM_OFF*
//      kind, the low 16 bits of |op| and a displacement derived from
//      (rva - current_rva) to rebuild the instruction;
//   4. write the result to |output|.
// Both lookups go through VectorAt(), so out-of-range indices are detected
// instead of read.
400 CheckBool
EncodedProgram::EvaluateRel32ARM(OP op
,
403 SinkStream
* output
) {
404 switch (op
& 0x0000F000) {
// ARM_OFF8: the rebuilt instruction is truncated to 16 bits and written as
// 2 bytes.
407 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
411 if (!VectorAt(rel32_rva_
, index
, &rva
))
413 uint32 decompressed_op
;
414 if (!DisassemblerElf32ARM::Decompress(ARM_OFF8
,
415 static_cast<uint16
>(op
),
416 static_cast<uint32
>(rva
-
421 uint16 op16
= static_cast<uint16
>(decompressed_op
);
422 if (!output
->Write(&op16
, 2))
// ARM_OFF11: also written as a 16-bit instruction.
429 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
433 if (!VectorAt(rel32_rva_
, index
, &rva
))
435 uint32 decompressed_op
;
436 if (!DisassemblerElf32ARM::Decompress(ARM_OFF11
, (uint16
) op
,
437 (uint32
) (rva
- current_rva
),
441 uint16 op16
= static_cast<uint16
>(decompressed_op
);
442 if (!output
->Write(&op16
, 2))
// ARM_OFF24: the full 32-bit instruction is written unchanged.
449 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
453 if (!VectorAt(rel32_rva_
, index
, &rva
))
455 uint32 decompressed_op
;
456 if (!DisassemblerElf32ARM::Decompress(ARM_OFF24
, (uint16
) op
,
457 (uint32
) (rva
- current_rva
),
461 if (!output
->Write(&decompressed_op
, 4))
// ARM_OFF25: the two 16-bit halves of the instruction are swapped before
// the 4-byte write (presumably the Thumb-2 halfword order -- TODO confirm).
468 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
472 if (!VectorAt(rel32_rva_
, index
, &rva
))
474 uint32 decompressed_op
;
475 if (!DisassemblerElf32ARM::Decompress(ARM_OFF25
, (uint16
) op
,
476 (uint32
) (rva
- current_rva
),
480 uint32 words
= (decompressed_op
<< 16) | (decompressed_op
>> 16);
481 if (!output
->Write(&words
, 4))
// ARM_OFF21: same half-swap and 4-byte write as ARM_OFF25.
488 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
492 if (!VectorAt(rel32_rva_
, index
, &rva
))
494 uint32 decompressed_op
;
495 if (!DisassemblerElf32ARM::Decompress(ARM_OFF21
, (uint16
) op
,
496 (uint32
) (rva
- current_rva
),
500 uint32 words
= (decompressed_op
<< 16) | (decompressed_op
>> 16);
501 if (!output
->Write(&words
, 4))
// Replays ops_ in order and writes the reconstructed image to
// |final_buffer|. Each ix_* cursor records how far the matching operand
// table has been consumed. |output| starts as |final_buffer|; once a
// MAKE_*_RELOCATION_TABLE op is seen it is redirected to
// |bytes_following_relocation_table|. After the loop the relocation table is
// generated and the diverted bytes are appended to |final_buffer|. The final
// checks require the cursors to match the table sizes.
513 CheckBool
EncodedProgram::AssembleTo(SinkStream
* final_buffer
) {
514 // For the most part, the assembly process walks the various tables.
515 // ix_mumble is the index into the mumble table.
516 size_t ix_origins
= 0;
517 size_t ix_copy_counts
= 0;
518 size_t ix_copy_bytes
= 0;
519 size_t ix_abs32_ix
= 0;
520 size_t ix_rel32_ix
= 0;
524 bool pending_pe_relocation_table
= false;
525 uint8 pending_pe_relocation_table_type
= 0x03; // IMAGE_REL_BASED_HIGHLOW
526 Elf32_Word pending_elf_relocation_table_type
= 0;
527 SinkStream bytes_following_relocation_table
;
529 SinkStream
* output
= final_buffer
;
531 for (size_t ix_ops
= 0; ix_ops
< ops_
.size(); ++ix_ops
) {
532 OP op
= ops_
[ix_ops
];
// ARM relative ops are delegated to EvaluateRel32ARM().
536 if (!EvaluateRel32ARM(op
, ix_rel32_ix
, current_rva
, output
))
// Consume the next origins_ entry and make it the current RVA.
// NOTE(review): '§ion_rva' looks like '&section_rva' after an HTML-entity
// mix-up ('&sect' turned into the section sign). Restore it before compiling.
542 if (!VectorAt(origins_
, ix_origins
, §ion_rva
))
545 current_rva
= section_rva
;
// Multi-byte copy: |count| comes from copy_counts_, the bytes from
// copy_bytes_ one at a time; current_rva then advances by |count|.
551 if (!VectorAt(copy_counts_
, ix_copy_counts
, &count
))
554 for (size_t i
= 0; i
< count
; ++i
) {
556 if (!VectorAt(copy_bytes_
, ix_copy_bytes
, &b
))
559 if (!output
->Write(&b
, 1))
562 current_rva
+= static_cast<RVA
>(count
);
// Single-byte copy straight from copy_bytes_.
568 if (!VectorAt(copy_bytes_
, ix_copy_bytes
, &b
))
571 if (!output
->Write(&b
, 1))
// Relative reference: the stored offset is measured from the end of the
// 4-byte operand, hence current_rva + 4.
579 if (!VectorAt(rel32_ix_
, ix_rel32_ix
, &index
))
583 if (!VectorAt(rel32_rva_
, index
, &rva
))
585 uint32 offset
= (rva
- (current_rva
+ 4));
586 if (!output
->Write(&offset
, 4))
// Absolute 32-bit reference, computed with overflow-checked arithmetic
// starting from image_base_. The location is recorded in abs32_relocs_ for
// the relocation table generated after the loop.
// NOTE(review): ValueOrDie() terminates the process when the checked value
// is invalid. Confirm that is acceptable for values from an untrusted patch.
595 if (!VectorAt(abs32_ix_
, ix_abs32_ix
, &index
))
599 if (!VectorAt(abs32_rva_
, index
, &rva
))
602 base::CheckedNumeric
<uint32
> abs32
= image_base_
;
604 uint32 safe_abs32
= abs32
.ValueOrDie();
605 if (!abs32_relocs_
.push_back(current_rva
) ||
606 !output
->Write(&safe_abs32
, 4)) {
// 64-bit absolute reference: also recorded in abs32_relocs_, but 8 bytes are
// written.
611 base::CheckedNumeric
<uint64
> abs64
= image_base_
;
613 uint64 safe_abs64
= abs64
.ValueOrDie();
614 if (!abs32_relocs_
.push_back(current_rva
) ||
615 !output
->Write(&safe_abs64
, 8)) {
623 case MAKE_PE_RELOCATION_TABLE
: {
624 // We can see the base relocation anywhere, but we only have the
625 // information to generate it at the very end. So we divert the bytes
626 // we are generating to a temporary stream.
627 if (pending_pe_relocation_table
)
628 return false; // Can't have two base relocation tables.
630 pending_pe_relocation_table
= true;
631 output
= &bytes_following_relocation_table
;
633 // There is a potential problem *if* the instruction stream contains
634 // some REL32 relocations following the base relocation and in the same
635 // section. We don't know the size of the table, so 'current_rva' will
636 // be wrong, causing REL32 offsets to be miscalculated. This never
637 // happens; the base relocation table is usually in a section of its
638 // own, a data-only section, and following everything else in the
639 // executable except some padding zero bytes. We could fix this by
640 // emitting an ORIGIN after the MAKE_BASE_RELOCATION_TABLE.
643 case MAKE_PE64_RELOCATION_TABLE
: {
644 if (pending_pe_relocation_table
)
645 return false; // Can't have two base relocation tables.
647 pending_pe_relocation_table
= true;
648 pending_pe_relocation_table_type
= 0x0A; // IMAGE_REL_BASED_DIR64
649 output
= &bytes_following_relocation_table
;
653 case MAKE_ELF_ARM_RELOCATION_TABLE
: {
654 // We can see the base relocation anywhere, but we only have the
655 // information to generate it at the very end. So we divert the bytes
656 // we are generating to a temporary stream.
657 if (pending_elf_relocation_table_type
)
658 return false; // Can't have two base relocation tables.
660 pending_elf_relocation_table_type
= R_ARM_RELATIVE
;
661 output
= &bytes_following_relocation_table
;
665 case MAKE_ELF_RELOCATION_TABLE
: {
666 // We can see the base relocation anywhere, but we only have the
667 // information to generate it at the very end. So we divert the bytes
668 // we are generating to a temporary stream.
669 if (pending_elf_relocation_table_type
)
670 return false; // Can't have two base relocation tables.
672 pending_elf_relocation_table_type
= R_386_RELATIVE
;
673 output
= &bytes_following_relocation_table
;
// Emit the deferred relocation table, then the bytes diverted after it.
679 if (pending_pe_relocation_table
) {
680 if (!GeneratePeRelocations(final_buffer
,
681 pending_pe_relocation_table_type
) ||
682 !final_buffer
->Append(&bytes_following_relocation_table
))
686 if (pending_elf_relocation_table_type
) {
687 if (!GenerateElfRelocations(pending_elf_relocation_table_type
,
689 !final_buffer
->Append(&bytes_following_relocation_table
))
// NOTE(review): ix_origins has no matching check among the ones below.
// Confirm whether that is intentional.
693 // Final verification check: did we consume all lists?
694 if (ix_copy_counts
!= copy_counts_
.size())
696 if (ix_copy_bytes
!= copy_bytes_
.size())
698 if (ix_abs32_ix
!= abs32_ix_
.size())
700 if (ix_rel32_ix
!= rel32_ix_
.size())
706 // RelocBlock has the layout of a block of relocations in the base relocation
707 // table file format.
// The static_assert below pins the header at 8 bytes ahead of relocs[].
// |block_size| counts bytes including that header, which is why a block with
// no entries has block_size == 8 and why Add() computes the next free slot
// as (block_size - 8) / 2.
709 struct RelocBlockPOD
{
712 uint16 relocs
[4096]; // Allow up to one relocation per byte of a 4k page.
715 static_assert(offsetof(RelocBlockPOD
, relocs
) == 8, "reloc block header size");
// 0xFFFFFFFF is the initial page_rva; presumably it acts as a "no page yet"
// marker -- TODO confirm.
720 pod
.page_rva
= 0xFFFFFFFF;
// Stores |item| in the next free slot of relocs[].
// NOTE(review): the index is derived from block_size with no bound check
// against the 4096-entry array here. Confirm callers cannot add more than
// 4096 entries to one block (e.g. duplicate RVAs on one page).
724 void Add(uint16 item
) {
725 pod
.relocs
[(pod
.block_size
-8)/2] = item
;
// Writes the block (header plus entries, block_size bytes) to |buffer|, but
// only when it holds at least one entry (block_size != 8).
729 CheckBool
Flush(SinkStream
* buffer
) WARN_UNUSED_RESULT
{
731 if (pod
.block_size
!= 8) {
732 if (pod
.block_size
% 4 != 0) { // Pad to make size multiple of 4 bytes.
735 ok
= buffer
->Write(&pod
, pod
.block_size
);
// Writes a PE base relocation table for the locations in abs32_relocs_ to
// |buffer|. The locations are sorted first so that entries on the same 4 KiB
// page (rva & ~0xFFF) are adjacent. When the page changes, the current block
// is flushed and a new one is started for that page. Each entry packs |type|
// into the top 4 bits and the offset within the page (rva & 0xFFF) into the
// low 12 bits. The flush after the loop writes out the last block.
743 CheckBool
EncodedProgram::GeneratePeRelocations(SinkStream
* buffer
,
745 std::sort(abs32_relocs_
.begin(), abs32_relocs_
.end());
750 for (size_t i
= 0; ok
&& i
< abs32_relocs_
.size(); ++i
) {
751 uint32 rva
= abs32_relocs_
[i
];
752 uint32 page_rva
= rva
& ~0xFFF;
753 if (page_rva
!= block
.pod
.page_rva
) {
754 ok
&= block
.Flush(buffer
);
755 block
.pod
.page_rva
= page_rva
;
758 block
.Add(((static_cast<uint16
>(type
)) << 12) | (rva
& 0xFFF));
760 ok
&= block
.Flush(buffer
);
// Writes one Elf32_Rel record per entry of abs32_relocs_ to |buffer|, in
// ascending order of address (the vector is sorted in place first). Every
// record carries the same |r_info|; only r_offset changes. The loop stops at
// the first failed write.
764 CheckBool
EncodedProgram::GenerateElfRelocations(Elf32_Word r_info
,
765 SinkStream
* buffer
) {
766 std::sort(abs32_relocs_
.begin(), abs32_relocs_
.end());
768 Elf32_Rel relocation_block
;
770 relocation_block
.r_info
= r_info
;
773 for (size_t i
= 0; ok
&& i
< abs32_relocs_
.size(); ++i
) {
774 relocation_block
.r_offset
= abs32_relocs_
[i
];
775 ok
= buffer
->Write(&relocation_block
, sizeof(Elf32_Rel
));
780 ////////////////////////////////////////////////////////////////////////////////
// Free-function wrapper around EncodedProgram::WriteTo(). A failed write is
// reported to the caller as C_STREAM_ERROR.
782 Status
WriteEncodedProgram(EncodedProgram
* encoded
, SinkStreamSet
* sink
) {
783 if (!encoded
->WriteTo(sink
))
784 return C_STREAM_ERROR
;
// Allocates a new EncodedProgram and fills it from |streams| with ReadFrom().
// If ReadFrom() does not succeed the result is C_DESERIALIZATION_FAILED.
// Presumably the success branch hands the object to the caller through
// |output| and the failure path frees it -- TODO confirm.
788 Status
ReadEncodedProgram(SourceStreamSet
* streams
, EncodedProgram
** output
) {
789 EncodedProgram
* encoded
= new EncodedProgram();
790 if (encoded
->ReadFrom(streams
)) {
795 return C_DESERIALIZATION_FAILED
;
// Free-function wrapper around EncodedProgram::AssembleTo(): assembles
// |encoded| into |buffer|. C_ASSEMBLY_FAILED is the failure status;
// presumably it is returned when |assembled| is false -- TODO confirm.
798 Status
Assemble(EncodedProgram
* encoded
, SinkStream
* buffer
) {
799 bool assembled
= encoded
->AssembleTo(buffer
);
802 return C_ASSEMBLY_FAILED
;
// Disposes of |encoded|. Presumably the counterpart of the allocation made
// in ReadEncodedProgram() -- TODO confirm.
805 void DeleteEncodedProgram(EncodedProgram
* encoded
) {
809 } // namespace courgette