Updating trunk VERSION from 2139.0 to 2140.0
[chromium-blink-merge.git] / courgette / disassembler_elf_32.cc
blobbfd1ef4acdcbad747095928b07e7f906a2149c60
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32.h"
7 #include <algorithm>
8 #include <string>
9 #include <vector>
11 #include "base/basictypes.h"
12 #include "base/logging.h"
13 #include "base/memory/scoped_vector.h"
15 #include "courgette/assembly_program.h"
16 #include "courgette/courgette.h"
17 #include "courgette/encoded_program.h"
19 namespace courgette {
21 DisassemblerElf32::DisassemblerElf32(const void* start, size_t length)
22 : Disassembler(start, length),
23 header_(NULL),
24 section_header_table_(NULL),
25 section_header_table_size_(0),
26 program_header_table_(NULL),
27 program_header_table_size_(0),
28 default_string_section_(NULL) {
31 bool DisassemblerElf32::ParseHeader() {
32 if (length() < sizeof(Elf32_Ehdr))
33 return Bad("Too small");
35 header_ = (Elf32_Ehdr *)start();
37 // Have magic for elf header?
38 if (header_->e_ident[0] != 0x7f ||
39 header_->e_ident[1] != 'E' ||
40 header_->e_ident[2] != 'L' ||
41 header_->e_ident[3] != 'F')
42 return Bad("No Magic Number");
44 if (header_->e_type != ET_EXEC &&
45 header_->e_type != ET_DYN)
46 return Bad("Not an executable file or shared library");
48 if (header_->e_machine != ElfEM())
49 return Bad("Not a supported architecture");
51 if (header_->e_version != 1)
52 return Bad("Unknown file version");
54 if (header_->e_shentsize != sizeof(Elf32_Shdr))
55 return Bad("Unexpected section header size");
57 if (header_->e_shoff >= length())
58 return Bad("Out of bounds section header table offset");
60 section_header_table_ = (Elf32_Shdr *)OffsetToPointer(header_->e_shoff);
61 section_header_table_size_ = header_->e_shnum;
63 if ((header_->e_shoff + header_->e_shnum ) >= length())
64 return Bad("Out of bounds section header table");
66 if (header_->e_phoff >= length())
67 return Bad("Out of bounds program header table offset");
69 program_header_table_ = (Elf32_Phdr *)OffsetToPointer(header_->e_phoff);
70 program_header_table_size_ = header_->e_phnum;
72 if ((header_->e_phoff + header_->e_phnum) >= length())
73 return Bad("Out of bounds program header table");
75 default_string_section_ = (const char *)SectionBody((int)header_->e_shstrndx);
77 ReduceLength(DiscoverLength());
79 return Good();
82 bool DisassemblerElf32::Disassemble(AssemblyProgram* target) {
83 if (!ok())
84 return false;
86 // The Image Base is always 0 for ELF Executables
87 target->set_image_base(0);
89 if (!ParseAbs32Relocs())
90 return false;
92 if (!ParseRel32RelocsFromSections())
93 return false;
95 if (!ParseFile(target))
96 return false;
98 target->DefaultAssignIndexes();
100 return true;
103 uint32 DisassemblerElf32::DiscoverLength() {
104 uint32 result = 0;
106 // Find the end of the last section
107 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
108 const Elf32_Shdr *section_header = SectionHeader(section_id);
110 if (section_header->sh_type == SHT_NOBITS)
111 continue;
113 uint32 section_end = section_header->sh_offset + section_header->sh_size;
115 if (section_end > result)
116 result = section_end;
119 // Find the end of the last segment
120 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
121 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
123 uint32 segment_end = segment_header->p_offset + segment_header->p_filesz;
125 if (segment_end > result)
126 result = segment_end;
129 uint32 section_table_end = header_->e_shoff +
130 (header_->e_shnum * sizeof(Elf32_Shdr));
131 if (section_table_end > result)
132 result = section_table_end;
134 uint32 segment_table_end = header_->e_phoff +
135 (header_->e_phnum * sizeof(Elf32_Phdr));
136 if (segment_table_end > result)
137 result = segment_table_end;
139 return result;
142 CheckBool DisassemblerElf32::IsValidRVA(RVA rva) const {
144 // It's valid if it's contained in any program segment
145 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
146 const Elf32_Phdr *segment_header = ProgramSegmentHeader(i);
148 if (segment_header->p_type != PT_LOAD)
149 continue;
151 Elf32_Addr begin = segment_header->p_vaddr;
152 Elf32_Addr end = segment_header->p_vaddr + segment_header->p_memsz;
154 if (rva >= begin && rva < end)
155 return true;
158 return false;
161 // Returns RVA for an in memory address, or NULL.
162 CheckBool DisassemblerElf32::RVAToFileOffset(Elf32_Addr addr,
163 size_t* result) const {
165 for (int i = 0; i < ProgramSegmentHeaderCount(); i++) {
166 Elf32_Addr begin = ProgramSegmentMemoryBegin(i);
167 Elf32_Addr end = begin + ProgramSegmentMemorySize(i);
169 if (addr >= begin && addr < end) {
170 Elf32_Addr offset = addr - begin;
172 if (offset < ProgramSegmentFileSize(i)) {
173 *result = ProgramSegmentFileOffset(i) + offset;
174 return true;
179 return false;
182 RVA DisassemblerElf32::FileOffsetToRVA(size_t offset) const {
183 // File offsets can be 64 bit values, but we are dealing with 32
184 // bit executables and so only need to support 32bit file sizes.
185 uint32 offset32 = (uint32)offset;
187 for (int i = 0; i < SectionHeaderCount(); i++) {
189 const Elf32_Shdr *section_header = SectionHeader(i);
191 // These can appear to have a size in the file, but don't.
192 if (section_header->sh_type == SHT_NOBITS)
193 continue;
195 Elf32_Off section_begin = section_header->sh_offset;
196 Elf32_Off section_end = section_begin + section_header->sh_size;
198 if (offset32 >= section_begin && offset32 < section_end) {
199 return section_header->sh_addr + (offset32 - section_begin);
203 return 0;
206 CheckBool DisassemblerElf32::RVAsToOffsets(std::vector<RVA>* rvas,
207 std::vector<size_t>* offsets) {
208 offsets->clear();
210 for (std::vector<RVA>::iterator rva = rvas->begin();
211 rva != rvas->end();
212 rva++) {
214 size_t offset;
216 if (!RVAToFileOffset(*rva, &offset))
217 return false;
219 offsets->push_back(offset);
222 return true;
225 CheckBool DisassemblerElf32::RVAsToOffsets(ScopedVector<TypedRVA>* rvas) {
226 for (ScopedVector<TypedRVA>::iterator rva = rvas->begin();
227 rva != rvas->end();
228 rva++) {
230 size_t offset;
232 if (!RVAToFileOffset((*rva)->rva(), &offset))
233 return false;
235 (*rva)->set_offset(offset);
238 return true;
241 CheckBool DisassemblerElf32::ParseFile(AssemblyProgram* program) {
242 // Walk all the bytes in the file, whether or not in a section.
243 uint32 file_offset = 0;
245 std::vector<size_t> abs_offsets;
247 if (!RVAsToOffsets(&abs32_locations_, &abs_offsets))
248 return false;
250 if (!RVAsToOffsets(&rel32_locations_))
251 return false;
253 std::vector<size_t>::iterator current_abs_offset = abs_offsets.begin();
254 ScopedVector<TypedRVA>::iterator current_rel = rel32_locations_.begin();
256 std::vector<size_t>::iterator end_abs_offset = abs_offsets.end();
257 ScopedVector<TypedRVA>::iterator end_rel = rel32_locations_.end();
259 for (int section_id = 0;
260 section_id < SectionHeaderCount();
261 section_id++) {
263 const Elf32_Shdr *section_header = SectionHeader(section_id);
265 if (!ParseSimpleRegion(file_offset,
266 section_header->sh_offset,
267 program))
268 return false;
269 file_offset = section_header->sh_offset;
271 switch (section_header->sh_type) {
272 case SHT_REL:
273 if (!ParseRelocationSection(section_header, program))
274 return false;
275 file_offset = section_header->sh_offset + section_header->sh_size;
276 break;
277 case SHT_PROGBITS:
278 if (!ParseProgbitsSection(section_header,
279 &current_abs_offset, end_abs_offset,
280 &current_rel, end_rel,
281 program))
282 return false;
283 file_offset = section_header->sh_offset + section_header->sh_size;
284 break;
285 case SHT_NOBITS:
286 // Fall through
287 case SHT_INIT_ARRAY:
288 // Fall through
289 case SHT_FINI_ARRAY:
290 while (current_abs_offset != end_abs_offset &&
291 *current_abs_offset >= section_header->sh_offset &&
292 *current_abs_offset <
293 (section_header->sh_offset + section_header->sh_size)) {
294 // Skip any abs_offsets appear in the unsupported INIT_ARRAY section
295 VLOG(1) << "Skipping relocation entry for unsupported section: " <<
296 section_header->sh_type;
297 current_abs_offset++;
299 break;
300 default:
301 if (current_abs_offset != end_abs_offset &&
302 *current_abs_offset >= section_header->sh_offset &&
303 *current_abs_offset <
304 (section_header->sh_offset + section_header->sh_size))
305 VLOG(1) << "Relocation address in unrecognized ELF section: " << \
306 section_header->sh_type;
307 break;
311 // Rest of the file past the last section
312 if (!ParseSimpleRegion(file_offset,
313 length(),
314 program))
315 return false;
317 // Make certain we consume all of the relocations as expected
318 return (current_abs_offset == end_abs_offset);
321 CheckBool DisassemblerElf32::ParseProgbitsSection(
322 const Elf32_Shdr *section_header,
323 std::vector<size_t>::iterator* current_abs_offset,
324 std::vector<size_t>::iterator end_abs_offset,
325 ScopedVector<TypedRVA>::iterator* current_rel,
326 ScopedVector<TypedRVA>::iterator end_rel,
327 AssemblyProgram* program) {
329 // Walk all the bytes in the file, whether or not in a section.
330 size_t file_offset = section_header->sh_offset;
331 size_t section_end = section_header->sh_offset + section_header->sh_size;
333 Elf32_Addr origin = section_header->sh_addr;
334 size_t origin_offset = section_header->sh_offset;
335 if (!program->EmitOriginInstruction(origin))
336 return false;
338 while (file_offset < section_end) {
340 if (*current_abs_offset != end_abs_offset &&
341 file_offset > **current_abs_offset)
342 return false;
344 while (*current_rel != end_rel &&
345 file_offset > (**current_rel)->get_offset()) {
346 (*current_rel)++;
349 size_t next_relocation = section_end;
351 if (*current_abs_offset != end_abs_offset &&
352 next_relocation > **current_abs_offset)
353 next_relocation = **current_abs_offset;
355 // Rel offsets are heuristically derived, and might (incorrectly) overlap
356 // an Abs value, or the end of the section, so +3 to make sure there is
357 // room for the full 4 byte value.
358 if (*current_rel != end_rel &&
359 next_relocation > ((**current_rel)->get_offset() + 3))
360 next_relocation = (**current_rel)->get_offset();
362 if (next_relocation > file_offset) {
363 if (!ParseSimpleRegion(file_offset, next_relocation, program))
364 return false;
366 file_offset = next_relocation;
367 continue;
370 if (*current_abs_offset != end_abs_offset &&
371 file_offset == **current_abs_offset) {
373 const uint8* p = OffsetToPointer(file_offset);
374 RVA target_rva = Read32LittleEndian(p);
376 if (!program->EmitAbs32(program->FindOrMakeAbs32Label(target_rva)))
377 return false;
378 file_offset += sizeof(RVA);
379 (*current_abs_offset)++;
380 continue;
383 if (*current_rel != end_rel &&
384 file_offset == (**current_rel)->get_offset()) {
386 uint32 relative_target = (**current_rel)->relative_target();
387 // This cast is for 64 bit systems, and is only safe because we
388 // are working on 32 bit executables.
389 RVA target_rva = (RVA)(origin + (file_offset - origin_offset) +
390 relative_target);
392 if (! (**current_rel)->EmitInstruction(program, target_rva))
393 return false;
394 file_offset += (**current_rel)->op_size();
395 (*current_rel)++;
396 continue;
400 // Rest of the section (if any)
401 return ParseSimpleRegion(file_offset, section_end, program);
404 CheckBool DisassemblerElf32::ParseSimpleRegion(
405 size_t start_file_offset,
406 size_t end_file_offset,
407 AssemblyProgram* program) {
409 const uint8* start = OffsetToPointer(start_file_offset);
410 const uint8* end = OffsetToPointer(end_file_offset);
412 // Callers don't guarantee start < end
413 if (start >= end) return true;
415 const ptrdiff_t len = end - start; // Works because vars are byte pointers
417 if (!program->EmitBytesInstruction(start, len))
418 return false;
420 return true;
423 CheckBool DisassemblerElf32::ParseAbs32Relocs() {
424 abs32_locations_.clear();
426 // Loop through sections for relocation sections
427 for (int section_id = 0; section_id < SectionHeaderCount(); section_id++) {
428 const Elf32_Shdr *section_header = SectionHeader(section_id);
430 if (section_header->sh_type == SHT_REL) {
432 Elf32_Rel *relocs_table = (Elf32_Rel *)SectionBody(section_id);
434 int relocs_table_count = section_header->sh_size /
435 section_header->sh_entsize;
437 // Elf32_Word relocation_section_id = section_header->sh_info;
439 // Loop through relocation objects in the relocation section
440 for (int rel_id = 0; rel_id < relocs_table_count; rel_id++) {
441 RVA rva;
443 // Quite a few of these conversions fail, and we simply skip
444 // them, that's okay.
445 if (RelToRVA(relocs_table[rel_id], &rva) && CheckSection(rva))
446 abs32_locations_.push_back(rva);
451 std::sort(abs32_locations_.begin(), abs32_locations_.end());
452 return true;
455 CheckBool DisassemblerElf32::CheckSection(RVA rva) {
456 size_t offset;
458 if (!RVAToFileOffset(rva, &offset)) {
459 return false;
462 for (int section_id = 0;
463 section_id < SectionHeaderCount();
464 section_id++) {
466 const Elf32_Shdr *section_header = SectionHeader(section_id);
468 if (offset >= section_header->sh_offset &&
469 offset < (section_header->sh_offset + section_header->sh_size)) {
470 switch (section_header->sh_type) {
471 case SHT_REL:
472 // Fall-through
473 case SHT_PROGBITS:
474 return true;
479 return false;
482 CheckBool DisassemblerElf32::ParseRel32RelocsFromSections() {
484 rel32_locations_.clear();
486 // Loop through sections for relocation sections
487 for (int section_id = 0;
488 section_id < SectionHeaderCount();
489 section_id++) {
491 const Elf32_Shdr *section_header = SectionHeader(section_id);
493 if (section_header->sh_type != SHT_PROGBITS)
494 continue;
496 if (!ParseRel32RelocsFromSection(section_header))
497 return false;
500 std::sort(rel32_locations_.begin(),
501 rel32_locations_.end(),
502 TypedRVA::IsLessThan);
503 return true;
506 } // namespace courgette