1 // Copyright (c) 2012 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32_x86.h"
11 #include "base/basictypes.h"
12 #include "base/logging.h"
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
20 DisassemblerElf32X86::DisassemblerElf32X86(const void* start
, size_t length
)
21 : Disassembler(start
, length
),
23 section_header_table_(NULL
),
24 section_header_table_size_(0),
25 program_header_table_(NULL
),
26 program_header_table_size_(0),
27 default_string_section_(NULL
) {
30 bool DisassemblerElf32X86::ParseHeader() {
31 if (length() < sizeof(Elf32_Ehdr
))
32 return Bad("Too small");
34 header_
= (Elf32_Ehdr
*)start();
36 // Have magic for elf header?
37 if (header_
->e_ident
[0] != 0x7f ||
38 header_
->e_ident
[1] != 'E' ||
39 header_
->e_ident
[2] != 'L' ||
40 header_
->e_ident
[3] != 'F')
41 return Bad("No Magic Number");
43 if (header_
->e_type
!= ET_EXEC
&&
44 header_
->e_type
!= ET_DYN
)
45 return Bad("Not an executable file or shared library");
47 if (header_
->e_machine
!= EM_386
)
48 return Bad("Not a supported architecture");
50 if (header_
->e_version
!= 1)
51 return Bad("Unknown file version");
53 if (header_
->e_shentsize
!= sizeof(Elf32_Shdr
))
54 return Bad("Unexpected section header size");
56 if (header_
->e_shoff
>= length())
57 return Bad("Out of bounds section header table offset");
59 section_header_table_
= (Elf32_Shdr
*)OffsetToPointer(header_
->e_shoff
);
60 section_header_table_size_
= header_
->e_shnum
;
62 if ((header_
->e_shoff
+ header_
->e_shnum
) >= length())
63 return Bad("Out of bounds section header table");
65 if (header_
->e_phoff
>= length())
66 return Bad("Out of bounds program header table offset");
68 program_header_table_
= (Elf32_Phdr
*)OffsetToPointer(header_
->e_phoff
);
69 program_header_table_size_
= header_
->e_phnum
;
71 if ((header_
->e_phoff
+ header_
->e_phnum
) >= length())
72 return Bad("Out of bounds program header table");
74 default_string_section_
= (const char *)SectionBody((int)header_
->e_shstrndx
);
76 ReduceLength(DiscoverLength());
81 bool DisassemblerElf32X86::Disassemble(AssemblyProgram
* target
) {
85 // The Image Base is always 0 for ELF Executables
86 target
->set_image_base(0);
88 if (!ParseAbs32Relocs())
91 if (!ParseRel32RelocsFromSections())
94 if (!ParseFile(target
))
97 target
->DefaultAssignIndexes();
102 uint32
DisassemblerElf32X86::DiscoverLength() {
105 // Find the end of the last section
106 for (int section_id
= 0; section_id
< SectionHeaderCount(); section_id
++) {
107 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
109 if (section_header
->sh_type
== SHT_NOBITS
)
112 uint32 section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
114 if (section_end
> result
)
115 result
= section_end
;
118 // Find the end of the last segment
119 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
120 const Elf32_Phdr
*segment_header
= ProgramSegmentHeader(i
);
122 uint32 segment_end
= segment_header
->p_offset
+ segment_header
->p_filesz
;
124 if (segment_end
> result
)
125 result
= segment_end
;
128 uint32 section_table_end
= header_
->e_shoff
+
129 (header_
->e_shnum
* sizeof(Elf32_Shdr
));
130 if (section_table_end
> result
)
131 result
= section_table_end
;
133 uint32 segment_table_end
= header_
->e_phoff
+
134 (header_
->e_phnum
* sizeof(Elf32_Phdr
));
135 if (segment_table_end
> result
)
136 result
= segment_table_end
;
141 CheckBool
DisassemblerElf32X86::IsValidRVA(RVA rva
) const {
143 // It's valid if it's contained in any program segment
144 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
145 const Elf32_Phdr
*segment_header
= ProgramSegmentHeader(i
);
147 if (segment_header
->p_type
!= PT_LOAD
)
150 Elf32_Addr begin
= segment_header
->p_vaddr
;
151 Elf32_Addr end
= segment_header
->p_vaddr
+ segment_header
->p_memsz
;
153 if (rva
>= begin
&& rva
< end
)
160 // Convert an ELF relocation structure into an RVA.
//
// |rel| is the relocation entry to examine.  Its r_info field is split into
// a type (low byte) and a symbol (remaining bits); on the path visible below
// the entry's r_offset is stored through |result|.
// NOTE(review): the dispatch on |type| and the return statements are not
// visible in this excerpt (a single case label survives), so which
// relocation types are accepted must be confirmed against the full file.
161 CheckBool
DisassemblerElf32X86::RelToRVA(Elf32_Rel rel
, RVA
* result
) const {
163 // The rightmost byte of r_info is the type...
// (the unsigned char cast keeps only that low byte)
164 elf32_rel_386_type_values type
=
165 (elf32_rel_386_type_values
)(unsigned char)rel
.r_info
;
167 // The other 3 bytes of r_info are the symbol
168 uint32 symbol
= rel
.r_info
>> 8;
186 // This is a basic ABS32 relocation address
187 *result
= rel
.r_offset
;
192 case R_386_TLS_TPOFF
:
199 // Returns RVA for an in memory address, or NULL.
200 CheckBool
DisassemblerElf32X86::RVAToFileOffset(Elf32_Addr addr
,
201 size_t* result
) const {
203 for (int i
= 0; i
< ProgramSegmentHeaderCount(); i
++) {
204 Elf32_Addr begin
= ProgramSegmentMemoryBegin(i
);
205 Elf32_Addr end
= begin
+ ProgramSegmentMemorySize(i
);
207 if (addr
>= begin
&& addr
< end
) {
208 Elf32_Addr offset
= addr
- begin
;
210 if (offset
< ProgramSegmentFileSize(i
)) {
211 *result
= ProgramSegmentFileOffset(i
) + offset
;
220 RVA
DisassemblerElf32X86::FileOffsetToRVA(size_t offset
) const {
221 // File offsets can be 64 bit values, but we are dealing with 32
222 // bit executables and so only need to support 32bit file sizes.
223 uint32 offset32
= (uint32
)offset
;
225 for (int i
= 0; i
< SectionHeaderCount(); i
++) {
227 const Elf32_Shdr
*section_header
= SectionHeader(i
);
229 // These can appear to have a size in the file, but don't.
230 if (section_header
->sh_type
== SHT_NOBITS
)
233 Elf32_Off section_begin
= section_header
->sh_offset
;
234 Elf32_Off section_end
= section_begin
+ section_header
->sh_size
;
236 if (offset32
>= section_begin
&& offset32
< section_end
) {
237 return section_header
->sh_addr
+ (offset32
- section_begin
);
244 CheckBool
DisassemblerElf32X86::RVAsToOffsets(std::vector
<RVA
>* rvas
,
245 std::vector
<size_t>* offsets
) {
248 for (std::vector
<RVA
>::iterator rva
= rvas
->begin();
254 if (!RVAToFileOffset(*rva
, &offset
))
257 offsets
->push_back(offset
);
263 CheckBool
DisassemblerElf32X86::ParseFile(AssemblyProgram
* program
) {
264 // Walk all the bytes in the file, whether or not in a section.
265 uint32 file_offset
= 0;
267 std::vector
<size_t> abs_offsets
;
268 std::vector
<size_t> rel_offsets
;
270 if (!RVAsToOffsets(&abs32_locations_
, &abs_offsets
))
273 if (!RVAsToOffsets(&rel32_locations_
, &rel_offsets
))
276 std::vector
<size_t>::iterator current_abs_offset
= abs_offsets
.begin();
277 std::vector
<size_t>::iterator current_rel_offset
= rel_offsets
.begin();
279 std::vector
<size_t>::iterator end_abs_offset
= abs_offsets
.end();
280 std::vector
<size_t>::iterator end_rel_offset
= rel_offsets
.end();
282 for (int section_id
= 0;
283 section_id
< SectionHeaderCount();
286 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
288 if (!ParseSimpleRegion(file_offset
,
289 section_header
->sh_offset
,
292 file_offset
= section_header
->sh_offset
;
294 switch (section_header
->sh_type
) {
296 if (!ParseRelocationSection(section_header
, program
))
298 file_offset
= section_header
->sh_offset
+ section_header
->sh_size
;
301 if (!ParseProgbitsSection(section_header
,
302 ¤t_abs_offset
, end_abs_offset
,
303 ¤t_rel_offset
, end_rel_offset
,
306 file_offset
= section_header
->sh_offset
+ section_header
->sh_size
;
313 // Rest of the file past the last section
314 if (!ParseSimpleRegion(file_offset
,
319 // Make certain we consume all of the relocations as expected
320 return (current_abs_offset
== end_abs_offset
);
323 CheckBool
DisassemblerElf32X86::ParseRelocationSection(
324 const Elf32_Shdr
*section_header
,
325 AssemblyProgram
* program
) {
326 // We can reproduce the R_386_RELATIVE entries in one of the relocation
327 // table based on other information in the patch, given these
330 // All R_386_RELATIVE entries are:
331 // 1) In the same relocation table
332 // 2) Are consecutive
333 // 3) Are sorted in memory address order
335 // Happily, this is normally the case, but it's not required by spec
336 // so we check, and just don't do it if we don't match up.
338 // The expectation is that one relocation section will contain
339 // all of our R_386_RELATIVE entries in the expected order followed
340 // by assorted other entries we can't use special handling for.
344 // Walk all the bytes in the section, matching relocation table or not
345 size_t file_offset
= section_header
->sh_offset
;
346 size_t section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
348 Elf32_Rel
*section_relocs_iter
=
349 (Elf32_Rel
*)OffsetToPointer(section_header
->sh_offset
);
351 uint32 section_relocs_count
= section_header
->sh_size
/
352 section_header
->sh_entsize
;
354 if (abs32_locations_
.size() > section_relocs_count
)
357 std::vector
<RVA
>::iterator reloc_iter
= abs32_locations_
.begin();
359 while (match
&& (reloc_iter
!= abs32_locations_
.end())) {
360 if (section_relocs_iter
->r_info
!= R_386_RELATIVE
||
361 section_relocs_iter
->r_offset
!= *reloc_iter
)
363 section_relocs_iter
++;
368 // Skip over relocation tables
369 if (!program
->EmitElfRelocationInstruction())
371 file_offset
+= sizeof(Elf32_Rel
) * abs32_locations_
.size();
374 return ParseSimpleRegion(file_offset
, section_end
, program
);
377 CheckBool
DisassemblerElf32X86::ParseProgbitsSection(
378 const Elf32_Shdr
*section_header
,
379 std::vector
<size_t>::iterator
* current_abs_offset
,
380 std::vector
<size_t>::iterator end_abs_offset
,
381 std::vector
<size_t>::iterator
* current_rel_offset
,
382 std::vector
<size_t>::iterator end_rel_offset
,
383 AssemblyProgram
* program
) {
385 // Walk all the bytes in the file, whether or not in a section.
386 size_t file_offset
= section_header
->sh_offset
;
387 size_t section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
389 Elf32_Addr origin
= section_header
->sh_addr
;
390 size_t origin_offset
= section_header
->sh_offset
;
391 if (!program
->EmitOriginInstruction(origin
))
394 while (file_offset
< section_end
) {
396 if (*current_abs_offset
!= end_abs_offset
&&
397 file_offset
> **current_abs_offset
)
400 while (*current_rel_offset
!= end_rel_offset
&&
401 file_offset
> **current_rel_offset
) {
402 (*current_rel_offset
)++;
405 size_t next_relocation
= section_end
;
407 if (*current_abs_offset
!= end_abs_offset
&&
408 next_relocation
> **current_abs_offset
)
409 next_relocation
= **current_abs_offset
;
411 // Rel offsets are heuristically derived, and might (incorrectly) overlap
412 // an Abs value, or the end of the section, so +3 to make sure there is
413 // room for the full 4 byte value.
414 if (*current_rel_offset
!= end_rel_offset
&&
415 next_relocation
> (**current_rel_offset
+ 3))
416 next_relocation
= **current_rel_offset
;
418 if (next_relocation
> file_offset
) {
419 if (!ParseSimpleRegion(file_offset
, next_relocation
, program
))
422 file_offset
= next_relocation
;
426 if (*current_abs_offset
!= end_abs_offset
&&
427 file_offset
== **current_abs_offset
) {
429 const uint8
* p
= OffsetToPointer(file_offset
);
430 RVA target_rva
= Read32LittleEndian(p
);
432 if (!program
->EmitAbs32(program
->FindOrMakeAbs32Label(target_rva
)))
434 file_offset
+= sizeof(RVA
);
435 (*current_abs_offset
)++;
439 if (*current_rel_offset
!= end_rel_offset
&&
440 file_offset
== **current_rel_offset
) {
442 const uint8
* p
= OffsetToPointer(file_offset
);
443 uint32 relative_target
= Read32LittleEndian(p
);
444 // This cast is for 64 bit systems, and is only safe because we
445 // are working on 32 bit executables.
446 RVA target_rva
= (RVA
)(origin
+ (file_offset
- origin_offset
) +
447 4 + relative_target
);
449 if (!program
->EmitRel32(program
->FindOrMakeRel32Label(target_rva
)))
451 file_offset
+= sizeof(RVA
);
452 (*current_rel_offset
)++;
457 // Rest of the section (if any)
458 return ParseSimpleRegion(file_offset
, section_end
, program
);
461 CheckBool
DisassemblerElf32X86::ParseSimpleRegion(
462 size_t start_file_offset
,
463 size_t end_file_offset
,
464 AssemblyProgram
* program
) {
466 const uint8
* start
= OffsetToPointer(start_file_offset
);
467 const uint8
* end
= OffsetToPointer(end_file_offset
);
469 const uint8
* p
= start
;
472 if (!program
->EmitByteInstruction(*p
))
480 CheckBool
DisassemblerElf32X86::ParseAbs32Relocs() {
481 abs32_locations_
.clear();
483 // Loop through sections for relocation sections
484 for (int section_id
= 0; section_id
< SectionHeaderCount(); section_id
++) {
485 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
487 if (section_header
->sh_type
== SHT_REL
) {
489 Elf32_Rel
*relocs_table
= (Elf32_Rel
*)SectionBody(section_id
);
491 int relocs_table_count
= section_header
->sh_size
/
492 section_header
->sh_entsize
;
494 // Elf32_Word relocation_section_id = section_header->sh_info;
496 // Loop through relocation objects in the relocation section
497 for (int rel_id
= 0; rel_id
< relocs_table_count
; rel_id
++) {
500 // Quite a few of these conversions fail, and we simply skip
501 // them, that's okay.
502 if (RelToRVA(relocs_table
[rel_id
], &rva
))
503 abs32_locations_
.push_back(rva
);
508 std::sort(abs32_locations_
.begin(), abs32_locations_
.end());
512 CheckBool
DisassemblerElf32X86::ParseRel32RelocsFromSections() {
514 rel32_locations_
.clear();
516 // Loop through sections for relocation sections
517 for (int section_id
= 0;
518 section_id
< SectionHeaderCount();
521 const Elf32_Shdr
*section_header
= SectionHeader(section_id
);
523 if (section_header
->sh_type
!= SHT_PROGBITS
)
526 if (!ParseRel32RelocsFromSection(section_header
))
530 std::sort(rel32_locations_
.begin(), rel32_locations_
.end());
534 CheckBool
DisassemblerElf32X86::ParseRel32RelocsFromSection(
535 const Elf32_Shdr
* section_header
) {
537 uint32 start_file_offset
= section_header
->sh_offset
;
538 uint32 end_file_offset
= start_file_offset
+ section_header
->sh_size
;
540 const uint8
* start_pointer
= OffsetToPointer(start_file_offset
);
541 const uint8
* end_pointer
= OffsetToPointer(end_file_offset
);
543 // Quick way to convert from Pointer to RVA within a single Section is to
544 // subtract 'pointer_to_rva'.
545 const uint8
* const adjust_pointer_to_rva
= start_pointer
-
546 section_header
->sh_addr
;
548 // Find the rel32 relocations.
549 const uint8
* p
= start_pointer
;
550 while (p
< end_pointer
) {
551 //RVA current_rva = static_cast<RVA>(p - adjust_pointer_to_rva);
553 // Heuristic discovery of rel32 locations in instruction stream: are the
554 // next few bytes the start of an instruction containing a rel32
556 const uint8
* rel32
= NULL
;
558 if (p
+ 5 <= end_pointer
) {
559 if (*p
== 0xE8 || *p
== 0xE9) { // jmp rel32 and call rel32
563 if (p
+ 6 <= end_pointer
) {
564 if (*p
== 0x0F && (*(p
+1) & 0xF0) == 0x80) { // Jcc long form
565 if (p
[1] != 0x8A && p
[1] != 0x8B) // JPE/JPO unlikely
570 RVA rel32_rva
= static_cast<RVA
>(rel32
- adjust_pointer_to_rva
);
572 RVA target_rva
= rel32_rva
+ 4 + Read32LittleEndian(rel32
);
573 // To be valid, rel32 target must be within image, and within this
575 if (IsValidRVA(target_rva
)) {
576 rel32_locations_
.push_back(rel32_rva
);
577 #if COURGETTE_HISTOGRAM_TARGETS
578 ++rel32_target_rvas_
[target_rva
];
590 } // namespace courgette