1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32_arm.h"
11 #include "base/basictypes.h"
12 #include "base/logging.h"
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
// Compress: turns one ARM/Thumb branch opcode into a Courgette opcode plus
// a relative target.
//   type   - which branch encoding arm_op uses.
//   arm_op - the raw opcode bits.
//   rva    - address of the op; the Thumb-2 path uses rva % 4 to choose the
//            prefetch value.
//   c_op   - out: Courgette opcode. The short forms tag it in the top
//            nibble (0x1000, 0x2000, 0x3000).
//   addr   - out: relative target taken from arm_op.
// NOTE(review): remarks below that name a specific ARM_RVA value are
// presumptions based on the immediate widths — confirm against the case
// labels.
20 CheckBool
DisassemblerElf32ARM::Compress(ARM_RVA type
, uint32 arm_op
, RVA rva
,
21 uint16
* c_op
, uint32
* addr
) {
22 // This method takes an ARM or thumb opcode, extracts the relative
23 // target address from it (addr), and creates a corresponding
24 // Courgette opcode (c_op).
26 // Details on the ARM opcodes, and how the relative targets are
27 // computed were taken from the "ARM Architecture Reference Manual",
28 // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
29 // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
// 8-bit immediate form (presumably ARM_OFF8 — TODO confirm).
32 // The offset is given by lower 8 bits of the op. It is a 9-bit
33 // offset, shifted right one bit and sign extended.
34 uint32 temp
= (arm_op
& 0x00FF) << 1;
37 temp
+= 4; // Offset from _next_ PC.
// Keep the op bits above the immediate; 0x1000 tags this form.
41 (*c_op
) = (arm_op
>> 8) | 0x1000;
// 11-bit immediate form (presumably ARM_OFF11 — TODO confirm).
45 // The offset is given by lower 11 bits of the op, and is a
46 // 12-bit offset, shifted right one bit and sign extended.
47 uint32 temp
= (arm_op
& 0x07FF) << 1;
// Bit 11 is the sign bit of the 12-bit shifted offset.
48 if (temp
& 0x00000800)
50 temp
+= 4; // Offset from _next_ PC.
// Keep the op bits above the immediate; 0x2000 tags this form.
53 (*c_op
) = (arm_op
>> 11) | 0x2000;
// 24-bit immediate form (presumably ARM_OFF24 — TODO confirm).
57 // The offset is given by the lower 24-bits of the op, shifted
58 // left 2 bits, and sign extended.
59 uint32 temp
= (arm_op
& 0x00FFFFFF) << 2;
// Bit 25 is the sign bit of the 26-bit shifted offset.
60 if (temp
& 0x02000000)
// Keep the top byte of the op; 0x3000 tags this form.
65 (*c_op
) = (arm_op
>> 24) | 0x3000;
// Thumb-2 32-bit branch (presumably ARM_OFF25 — TODO confirm):
// imm11 (op bits 0-10) lands in offset bits 1-11 and imm10 (op bits 16-25)
// in offset bits 12-21.
70 temp
|= (arm_op
& 0x000007FF) << 1; // imm11
71 temp
|= (arm_op
& 0x03FF0000) >> 4; // imm10
// S is op bit 26, J2 is op bit 11, J1 is op bit 13.
73 uint32 S
= (arm_op
& (1 << 26)) >> 26;
74 uint32 j2
= (arm_op
& (1 << 11)) >> 11;
75 uint32 j1
= (arm_op
& (1 << 13)) >> 13;
76 bool bit12
= ((arm_op
& (1 << 12)) >> 12) != 0;
77 bool bit14
= ((arm_op
& (1 << 14)) >> 14) != 0;
// I1/I2 are the one-bit complement of (J1 ^ S) / (J2 ^ S).
79 uint32 i2
= ~(j2
^ S
) & 1;
80 uint32 i1
= ~(j1
^ S
) & 1;
// Set when bit 14 is set and bit 12 is clear (presumably the variant that
// switches to ARM state — TODO confirm).
81 bool toARM
= bit14
&& !bit12
;
// S, I1 and I2 become offset bits 24, 23 and 22.
83 temp
|= (S
<< 24) | (i1
<< 23) | (i2
<< 22);
85 if (temp
& 0x01000000) // sign extension
89 // Align PC on 4-byte boundary
// 2 when rva is not a multiple of 4, otherwise 4.
90 uint32 align4byte
= (rva
% 4) ? 2 : 4;
91 prefetch
= align4byte
;
// Pack what Decompress reads back, under tag 0x4000: op bit 12 -> bit 0,
// op bit 14 -> bit 1, op bit 15 -> bit 2, op bits 27-31 -> bits 3-7, and
// the low nibble of prefetch -> bits 8-11.
98 uint32 temp2
= 0x4000;
99 temp2
|= (arm_op
& (1 << 12)) >> 12;
100 temp2
|= (arm_op
& (1 << 14)) >> 13;
101 temp2
|= (arm_op
& (1 << 15)) >> 13;
102 temp2
|= (arm_op
& 0xF8000000) >> 24;
103 temp2
|= (prefetch
& 0x0000000F) << 8;
// Thumb-2 conditional branch (presumably ARM_OFF21 — TODO confirm):
// imm11 -> offset bits 1-11, imm6 (op bits 16-21) -> offset bits 12-17.
109 temp
|= (arm_op
& 0x000007FF) << 1; // imm11
110 temp
|= (arm_op
& 0x003F0000) >> 4; // imm6
112 uint32 S
= (arm_op
& (1 << 26)) >> 26;
113 uint32 j2
= (arm_op
& (1 << 11)) >> 11;
114 uint32 j1
= (arm_op
& (1 << 13)) >> 13;
// S, J1 and J2 become offset bits 20, 19 and 18 (used as-is, not inverted).
116 temp
|= (S
<< 20) | (j1
<< 19) | (j2
<< 18);
118 if (temp
& 0x00100000) // sign extension
// Tag 0x5000; op bits 22-25 are stored in bits 0-3.
123 uint32 temp2
= 0x5000;
124 temp2
|= (arm_op
& 0x03C00000) >> 22; // just save the cond
// Decompress: rebuilds an opcode from a Courgette opcode and a relative
// target.
//   type   - which branch encoding to rebuild.
//   c_op   - Courgette opcode (tag nibble plus saved op bits).
//   addr   - relative target.
//   arm_op - out: the reconstructed opcode.
134 CheckBool
DisassemblerElf32ARM::Decompress(ARM_RVA type
, uint16 c_op
,
135 uint32 addr
, uint32
* arm_op
) {
136 // Reverses the process in the compress() method. Takes the
137 // Courgette op and relative address and reconstructs the original
// 8-bit form: drop the tag nibble, subtract the 4-byte PC bias, undo the
// one-bit shift and keep 8 immediate bits.
141 (*arm_op
) = ((c_op
& 0x0FFF) << 8) | (((addr
- 4) >> 1) & 0x000000FF);
// 11-bit form: same steps with an 11-bit immediate.
144 (*arm_op
) = ((c_op
& 0x0FFF) << 11) | (((addr
- 4) >> 1) & 0x000007FF);
// 24-bit form: subtract 8, undo the two-bit shift, keep 24 immediate bits.
// NOTE(review): c_op promotes to int, so this << 24 is a signed shift —
// confirm that is acceptable for the values Compress stores.
147 (*arm_op
) = ((c_op
& 0x0FFF) << 24) | (((addr
- 8) >> 2) & 0x00FFFFFF);
// Thumb-2 32-bit branch: c_op bits 0, 1, 2 go back to op bits 12, 14, 15
// and c_op bits 3-7 go back to op bits 27-31.
151 temp
|= (c_op
& (1 << 0)) << 12;
152 temp
|= (c_op
& (1 << 1)) << 13;
153 temp
|= (c_op
& (1 << 2)) << 13;
154 temp
|= (c_op
& (0xF8000000 >> 24)) << 24;
// Prefetch value saved in c_op bits 8-11.
156 uint32 prefetch
= (c_op
& 0x0F00) >> 8;
// S, I1 and I2 are offset bits 24, 23 and 22.
161 uint32 S
= (addr
& (1 << 24)) >> 24;
162 uint32 i1
= (addr
& (1 << 23)) >> 23;
163 uint32 i2
= (addr
& (1 << 22)) >> 22;
// J1/J2 are the one-bit result of (~I1 ^ S) / (~I2 ^ S).
165 uint32 j1
= ((~i1
) ^ S
) & 1;
166 uint32 j2
= ((~i2
) ^ S
) & 1;
// Offset bits 1-11 -> imm11 (op bits 0-10); offset bits 12-21 -> imm10
// (op bits 16-25).
172 temp
|= (addr
& (0x000007FF << 1)) >> 1;
173 temp
|= (addr
& (0x03FF0000 >> 4)) << 4;
// Thumb-2 conditional branch: start from the fixed bits 0xF0008000 and put
// c_op bits 0-3 back into op bits 22-25.
179 uint32 temp
= 0xF0008000;
180 temp
|= (c_op
& (0x03C00000 >> 22)) << 22;
// S, J1 and J2 are offset bits 20, 19 and 18.
185 uint32 S
= (addr
& (1 << 20)) >> 20;
186 uint32 j1
= (addr
& (1 << 19)) >> 19;
187 uint32 j2
= (addr
& (1 << 18)) >> 18;
// Offset bits 1-11 -> imm11 (op bits 0-10); offset bits 12-17 -> imm6
// (op bits 16-21).
193 temp
|= (addr
& (0x000007FF << 1)) >> 1;
194 temp
|= (addr
& (0x003F0000 >> 4)) << 4;
// Size of this op. ParseRel32RelocsFromSection advances its scan pointer by
// this value and compares it against 4 (presumably a byte count — TODO
// confirm).
205 uint16
DisassemblerElf32ARM::TypedRVAARM::op_size() const {
// Reads the op at |op_pointer|, runs it through Compress() with this
// object's type_ and rva(), and records the results in c_op_ and via
// set_relative_target().
//   op_pointer - address of the op bytes; also stored in arm_op_.
// Three read shapes are used: a 16-bit little-endian op, a 32-bit
// little-endian op, and a Thumb-2 op built from two 16-bit halves.
// NOTE(review): set_relative_target() is called straight after Compress(),
// before |ret| is examined — confirm that is intended on failure.
222 CheckBool
DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
223 const uint8
* op_pointer
) {
224 arm_op_
= op_pointer
;
// 16-bit op.
230 CheckBool ret
= Compress(type_
, Read16LittleEndian(op_pointer
), rva(),
231 &c_op_
, &relative_target
);
232 set_relative_target(relative_target
);
// 32-bit op read as one little-endian word.
237 CheckBool ret
= Compress(type_
, Read32LittleEndian(op_pointer
), rva(),
238 &c_op_
, &relative_target
);
239 set_relative_target(relative_target
);
245 // A thumb-2 op is 32 bits stored as two 16-bit words
// The first half-word becomes the high 16 bits of pval.
246 uint32 pval
= (Read16LittleEndian(op_pointer
) << 16)
247 | Read16LittleEndian(op_pointer
+ 2);
249 CheckBool ret
= Compress(type_
, pval
, rva(), &c_op_
, &relative_target
);
250 set_relative_target(relative_target
);
// Emits this op into |program| through EmitRel32ARM(), passing c_op() and
// the label that FindOrMakeRel32Label() returns for target_rva.
// Returns whatever EmitRel32ARM() returns.
258 CheckBool
DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
259 AssemblyProgram
* program
,
261 return program
->EmitRel32ARM(c_op(),
262 program
->FindOrMakeRel32Label(target_rva
),
// Constructor: forwards |start| and |length| unchanged to the
// DisassemblerElf32 base class.
267 DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start
, size_t length
)
268 : DisassemblerElf32(start
, length
) {
271 // Convert an ELF relocation structure into an RVA
//   rel    - the relocation entry; r_info is split into a type (low byte)
//            and a symbol (upper three bytes).
//   result - out: receives rel.r_offset for the basic ABS32 case.
272 CheckBool
DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel
, RVA
* result
) const {
274 // The rightmost byte of r_info is the type...
// The cast through unsigned char keeps only the low 8 bits of r_info.
275 elf32_rel_arm_type_values type
=
276 (elf32_rel_arm_type_values
)(unsigned char)rel
.r_info
;
278 // The other 3 bytes of r_info are the symbol
279 uint32 symbol
= rel
.r_info
>> 8;
287 // This is a basic ABS32 relocation address
288 *result
= rel
.r_offset
;
// Parses one relocation section into |program|.
//   section_header - header of the relocation section; sh_offset, sh_size
//                    and sh_entsize are read.
//   program        - receives the emitted instructions.
// Returns the result of the final ParseSimpleRegion() call over whatever
// part of the section was not consumed as relocation entries.
296 CheckBool
DisassemblerElf32ARM::ParseRelocationSection(
297 const Elf32_Shdr
*section_header
,
298 AssemblyProgram
* program
) {
299 // This method compresses a contiguous stretch of R_ARM_RELATIVE
300 // entries in the relocation table with a Courgette relocation table
301 // instruction. It skips any entries at the beginning that appear
302 // in a section that Courgette doesn't support, e.g. INIT.
303 // Specifically, the entries should be
304 // (1) In the same relocation table
305 // (2) Are consecutive
306 // (3) Are sorted in memory address order
308 // Happily, this is normally the case, but it's not required by spec
309 // so we check, and just don't do it if we don't match up.
311 // The expectation is that one relocation section will contain
312 // all of our R_ARM_RELATIVE entries in the expected order followed
313 // by assorted other entries we can't use special handling for.
317 // Walk all the bytes in the section, matching relocation table or not
318 size_t file_offset
= section_header
->sh_offset
;
319 size_t section_end
= section_header
->sh_offset
+ section_header
->sh_size
;
// View the section contents as an array of Elf32_Rel entries.
321 Elf32_Rel
*section_relocs_iter
=
322 (Elf32_Rel
*)OffsetToPointer(section_header
->sh_offset
);
// NOTE(review): no check for sh_entsize == 0 is evident before this
// division — confirm the header is validated elsewhere.
324 uint32 section_relocs_count
= section_header
->sh_size
/
325 section_header
->sh_entsize
;
// More recorded abs32 locations than entries in this section.
327 if (abs32_locations_
.size() > section_relocs_count
)
330 if (!abs32_locations_
.empty()) {
331 std::vector
<RVA
>::iterator reloc_iter
= abs32_locations_
.begin();
// Look for the entry whose r_offset equals the first abs32 location;
// entries before it are passed to ParseSimpleRegion() one at a time.
333 for (uint32 i
= 0; i
< section_relocs_count
; i
++) {
334 if (section_relocs_iter
->r_offset
== *reloc_iter
)
337 if (!ParseSimpleRegion(file_offset
, file_offset
+ sizeof(Elf32_Rel
),
341 file_offset
+= sizeof(Elf32_Rel
);
342 ++section_relocs_iter
;
// Walk entries and abs32 locations together. r_info is compared whole
// against R_ARM_RELATIVE, so the type byte and the symbol bits must both
// match.
345 while (match
&& (reloc_iter
!= abs32_locations_
.end())) {
346 if (section_relocs_iter
->r_info
!= R_ARM_RELATIVE
||
347 section_relocs_iter
->r_offset
!= *reloc_iter
)
350 section_relocs_iter
++;
352 file_offset
+= sizeof(Elf32_Rel
);
356 // Skip over relocation tables
357 if (!program
->EmitElfARMRelocationInstruction())
// Everything from file_offset to the end of the section is handled as a
// simple region.
362 return ParseSimpleRegion(file_offset
, section_end
, program
);
// Scans the bytes of the section described by |section_header| for ARM and
// Thumb relative branches. Each match becomes a TypedRVAARM; those whose
// target passes IsValidRVA() are appended to rel32_locations_.
//   section_header - sh_offset, sh_size and sh_addr are read.
// NOTE(review): each candidate is allocated with a raw new and only handed
// to rel32_locations_ when the target is valid — confirm it is deleted on
// the other paths.
365 CheckBool
DisassemblerElf32ARM::ParseRel32RelocsFromSection(
366 const Elf32_Shdr
* section_header
) {
368 uint32 start_file_offset
= section_header
->sh_offset
;
369 uint32 end_file_offset
= start_file_offset
+ section_header
->sh_size
;
371 const uint8
* start_pointer
= OffsetToPointer(start_file_offset
);
372 const uint8
* end_pointer
= OffsetToPointer(end_file_offset
);
374 // Quick way to convert from Pointer to RVA within a single Section is to
375 // subtract 'pointer_to_rva'.
376 const uint8
* const adjust_pointer_to_rva
= start_pointer
-
377 section_header
->sh_addr
;
379 // Find the rel32 relocations.
380 const uint8
* p
= start_pointer
;
381 bool on_32bit
= 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
382 while (p
< end_pointer
) {
383 // Heuristic discovery of rel32 locations in instruction stream: are the
384 // next few bytes the start of an instruction containing a rel32
387 TypedRVAARM
* rel32_rva
= NULL
;
// 16-bit Thumb candidates.
// NOTE(review): the guard asks for p + 3 <= end_pointer although a 16-bit
// read presumably needs only 2 bytes — confirm the extra byte is intended.
392 if (!found
&& (p
+ 3) <= end_pointer
) {
393 uint16 pval
= Read16LittleEndian(p
);
// Top nibble 0xD selects the ARM_OFF8 form.
394 if ((pval
& 0xF000) == 0xD000) {
395 RVA rva
= static_cast<RVA
>(p
- adjust_pointer_to_rva
);
397 rel32_rva
= new TypedRVAARM(ARM_OFF8
, rva
);
398 if (!rel32_rva
->ComputeRelativeTarget((uint8
*) p
)) {
401 target_rva
= rel32_rva
->rva() + rel32_rva
->relative_target();
// Top five bits 11100 select the ARM_OFF11 form.
403 } else if ((pval
& 0xF800) == 0xE000) {
404 RVA rva
= static_cast<RVA
>(p
- adjust_pointer_to_rva
);
406 rel32_rva
= new TypedRVAARM(ARM_OFF11
, rva
);
407 if (!rel32_rva
->ComputeRelativeTarget((uint8
*) p
)) {
410 target_rva
= rel32_rva
->rva() + rel32_rva
->relative_target();
415 // thumb-2 ops comprised of two 16-bit words
416 if (!found
&& (p
+ 5) <= end_pointer
) {
417 // This is really two 16-bit words, not one 32-bit word.
// The first half-word becomes the high 16 bits of pval.
418 uint32 pval
= (Read16LittleEndian(p
) << 16) | Read16LittleEndian(p
+ 2);
419 if ((pval
& 0xF8008000) == 0xF0008000) {
420 // Covers thumb-2's 32-bit conditional/unconditional branches
// Bit 14 or bit 12 set selects the ARM_OFF25 form; otherwise ARM_OFF21.
422 if ( (pval
& (1 << 14)) || (pval
& (1 << 12)) ) {
423 // A branch, with link, or with link and exchange.
424 RVA rva
= static_cast<RVA
>(p
- adjust_pointer_to_rva
);
426 rel32_rva
= new TypedRVAARM(ARM_OFF25
, rva
);
427 if (!rel32_rva
->ComputeRelativeTarget((uint8
*) p
)) {
430 target_rva
= rel32_rva
->rva() + rel32_rva
->relative_target();
433 // TODO(paulgazz) make sure cond is not 111
434 // A conditional branch instruction
435 RVA rva
= static_cast<RVA
>(p
- adjust_pointer_to_rva
);
437 rel32_rva
= new TypedRVAARM(ARM_OFF21
, rva
);
438 if (!rel32_rva
->ComputeRelativeTarget((uint8
*) p
)) {
441 target_rva
= rel32_rva
->rva() + rel32_rva
->relative_target();
// 32-bit ARM candidates are only tried while on_32bit is set.
448 if (!found
&& on_32bit
&& (p
+ 5) <= end_pointer
) {
449 uint32 pval
= Read32LittleEndian(p
);
450 if ((pval
& 0x0E000000) == 0x0A000000) {
451 // Covers both 0x0A 0x0B ARM relative branches
452 RVA rva
= static_cast<RVA
>(p
- adjust_pointer_to_rva
);
454 rel32_rva
= new TypedRVAARM(ARM_OFF24
, rva
);
455 if (!rel32_rva
->ComputeRelativeTarget((uint8
*) p
)) {
458 target_rva
= rel32_rva
->rva() + rel32_rva
->relative_target();
// Keep the candidate only when its target is a valid RVA.
463 if (found
&& IsValidRVA(target_rva
)) {
464 rel32_locations_
.push_back(rel32_rva
);
465 #if COURGETTE_HISTOGRAM_TARGETS
466 ++rel32_target_rvas_
[target_rva
];
// Step past the op that was just recorded.
468 p
+= rel32_rva
->op_size();
470 // A tricky way to update the on_32bit flag. Here is the truth table:
471 // on_32bit | on_32bit size is 4
472 // ---------+---------------------
// NOTE(review): both operands of ^ promote to int, so ~ yields -1 or -2 and
// the != 0 test is true either way, leaving on_32bit always true here. The
// truth table above suggests a logical XNOR was intended — confirm.
477 on_32bit
= (~(on_32bit
^ (rel32_rva
->op_size() == 4))) != 0;
479 // Move 2 bytes at a time, but track 32-bit boundaries
// (on_32bit + 1) % 2 flips the flag: true -> false, false -> true.
481 on_32bit
= ((on_32bit
+ 1) % 2) != 0;
488 } // namespace courgette