Add ICU message format support
[chromium-blink-merge.git] / courgette / disassembler_elf_32_arm.cc
blob800a64c7413d11ac8bbe09e6a9080f7669816378
1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "courgette/disassembler_elf_32_arm.h"
7 #include <algorithm>
8 #include <string>
9 #include <vector>
11 #include "base/basictypes.h"
12 #include "base/logging.h"
14 #include "courgette/assembly_program.h"
15 #include "courgette/courgette.h"
16 #include "courgette/encoded_program.h"
18 namespace courgette {
20 CheckBool DisassemblerElf32ARM::Compress(ARM_RVA type, uint32 arm_op, RVA rva,
21 uint16* c_op, uint32* addr) {
22 // This method takes an ARM or thumb opcode, extracts the relative
23 // target address from it (addr), and creates a corresponding
24 // Courgette opcode (c_op).
26 // Details on ARM the opcodes, and how the relative targets are
27 // computed were taken from the "ARM Architecture Reference Manual",
28 // section A4.1.5 and the "Thumb-2 supplement", section 4.6.12.
29 // ARM_OFF24 is for the ARM opcode. The rest are for thumb opcodes.
30 switch (type) {
31 case ARM_OFF8: {
32 // The offset is given by lower 8 bits of the op. It is a 9-bit
33 // offset, shifted right one bit and signed extended.
34 uint32 temp = (arm_op & 0x00FF) << 1;
35 if (temp & 0x0100)
36 temp |= 0xFFFFFE00;
37 temp += 4; // Offset from _next_ PC.
38 fflush(stdout);
40 (*addr) = temp;
41 (*c_op) = static_cast<uint16>(arm_op >> 8) | 0x1000;
42 break;
44 case ARM_OFF11: {
45 // The offset is given by lower 11 bits of the op, and is a
46 // 12-bit offset, shifted right one bit and sign extended.
47 uint32 temp = (arm_op & 0x07FF) << 1;
48 if (temp & 0x00000800)
49 temp |= 0xFFFFF000;
50 temp += 4; // Offset from _next_ PC.
52 (*addr) = temp;
53 (*c_op) = static_cast<uint16>(arm_op >> 11) | 0x2000;
54 break;
56 case ARM_OFF24: {
57 // The offset is given by the lower 24-bits of the op, shifted
58 // left 2 bits, and sign extended.
59 uint32 temp = (arm_op & 0x00FFFFFF) << 2;
60 if (temp & 0x02000000)
61 temp |= 0xFC000000;
62 temp += 8;
64 (*addr) = temp;
65 (*c_op) = (arm_op >> 24) | 0x3000;
66 break;
68 case ARM_OFF25: {
69 uint32 temp = 0;
70 temp |= (arm_op & 0x000007FF) << 1; // imm11
71 temp |= (arm_op & 0x03FF0000) >> 4; // imm10
73 uint32 S = (arm_op & (1 << 26)) >> 26;
74 uint32 j2 = (arm_op & (1 << 11)) >> 11;
75 uint32 j1 = (arm_op & (1 << 13)) >> 13;
76 bool bit12 = ((arm_op & (1 << 12)) >> 12) != 0;
77 bool bit14 = ((arm_op & (1 << 14)) >> 14) != 0;
79 uint32 i2 = ~(j2 ^ S) & 1;
80 uint32 i1 = ~(j1 ^ S) & 1;
81 bool toARM = bit14 && !bit12;
83 temp |= (S << 24) | (i1 << 23) | (i2 << 22);
85 if (temp & 0x01000000) // sign extension
86 temp |= 0xFE000000;
87 uint32 prefetch;
88 if (toARM) {
89 // Align PC on 4-byte boundary
90 uint32 align4byte = (rva % 4) ? 2 : 4;
91 prefetch = align4byte;
92 } else {
93 prefetch = 4;
95 temp += prefetch;
96 (*addr) = temp;
98 uint32 temp2 = 0x4000;
99 temp2 |= (arm_op & (1 << 12)) >> 12;
100 temp2 |= (arm_op & (1 << 14)) >> 13;
101 temp2 |= (arm_op & (1 << 15)) >> 13;
102 temp2 |= (arm_op & 0xF8000000) >> 24;
103 temp2 |= (prefetch & 0x0000000F) << 8;
104 (*c_op) = static_cast<uint16>(temp2);
105 break;
107 case ARM_OFF21: {
108 uint32 temp = 0;
109 temp |= (arm_op & 0x000007FF) << 1; // imm11
110 temp |= (arm_op & 0x003F0000) >> 4; // imm6
112 uint32 S = (arm_op & (1 << 26)) >> 26;
113 uint32 j2 = (arm_op & (1 << 11)) >> 11;
114 uint32 j1 = (arm_op & (1 << 13)) >> 13;
116 temp |= (S << 20) | (j1 << 19) | (j2 << 18);
118 if (temp & 0x00100000) // sign extension
119 temp |= 0xFFE00000;
120 temp += 4;
121 (*addr) = temp;
123 uint32 temp2 = 0x5000;
124 temp2 |= (arm_op & 0x03C00000) >> 22; // just save the cond
125 (*c_op) = static_cast<uint16>(temp2);
126 break;
128 default:
129 return false;
131 return true;
134 CheckBool DisassemblerElf32ARM::Decompress(ARM_RVA type, uint16 c_op,
135 uint32 addr, uint32* arm_op) {
136 // Reverses the process in the compress() method. Takes the
137 // Courgette op and relative address and reconstructs the original
138 // ARM or thumb op.
139 switch (type) {
140 case ARM_OFF8:
141 (*arm_op) = ((c_op & 0x0FFF) << 8) | (((addr - 4) >> 1) & 0x000000FF);
142 break;
143 case ARM_OFF11:
144 (*arm_op) = ((c_op & 0x0FFF) << 11) | (((addr - 4) >> 1) & 0x000007FF);
145 break;
146 case ARM_OFF24:
147 (*arm_op) = ((c_op & 0x0FFF) << 24) | (((addr - 8) >> 2) & 0x00FFFFFF);
148 break;
149 case ARM_OFF25: {
150 uint32 temp = 0;
151 temp |= (c_op & (1 << 0)) << 12;
152 temp |= (c_op & (1 << 1)) << 13;
153 temp |= (c_op & (1 << 2)) << 13;
154 temp |= (c_op & (0xF8000000 >> 24)) << 24;
156 uint32 prefetch = (c_op & 0x0F00) >> 8;
157 addr -= prefetch;
159 addr &= 0x01FFFFFF;
161 uint32 S = (addr & (1 << 24)) >> 24;
162 uint32 i1 = (addr & (1 << 23)) >> 23;
163 uint32 i2 = (addr & (1 << 22)) >> 22;
165 uint32 j1 = ((~i1) ^ S) & 1;
166 uint32 j2 = ((~i2) ^ S) & 1;
168 temp |= S << 26;
169 temp |= j2 << 11;
170 temp |= j1 << 13;
172 temp |= (addr & (0x000007FF << 1)) >> 1;
173 temp |= (addr & (0x03FF0000 >> 4)) << 4;
175 (*arm_op) = temp;
176 break;
178 case ARM_OFF21: {
179 uint32 temp = 0xF0008000;
180 temp |= (c_op & (0x03C00000 >> 22)) << 22;
182 addr -= 4;
183 addr &= 0x001FFFFF;
185 uint32 S = (addr & (1 << 20)) >> 20;
186 uint32 j1 = (addr & (1 << 19)) >> 19;
187 uint32 j2 = (addr & (1 << 18)) >> 18;
189 temp |= S << 26;
190 temp |= j2 << 11;
191 temp |= j1 << 13;
193 temp |= (addr & (0x000007FF << 1)) >> 1;
194 temp |= (addr & (0x003F0000 >> 4)) << 4;
196 (*arm_op) = temp;
197 break;
199 default:
200 return false;
202 return true;
205 uint16 DisassemblerElf32ARM::TypedRVAARM::op_size() const {
206 switch (type_) {
207 case ARM_OFF8:
208 return 2;
209 case ARM_OFF11:
210 return 2;
211 case ARM_OFF24:
212 return 4;
213 case ARM_OFF25:
214 return 4;
215 case ARM_OFF21:
216 return 4;
217 default:
218 return 0xFFFF;
222 CheckBool DisassemblerElf32ARM::TypedRVAARM::ComputeRelativeTarget(
223 const uint8* op_pointer) {
224 arm_op_ = op_pointer;
225 switch (type_) {
226 case ARM_OFF8:
227 // Fall through
228 case ARM_OFF11: {
229 RVA relative_target;
230 CheckBool ret = Compress(type_, Read16LittleEndian(op_pointer), rva(),
231 &c_op_, &relative_target);
232 set_relative_target(relative_target);
233 return ret;
235 case ARM_OFF24: {
236 RVA relative_target;
237 CheckBool ret = Compress(type_, Read32LittleEndian(op_pointer), rva(),
238 &c_op_, &relative_target);
239 set_relative_target(relative_target);
240 return ret;
242 case ARM_OFF25:
243 // Fall through
244 case ARM_OFF21: {
245 // A thumb-2 op is 32 bits stored as two 16-bit words
246 uint32 pval = (Read16LittleEndian(op_pointer) << 16)
247 | Read16LittleEndian(op_pointer + 2);
248 RVA relative_target;
249 CheckBool ret = Compress(type_, pval, rva(), &c_op_, &relative_target);
250 set_relative_target(relative_target);
251 return ret;
253 default:
254 return false;
258 CheckBool DisassemblerElf32ARM::TypedRVAARM::EmitInstruction(
259 AssemblyProgram* program,
260 RVA target_rva) {
261 return program->EmitRel32ARM(c_op(),
262 program->FindOrMakeRel32Label(target_rva),
263 arm_op_,
264 op_size());
267 DisassemblerElf32ARM::DisassemblerElf32ARM(const void* start, size_t length)
268 : DisassemblerElf32(start, length) {
271 // Convert an ELF relocation struction into an RVA
272 CheckBool DisassemblerElf32ARM::RelToRVA(Elf32_Rel rel, RVA* result) const {
274 // The rightmost byte of r_info is the type...
275 elf32_rel_arm_type_values type =
276 (elf32_rel_arm_type_values)(unsigned char)rel.r_info;
278 // The other 3 bytes of r_info are the symbol
279 uint32 symbol = rel.r_info >> 8;
281 switch(type)
283 case R_ARM_RELATIVE:
284 if (symbol != 0)
285 return false;
287 // This is a basic ABS32 relocation address
288 *result = rel.r_offset;
289 return true;
291 default:
292 return false;
296 CheckBool DisassemblerElf32ARM::ParseRelocationSection(
297 const Elf32_Shdr *section_header,
298 AssemblyProgram* program) {
299 // This method compresses a contiguous stretch of R_ARM_RELATIVE
300 // entries in the relocation table with a Courgette relocation table
301 // instruction. It skips any entries at the beginning that appear
302 // in a section that Courgette doesn't support, e.g. INIT.
303 // Specifically, the entries should be
304 // (1) In the same relocation table
305 // (2) Are consecutive
306 // (3) Are sorted in memory address order
308 // Happily, this is normally the case, but it's not required by spec
309 // so we check, and just don't do it if we don't match up.
311 // The expectation is that one relocation section will contain
312 // all of our R_ARM_RELATIVE entries in the expected order followed
313 // by assorted other entries we can't use special handling for.
315 bool match = true;
317 // Walk all the bytes in the section, matching relocation table or not
318 size_t file_offset = section_header->sh_offset;
319 size_t section_end = section_header->sh_offset + section_header->sh_size;
321 Elf32_Rel *section_relocs_iter =
322 (Elf32_Rel *)OffsetToPointer(section_header->sh_offset);
324 uint32 section_relocs_count = section_header->sh_size /
325 section_header->sh_entsize;
327 if (abs32_locations_.size() > section_relocs_count)
328 match = false;
330 if (!abs32_locations_.empty()) {
331 std::vector<RVA>::iterator reloc_iter = abs32_locations_.begin();
333 for (uint32 i = 0; i < section_relocs_count; i++) {
334 if (section_relocs_iter->r_offset == *reloc_iter)
335 break;
337 if (!ParseSimpleRegion(file_offset, file_offset + sizeof(Elf32_Rel),
338 program))
339 return false;
341 file_offset += sizeof(Elf32_Rel);
342 ++section_relocs_iter;
345 while (match && (reloc_iter != abs32_locations_.end())) {
346 if (section_relocs_iter->r_info != R_ARM_RELATIVE ||
347 section_relocs_iter->r_offset != *reloc_iter)
348 match = false;
350 section_relocs_iter++;
351 reloc_iter++;
352 file_offset += sizeof(Elf32_Rel);
355 if (match) {
356 // Skip over relocation tables
357 if (!program->EmitElfARMRelocationInstruction())
358 return false;
362 return ParseSimpleRegion(file_offset, section_end, program);
365 CheckBool DisassemblerElf32ARM::ParseRel32RelocsFromSection(
366 const Elf32_Shdr* section_header) {
368 uint32 start_file_offset = section_header->sh_offset;
369 uint32 end_file_offset = start_file_offset + section_header->sh_size;
371 const uint8* start_pointer = OffsetToPointer(start_file_offset);
372 const uint8* end_pointer = OffsetToPointer(end_file_offset);
374 // Quick way to convert from Pointer to RVA within a single Section is to
375 // subtract 'pointer_to_rva'.
376 const uint8* const adjust_pointer_to_rva = start_pointer -
377 section_header->sh_addr;
379 // Find the rel32 relocations.
380 const uint8* p = start_pointer;
381 bool on_32bit = 1; // 32-bit ARM ops appear on 32-bit boundaries, so track it
382 while (p < end_pointer) {
383 // Heuristic discovery of rel32 locations in instruction stream: are the
384 // next few bytes the start of an instruction containing a rel32
385 // addressing mode?
387 TypedRVAARM* rel32_rva = NULL;
388 RVA target_rva = 0;
389 bool found = false;
391 // 16-bit thumb ops
392 if (!found && (p + 3) <= end_pointer) {
393 uint16 pval = Read16LittleEndian(p);
394 if ((pval & 0xF000) == 0xD000) {
395 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
397 rel32_rva = new TypedRVAARM(ARM_OFF8, rva);
398 if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
399 return false;
401 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
402 found = true;
403 } else if ((pval & 0xF800) == 0xE000) {
404 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
406 rel32_rva = new TypedRVAARM(ARM_OFF11, rva);
407 if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
408 return false;
410 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
411 found = true;
415 // thumb-2 ops comprised of two 16-bit words
416 if (!found && (p + 5) <= end_pointer) {
417 // This is really two 16-bit words, not one 32-bit word.
418 uint32 pval = (Read16LittleEndian(p) << 16) | Read16LittleEndian(p + 2);
419 if ((pval & 0xF8008000) == 0xF0008000) {
420 // Covers thumb-2's 32-bit conditional/unconditional branches
422 if ( (pval & (1 << 14)) || (pval & (1 << 12)) ) {
423 // A branch, with link, or with link and exchange.
424 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
426 rel32_rva = new TypedRVAARM(ARM_OFF25, rva);
427 if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
428 return false;
430 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
431 found = true;
432 } else {
433 // TODO(paulgazz) make sure cond is not 111
434 // A conditional branch instruction
435 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
437 rel32_rva = new TypedRVAARM(ARM_OFF21, rva);
438 if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
439 return false;
441 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
442 found = true;
447 // 32-bit ARM ops
448 if (!found && on_32bit && (p + 5) <= end_pointer) {
449 uint32 pval = Read32LittleEndian(p);
450 if ((pval & 0x0E000000) == 0x0A000000) {
451 // Covers both 0x0A 0x0B ARM relative branches
452 RVA rva = static_cast<RVA>(p - adjust_pointer_to_rva);
454 rel32_rva = new TypedRVAARM(ARM_OFF24, rva);
455 if (!rel32_rva->ComputeRelativeTarget((uint8*) p)) {
456 return false;
458 target_rva = rel32_rva->rva() + rel32_rva->relative_target();
459 found = true;
463 if (found && IsValidRVA(target_rva)) {
464 rel32_locations_.push_back(rel32_rva);
465 #if COURGETTE_HISTOGRAM_TARGETS
466 ++rel32_target_rvas_[target_rva];
467 #endif
468 p += rel32_rva->op_size();
470 // A tricky way to update the on_32bit flag. Here is the truth table:
471 // on_32bit | on_32bit size is 4
472 // ---------+---------------------
473 // 1 | 0 0
474 // 0 | 0 1
475 // 0 | 1 0
476 // 1 | 1 1
477 on_32bit = (~(on_32bit ^ (rel32_rva->op_size() == 4))) != 0;
478 } else {
479 // Move 2 bytes at a time, but track 32-bit boundaries
480 p += 2;
481 on_32bit = ((on_32bit + 1) % 2) != 0;
485 return true;
488 } // namespace courgette