Apply patch from Daniel Schürmann: https://sourceforge.net/p/boomerang/bugs/78/
[boomerang.git] / boomerang / loader / ElfBinaryFile.cpp
blob507b35e101d0353e3c574620438beda577c1e4ae
1 /*
2 * Copyright (C) 1997-2001, The University of Queensland
4 * See the file "LICENSE.TERMS" for information on usage and
5 * redistribution of this file, and for a DISCLAIMER OF ALL
6 * WARRANTIES.
8 */
10 /*******************************************************************************
11 * File: ElfBinaryFile.cc
12 * Desc: This file contains the implementation of the class ElfBinaryFile.
13 ******************************************************************************/
16 * $Revision$
18 * ELF binary file format.
19 * This file implements the class ElfBinaryFile, derived from class BinaryFile.
20 * See ElfBinaryFile.h and BinaryFile.h for details
21 * MVE 30/9/97
22 * 10 Mar 02 - Mike: Mods for stand alone operation; construct function
23 * 21 May 02 - Mike: Slight mod for gcc 3.1
24 * 01 Oct 02 - Mike: Removed elf library (and include file) dependencies
25 * 02 Oct 02 - Mike: Fixed some more endianness issues
26 * 24 Mar 03 - Mike: GetAddressByName returns NO_ADDRESS on failure now
27 * 12 Jul 05 - Mike: fixed an endless loop in findRelPltOffset for pre-3.3.3 gcc compiled input files
30 /*==============================================================================
31 * Dependencies.
32 *============================================================================*/
34 #include "ElfBinaryFile.h"
35 #include <sys/types.h> // Next three for open()
36 #include <sys/stat.h>
37 #include <fcntl.h>
38 #include <iostream>
39 #include <cassert>
40 #include <cstring>
41 #if defined(_MSC_VER) && _MSC_VER >= 1400
42 #pragma warning(disable:4996) // Warnings about e.g. _strdup deprecated in VS 2005
43 #endif
// Ordered map keyed by std::string (compared with std::less<std::string>) with int values.
45 typedef std::map<std::string, int, std::less<std::string> > StrIntMap;
47 ElfBinaryFile::ElfBinaryFile(bool bArchive /* = false */)
48 : BinaryFile(bArchive), // Initialise base class
49 next_extern(0)
51 m_fd = 0;
52 m_pFileName = 0;
53 Init(); // Initialise all the common stuff
56 ElfBinaryFile::~ElfBinaryFile()
58 if (m_pImportStubs)
59 // Delete the array of import stubs
60 delete [] m_pImportStubs;
63 // Reset internal state, except for those that keep track of which member
64 // we're up to
66 void ElfBinaryFile::Init()
68 m_pImage = 0;
69 m_pPhdrs = 0; // No program headers
70 m_pShdrs = 0; // No section headers
71 m_pStrings = 0; // No strings
72 m_pReloc = 0;
73 m_pSym = 0;
74 m_uPltMin = 0; // No PLT limits
75 m_uPltMax = 0;
76 m_iLastSize = 0;
77 m_pImportStubs = 0;
// ELF symbol-name hash (originally hand decompiled from a sparc library function).
//
// For each byte c of the NUL-terminated name:
//     h = (h << 4) + c;  top = h & 0xf0000000;  if (top) h ^= top >> 24;  h &= ~top;
//
// o0:      NUL-terminated symbol name
// returns: the hash; the top four bits are always clear
//
// Each byte is read as an unsigned char. Reading through plain (possibly signed) char would sign-extend bytes
// >= 0x80 and yield a different hash for such names than the System V ABI function does.
extern "C"      // So we can call this with dlopen()
{

unsigned elf_hash(const char* o0)
{
    unsigned h = 0;
    for (const unsigned char* p = reinterpret_cast<const unsigned char*>(o0); *p != 0; ++p)
    {
        h = (h << 4) + *p;
        const unsigned top = h & 0xf0000000u;
        if (top != 0)
            h ^= top >> 24;         // Fold the overflowing nibble back into the low bits
        h &= ~top;                  // ... and clear it from the top
    }
    return h;
}

}   // extern "C"
106 // Return true for a good load
108 bool ElfBinaryFile::RealLoad(const char* sName)
110 int i;
112 if (m_bArchive)
114 // This is a member of an archive. Should not be using this function at all
115 return false;
118 m_pFileName = sName;
119 m_fd = fopen(sName, "rb");
120 if (m_fd == NULL) return 0;
122 // Determine file size
123 if (fseek(m_fd, 0, SEEK_END))
125 fprintf(stderr, "Error seeking to end of binary file\n");
126 return false;
128 m_lImageSize = ftell(m_fd);
130 // Allocate memory to hold the file
131 m_pImage = new unsigned char[m_lImageSize];
132 if (m_pImage == 0)
134 fprintf(stderr, "Could not allocate %ld bytes for program image\n", m_lImageSize);
135 return false;
137 Elf32_Ehdr* pHeader = (Elf32_Ehdr*) m_pImage; // Save a lot of casts
139 // Read the whole file in
140 fseek(m_fd, 0, SEEK_SET);
141 size_t size = fread(m_pImage, 1, m_lImageSize, m_fd);
142 if (size != (size_t) m_lImageSize)
143 fprintf(stderr, "WARNING! Only read %lu of %ld bytes of binary file!\n", size, m_lImageSize);
145 // Basic checks
146 if (strncmp((char*)m_pImage, "\x7F""ELF", 4) != 0)
148 fprintf(stderr, "Incorrect header: %02X %02X %02X %02X\n",
149 pHeader->e_ident[0], pHeader->e_ident[1], pHeader->e_ident[2],
150 pHeader->e_ident[3]);
151 return 0;
153 if ((pHeader->endianness != 1) && (pHeader->endianness != 2))
155 fprintf(stderr, "Unknown endianness %02X\n", pHeader->endianness);
156 return 0;
158 // Needed for elfRead4 to work:
159 m_elfEndianness = pHeader->endianness - 1;
161 // Set up program header pointer (in case needed)
162 i = elfRead4(&pHeader->e_phoff);
163 if (i) m_pPhdrs = (Elf32_Phdr*) (m_pImage + i);
165 // Set up section header pointer
166 i = elfRead4(&pHeader->e_shoff);
167 if (i) m_pShdrs = (Elf32_Shdr*) (m_pImage + i);
169 // Set up section header string table pointer
170 // NOTE: it does not appear that endianness affects shorts.. they are always in little endian format
171 // Gerard: I disagree. I need the elfRead on linux/i386
172 i = elfRead2(&pHeader->e_shstrndx); // pHeader->e_shstrndx;
173 if (i) m_pStrings = (char*)(m_pImage + elfRead4(&m_pShdrs[i].sh_offset));
175 i = 1; // counter - # sects. Start @ 1, total m_iNumSections
176 char* pName; // Section's name
178 // Number of sections
179 m_iNumSections = elfRead2(&pHeader->e_shnum);
181 // Allocate room for all the Elf sections (including the silly first one)
182 m_pSections = new SectionInfo[m_iNumSections];
183 if (m_pSections == 0) return false; // Failed!
185 // Set up the m_sh_link and m_sh_info arrays
186 m_sh_link = new int[m_iNumSections];
187 m_sh_info = new int[m_iNumSections];
189 // Number of elf sections
190 bool bGotCode = false; // True when have seen a code sect
191 ADDRESS arbitaryLoadAddr = 0x08000000;
192 for (i = 0; i < m_iNumSections; i++)
194 // Get section information.
195 Elf32_Shdr* pShdr = m_pShdrs + i;
196 if ((unsigned char*)pShdr > m_pImage + m_lImageSize)
198 std::cerr << "section " << i << " header is outside the image size\n";
199 return false;
201 pName = m_pStrings + elfRead4(&pShdr->sh_name);
202 if ((unsigned char*)pName > m_pImage + m_lImageSize)
204 std::cerr << "name for section " << i << " is outside the image size\n";
205 return false;
207 m_pSections[i].pSectionName = pName;
208 int off = elfRead4(&pShdr->sh_offset);
209 if (off) m_pSections[i].uHostAddr = m_pImage + off;
210 m_pSections[i].uNativeAddr = elfRead4(&pShdr->sh_addr);
211 m_pSections[i].uSectionSize = elfRead4(&pShdr->sh_size);
212 if (m_pSections[i].uNativeAddr == 0 && strncmp(pName, ".rel", 4))
214 int align = elfRead4(&pShdr->sh_addralign);
215 if (align > 1)
217 if (arbitaryLoadAddr % align)
218 arbitaryLoadAddr += align - (arbitaryLoadAddr % align);
220 m_pSections[i].uNativeAddr = arbitaryLoadAddr;
221 arbitaryLoadAddr += m_pSections[i].uSectionSize;
223 m_pSections[i].uType = elfRead4(&pShdr->sh_type);
224 m_sh_link[i] = elfRead4(&pShdr->sh_link);
225 m_sh_info[i] = elfRead4(&pShdr->sh_info);
226 m_pSections[i].uSectionEntrySize = elfRead4(&pShdr->sh_entsize);
227 if (m_pSections[i].uNativeAddr + m_pSections[i].uSectionSize > next_extern)
228 first_extern = next_extern = m_pSections[i].uNativeAddr + m_pSections[i].uSectionSize;
229 if ((elfRead4(&pShdr->sh_flags) & SHF_WRITE) == 0)
230 m_pSections[i].bReadOnly = true;
231 // Can't use the SHF_ALLOC bit to determine bss section; the bss section has SHF_ALLOC but also SHT_NOBITS.
232 // (But many other sections, such as .comment, also have SHT_NOBITS). So for now, just use the name
233 // if ((elfRead4(&pShdr->sh_flags) & SHF_ALLOC) == 0)
234 if (strcmp(pName, ".bss") == 0)
235 m_pSections[i].bBss = true;
236 if (elfRead4(&pShdr->sh_flags) & SHF_EXECINSTR)
238 m_pSections[i].bCode = true;
239 bGotCode = true; // We've got to a code section
241 // Deciding what is data and what is not is actually quite tricky but important.
242 // For example, it's crucial to flag the .exception_ranges section as data, otherwise there is a "hole" in the
243 // allocation map, that means that there is more than one "delta" from a read-only section to a page, and in the
244 // end using -C results in a file that looks OK but when run just says "Killed".
245 // So we use the Elf designations; it seems that ALLOC.!EXEC -> data
246 // But we don't want sections before the .text section, like .interp, .hash, etc etc. Hence bGotCode.
247 // NOTE: this ASSUMES that sections appear in a sensible order in the input binary file:
248 // junk, code, rodata, data, bss
249 if (bGotCode && ((elfRead4(&pShdr->sh_flags) & (SHF_EXECINSTR | SHF_ALLOC)) == SHF_ALLOC) &&
250 (elfRead4(&pShdr->sh_type) != SHT_NOBITS))
251 m_pSections[i].bData = true;
252 } // for each section
254 // assign arbitary addresses to .rel.* sections too
255 for (i = 0; i < m_iNumSections; i++)
256 if (m_pSections[i].uNativeAddr == 0 && !strncmp(m_pSections[i].pSectionName, ".rel", 4))
258 m_pSections[i].uNativeAddr = arbitaryLoadAddr;
259 arbitaryLoadAddr += m_pSections[i].uSectionSize;
262 // Add symbol info. Note that some symbols will be in the main table only, and others in the dynamic table only.
263 // So the best idea is to add symbols for all sections of the appropriate type
264 for (i = 1; i < m_iNumSections; ++i)
266 unsigned uType = m_pSections[i].uType;
267 if (uType == SHT_SYMTAB || uType == SHT_DYNSYM)
268 AddSyms(i);
269 #if 0 // Ick; bad logic. Done with fake library function pointers now (-2 .. -1024)
270 if (uType == SHT_REL || uType == SHT_RELA)
271 AddRelocsAsSyms(i);
272 #endif
275 // Save the relocation to symbol table info
276 PSectionInfo pRel = GetSectionInfoByName(".rela.text");
277 if (pRel)
279 m_bAddend = true; // Remember its a relA table
280 m_pReloc = (Elf32_Rel*) pRel->uHostAddr; // Save pointer to reloc table
281 //SetRelocInfo(pRel);
283 else
285 m_bAddend = false;
286 pRel = GetSectionInfoByName(".rel.text");
287 if (pRel)
289 //SetRelocInfo(pRel);
290 m_pReloc = (Elf32_Rel*) pRel->uHostAddr; // Save pointer to reloc table
294 // Find the PLT limits. Required for IsDynamicLinkedProc(), e.g.
295 PSectionInfo pPlt = GetSectionInfoByName(".plt");
296 if (pPlt)
298 m_uPltMin = pPlt->uNativeAddr;
299 m_uPltMax = pPlt->uNativeAddr + pPlt->uSectionSize;
302 // Apply relocations; important when the input program is not compiled with -fPIC
303 applyRelocations();
305 return true; // Success
308 // Clean up and unload the binary image
310 void ElfBinaryFile::UnLoad()
312 if (m_pImage) delete [] m_pImage;
313 fclose(m_fd);
314 Init(); // Set all internal state to 0
317 // Like a replacement for elf_strptr()
319 char *ElfBinaryFile::GetStrPtr(int idx, int offset)
321 if (idx < 0)
323 // Most commonly, this will be an index of -1, because a call to GetSectionIndexByName() failed
324 fprintf(stderr, "Error! GetStrPtr passed index of %d\n", idx);
325 return const_cast<char *> ("Error!");
327 // Get a pointer to the start of the string table
328 char *pSym = (char*) m_pSections[idx].uHostAddr;
329 // Just add the offset
330 return pSym + offset;
333 // Search the .rel[a].plt section for an entry with symbol table index i.
334 // If found, return the native address of the associated PLT entry.
335 // A linear search will be needed. However, starting at offset i and searching backwards with wraparound should
336 // typically minimise the number of entries to search
338 ADDRESS ElfBinaryFile::findRelPltOffset(int i, unsigned char *addrRelPlt, int sizeRelPlt, int numRelPlt, ADDRESS addrPlt)
340 int first = i;
341 if (first >= numRelPlt)
342 first = numRelPlt - 1;
343 int curr = first;
346 // Each entry is sizeRelPlt bytes, and will contain the offset, then the info (addend optionally follows)
347 int* pEntry = (int*) (addrRelPlt + (curr * sizeRelPlt));
348 int entry = elfRead4(pEntry + 1); // Read pEntry[1]
349 int sym = entry >> 8; // The symbol index is in the top 24 bits (Elf32 only)
350 if (sym == i)
352 // Found! Now we want the native address of the associated PLT entry.
353 // For now, assume a size of 0x10 for each PLT entry, and assume that each entry in the .rel.plt section
354 // corresponds exactly to an entry in the .plt (except there is one dummy .plt entry)
355 return addrPlt + 0x10 * (curr + 1);
357 if (--curr < 0)
358 curr = numRelPlt - 1;
360 while (curr != first); // Will eventually wrap around to first if not present
361 return 0; // Exit if this happens
364 // Add appropriate symbols to the symbol table. secIndex is the section index of the symbol table.
366 void ElfBinaryFile::AddSyms(int secIndex)
368 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
369 PSectionInfo pSect = &m_pSections[secIndex];
370 // Calc number of symbols
371 int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
372 m_pSym = (Elf32_Sym*) pSect->uHostAddr; // Pointer to symbols
373 int strIdx = m_sh_link[secIndex]; // sh_link points to the string table
375 PSectionInfo siPlt = GetSectionInfoByName(".plt");
376 ADDRESS addrPlt = siPlt ? siPlt->uNativeAddr : 0;
377 PSectionInfo siRelPlt = GetSectionInfoByName(".rel.plt");
378 int sizeRelPlt = 8; // Size of each entry in the .rel.plt table
379 if (siRelPlt == NULL)
381 siRelPlt = GetSectionInfoByName(".rela.plt");
382 sizeRelPlt = 12; // Size of each entry in the .rela.plt table is 12 bytes
384 unsigned char *addrRelPlt = NULL;
385 int numRelPlt = 0;
386 if (siRelPlt)
388 addrRelPlt = siRelPlt->uHostAddr;
389 numRelPlt = sizeRelPlt ? siRelPlt->uSectionSize / sizeRelPlt : 0;
391 // Number of entries in the PLT:
392 // int max_i_for_hack = siPlt ? (int)siPlt->uSectionSize / 0x10 : 0;
393 // Index 0 is a dummy entry
394 for (int i = 1; i < nSyms; i++)
396 ADDRESS val = (ADDRESS) elfRead4((int*) & m_pSym[i].st_value);
397 int name = elfRead4(&m_pSym[i].st_name);
398 if (name == 0) /* Silly symbols with no names */ continue;
399 std::string str(GetStrPtr(strIdx, name));
400 // Hack off the "@@GLIBC_2.0" of Linux, if present
401 size_t pos;
402 if ((pos = str.find("@@")) != std::string::npos)
403 str.erase(pos);
404 std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(val);
405 // Ensure no overwriting (except functions)
406 if (aa == m_SymTab.end() || ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FUNC)
408 if (val == 0 && siPlt) //&& i < max_i_for_hack) {
410 // Special hack for gcc circa 3.3.3: (e.g. test/pentium/settest). The value in the dynamic symbol table
411 // is zero! I was assuming that index i in the dynamic symbol table would always correspond to index i
412 // in the .plt section, but for fedora2_true, this doesn't work. So we have to look in the .rel[a].plt
413 // section. Thanks, gcc! Note that this hack can cause strange symbol names to appear
414 val = findRelPltOffset(i, addrRelPlt, sizeRelPlt, numRelPlt, addrPlt);
416 else if (e_type == E_REL)
418 int nsec = elfRead2(&m_pSym[i].st_shndx);
419 if (nsec >= 0 && nsec < m_iNumSections)
420 val += GetSectionInfo(nsec)->uNativeAddr;
423 #define ECHO_SYMS 0
424 #if ECHO_SYMS
425 std::cerr << "Elf AddSym: about to add " << str << " to address " << std::hex << val << std::dec << "\n";
426 #endif
427 m_SymTab[val] = str;
430 ADDRESS uMain = GetMainEntryPoint();
431 if (uMain != NO_ADDRESS && m_SymTab.find(uMain) == m_SymTab.end())
433 // Ugh - main mustn't have the STT_FUNC attribute. Add it
434 std::string sMain("main");
435 m_SymTab[uMain] = sMain;
437 return;
440 std::vector<ADDRESS> ElfBinaryFile::GetExportedAddresses(bool funcsOnly)
442 std::vector<ADDRESS> exported;
444 int i;
445 int secIndex = 0;
446 for (i = 1; i < m_iNumSections; ++i)
448 unsigned uType = m_pSections[i].uType;
449 if (uType == SHT_SYMTAB)
451 secIndex = i;
452 break;
455 if (secIndex == 0)
456 return exported;
458 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
459 PSectionInfo pSect = &m_pSections[secIndex];
460 // Calc number of symbols
461 int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
462 m_pSym = (Elf32_Sym*) pSect->uHostAddr; // Pointer to symbols
463 int strIdx = m_sh_link[secIndex]; // sh_link points to the string table
465 // Index 0 is a dummy entry
466 for (int i = 1; i < nSyms; i++)
468 ADDRESS val = (ADDRESS) elfRead4((int*) & m_pSym[i].st_value);
469 int name = elfRead4(&m_pSym[i].st_name);
470 if (name == 0) /* Silly symbols with no names */ continue;
471 std::string str(GetStrPtr(strIdx, name));
472 // Hack off the "@@GLIBC_2.0" of Linux, if present
473 size_t pos;
474 if ((pos = str.find("@@")) != std::string::npos)
475 str.erase(pos);
476 if (ELF32_ST_BIND(m_pSym[i].st_info) == STB_GLOBAL || ELF32_ST_BIND(m_pSym[i].st_info) == STB_WEAK)
478 if (funcsOnly == false || ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FUNC)
480 if (e_type == E_REL)
482 int nsec = elfRead2(&m_pSym[i].st_shndx);
483 if (nsec >= 0 && nsec < m_iNumSections)
484 val += GetSectionInfo(nsec)->uNativeAddr;
486 exported.push_back(val);
490 return exported;
495 // FIXME: this function is way off the rails. It seems to always overwrite the relocation entry with the 32 bit value
496 // from the symbol table. Totally invalid for SPARC, and most X86 relocations!
497 // So currently not called
499 void ElfBinaryFile::AddRelocsAsSyms(int relSecIdx)
501 PSectionInfo pSect = &m_pSections[relSecIdx];
502 if (pSect == 0) return;
503 // Calc number of relocations
504 int nRelocs = pSect->uSectionSize / pSect->uSectionEntrySize;
505 m_pReloc = (Elf32_Rel*) pSect->uHostAddr; // Pointer to symbols
506 int symSecIdx = m_sh_link[relSecIdx];
507 int strSecIdx = m_sh_link[symSecIdx];
509 // Index 0 is a dummy entry
510 for (int i = 1; i < nRelocs; i++)
512 ADDRESS val = (ADDRESS) elfRead4((int*) & m_pReloc[i].r_offset);
513 int symIndex = elfRead4(&m_pReloc[i].r_info) >> 8;
514 int flags = elfRead4(&m_pReloc[i].r_info);
515 if ((flags & 0xFF) == R_386_32)
517 // Lookup the value of the symbol table entry
518 ADDRESS a = elfRead4((int*) & m_pSym[symIndex].st_value);
519 if (m_pSym[symIndex].st_info & STT_SECTION)
520 a = GetSectionInfo(elfRead2(&m_pSym[symIndex].st_shndx))->uNativeAddr;
521 // Overwrite the relocation value... ?
522 writeNative4(val, a);
523 continue;
525 if ((flags & R_386_PC32) == 0)
526 continue;
527 if (symIndex == 0) /* Silly symbols with no names */ continue;
528 std::string str(GetStrPtr(strSecIdx, elfRead4(&m_pSym[symIndex].st_name)));
529 // Hack off the "@@GLIBC_2.0" of Linux, if present
530 size_t pos;
531 if ((pos = str.find("@@")) != std::string::npos)
532 str.erase(pos);
533 std::map<ADDRESS, std::string>::iterator it;
534 // Linear search!
535 for (it = m_SymTab.begin(); it != m_SymTab.end(); it++)
536 if ((*it).second == str)
537 break;
538 // Add new extern
539 if (it == m_SymTab.end())
541 m_SymTab[next_extern] = str;
542 it = m_SymTab.find(next_extern);
543 next_extern += 4;
545 writeNative4(val, (*it).first - val - 4);
547 return;
550 // Note: this function overrides a simple "return 0" function in the base class (i.e. BinaryFile::SymbolByAddress())
552 const char* ElfBinaryFile::SymbolByAddress(const ADDRESS dwAddr)
554 std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(dwAddr);
555 if (aa == m_SymTab.end())
556 return 0;
557 return (char*) aa->second.c_str();
560 bool ElfBinaryFile::ValueByName(const char* pName, SymValue* pVal, bool bNoTypeOK /* = false */)
562 int hash, numBucket, numChain, y;
563 int *pBuckets, *pChains; // For symbol table work
564 int found;
565 int* pHash; // Pointer to hash table
566 Elf32_Sym* pSym; // Pointer to the symbol table
567 int iStr; // Section index of the string table
568 PSectionInfo pSect;
570 pSect = GetSectionInfoByName(".dynsym");
571 if (pSect == 0)
573 // We have a file with no .dynsym section, and hence no .hash section (from my understanding - MVE).
574 // It seems that the only alternative is to linearly search the symbol tables.
575 // This must be one of the big reasons that linking is so slow! (at least, for statically linked files)
576 // Note MVE: We can't use m_SymTab because we may need the size
577 return SearchValueByName(pName, pVal);
579 pSym = (Elf32_Sym*) pSect->uHostAddr;
580 if (pSym == 0) return false;
581 pSect = GetSectionInfoByName(".hash");
582 if (pSect == 0) return false;
583 pHash = (int*) pSect->uHostAddr;
584 iStr = GetSectionIndexByName(".dynstr");
586 // First organise the hash table
587 numBucket = elfRead4(&pHash[0]);
588 numChain = elfRead4(&pHash[1]);
589 pBuckets = &pHash[2];
590 pChains = &pBuckets[numBucket];
592 // Hash the symbol
593 hash = elf_hash(pName) % numBucket;
594 y = elfRead4(&pBuckets[hash]); // Look it up in the bucket list
595 // Beware of symbol tables with 0 in the buckets, e.g. libstdc++.
596 // In that case, set found to false.
597 found = (y != 0);
598 if (y)
600 while (strcmp(pName, GetStrPtr(iStr, elfRead4(&pSym[y].st_name))) != 0)
602 y = elfRead4(&pChains[y]);
603 if (y == 0)
605 found = false;
606 break;
610 // Beware of symbols with STT_NOTYPE, e.g. "open" in libstdc++ !
611 // But sometimes "main" has the STT_NOTYPE attribute, so if bNoTypeOK is passed as true, return true
612 if (found && (bNoTypeOK || (ELF32_ST_TYPE(pSym[y].st_info) != STT_NOTYPE)))
614 pVal->uSymAddr = elfRead4((int*) & pSym[y].st_value);
615 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
616 if (e_type == E_REL)
618 int nsec = elfRead2(&pSym[y].st_shndx);
619 if (nsec >= 0 && nsec < m_iNumSections)
620 pVal->uSymAddr += GetSectionInfo(nsec)->uNativeAddr;
622 pVal->iSymSize = elfRead4(&pSym[y].st_size);
623 return true;
625 else
627 // We may as well do a linear search of the main symbol table. Some symbols (e.g. init_dummy) are
628 // in the main symbol table, but not in the hash table
629 return SearchValueByName(pName, pVal);
633 // Lookup the symbol table using linear searching. See comments above for why this appears to be needed.
635 bool ElfBinaryFile::SearchValueByName(const char* pName, SymValue* pVal, const char* pSectName, const char* pStrName)
637 // Note: this assumes .symtab. Many files don't have this section!!!
638 PSectionInfo pSect, pStrSect;
640 pSect = GetSectionInfoByName(pSectName);
641 if (pSect == 0) return false;
642 pStrSect = GetSectionInfoByName(pStrName);
643 if (pStrSect == 0) return false;
644 const char* pStr = (const char*) pStrSect->uHostAddr;
645 // Find number of symbols
646 int n = pSect->uSectionSize / pSect->uSectionEntrySize;
647 Elf32_Sym* pSym = (Elf32_Sym*) pSect->uHostAddr;
648 // Search all the symbols. It may be possible to start later than index 0
649 for (int i = 0; i < n; i++)
651 int idx = elfRead4(&pSym[i].st_name);
652 if (strcmp(pName, pStr + idx) == 0)
654 // We have found the symbol
655 pVal->uSymAddr = elfRead4((int*) & pSym[i].st_value);
656 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
657 if (e_type == E_REL)
659 int nsec = elfRead2(&pSym[i].st_shndx);
660 if (nsec >= 0 && nsec < m_iNumSections)
661 pVal->uSymAddr += GetSectionInfo(nsec)->uNativeAddr;
663 pVal->iSymSize = elfRead4(&pSym[i].st_size);
664 return true;
667 return false; // Not found (this table)
670 // Search for the given symbol. First search .symtab (if present); if not found or the table has been stripped,
671 // search .dynstr
673 bool ElfBinaryFile::SearchValueByName(const char* pName, SymValue* pVal)
675 if (SearchValueByName(pName, pVal, ".symtab", ".strtab"))
676 return true;
677 return SearchValueByName(pName, pVal, ".dynsym", ".dynstr");
680 ADDRESS ElfBinaryFile::GetAddressByName(const char* pName,
681 bool bNoTypeOK /* = false */)
683 SymValue Val;
684 bool bSuccess = ValueByName(pName, &Val, bNoTypeOK);
685 if (bSuccess)
687 m_iLastSize = Val.iSymSize;
688 m_uLastAddr = Val.uSymAddr;
689 return Val.uSymAddr;
691 else return NO_ADDRESS;
694 int ElfBinaryFile::GetSizeByName(const char* pName, bool bNoTypeOK /* = false */)
696 SymValue Val;
697 bool bSuccess = ValueByName(pName, &Val, bNoTypeOK);
698 if (bSuccess)
700 m_iLastSize = Val.iSymSize;
701 m_uLastAddr = Val.uSymAddr;
702 return Val.iSymSize;
704 else return 0;
707 // Guess the size of a function by finding the next symbol after it, and subtracting the distance.
708 // This function is NOT efficient; it has to compare the closeness of ALL symbols in the symbol table
710 int ElfBinaryFile::GetDistanceByName(const char* sName, const char* pSectName)
712 int size = GetSizeByName(sName);
713 if (size) return size; // No need to guess!
714 // No need to guess, but if there are fillers, then subtracting labels will give a better answer for coverage
715 // purposes. For example, switch_cc. But some programs (e.g. switch_ps) have the switch tables between the
716 // end of _start and main! So we are better off overall not trying to guess the size of _start
717 unsigned value = GetAddressByName(sName);
718 if (value == 0) return 0; // Symbol doesn't even exist!
720 PSectionInfo pSect;
721 pSect = GetSectionInfoByName(pSectName);
722 if (pSect == 0) return 0;
723 // Find number of symbols
724 int n = pSect->uSectionSize / pSect->uSectionEntrySize;
725 Elf32_Sym* pSym = (Elf32_Sym*) pSect->uHostAddr;
726 // Search all the symbols. It may be possible to start later than index 0
727 unsigned closest = 0xFFFFFFFF;
728 int idx = -1;
729 for (int i = 0; i < n; i++)
731 if ((pSym[i].st_value > value) && (pSym[i].st_value < closest))
733 idx = i;
734 closest = pSym[i].st_value;
737 if (idx == -1) return 0;
738 // Do some checks on the symbol's value; it might be at the end of the .text section
739 pSect = GetSectionInfoByName(".text");
740 ADDRESS low = pSect->uNativeAddr;
741 ADDRESS hi = low + pSect->uSectionSize;
742 if ((value >= low) && (value < hi))
744 // Our symbol is in the .text section. Put a ceiling of the end of the section on closest.
745 if (closest > hi) closest = hi;
747 return closest - value;
750 int ElfBinaryFile::GetDistanceByName(const char* sName)
752 int val = GetDistanceByName(sName, ".symtab");
753 if (val) return val;
754 return GetDistanceByName(sName, ".dynsym");
757 bool ElfBinaryFile::IsDynamicLinkedProc(ADDRESS uNative)
759 if (uNative > (unsigned) - 1024 && uNative != (unsigned) - 1)
760 return true; // Say yes for fake library functions
761 if (uNative >= first_extern && uNative < next_extern)
762 return true; // Yes for externs (not currently used)
763 if (m_uPltMin == 0) return false;
764 return (uNative >= m_uPltMin) && (uNative < m_uPltMax); // Yes if a call to the PLT (false otherwise)
769 // GetEntryPoints()
770 // Returns a list of pointers to SectionInfo structs representing entry points to the program
771 // Item 0 is the main() function; items 1 and 2 are .init and .fini
774 std::list<SectionInfo*>& ElfBinaryFile::GetEntryPoints(
775 const char* pEntry /* = "main" */)
777 SectionInfo* pSect = GetSectionInfoByName(".text");
778 ADDRESS uMain = GetAddressByName(pEntry, true);
779 ptrdiff_t delta = uMain - pSect->uNativeAddr;
780 pSect->uNativeAddr += delta;
781 pSect->uHostAddr += delta;
782 // Adjust uSectionSize so uNativeAddr + uSectionSize still is end of sect
783 pSect->uSectionSize -= delta;
784 m_EntryPoint.push_back(pSect);
785 // .init and .fini sections
786 pSect = GetSectionInfoByName(".init");
787 m_EntryPoint.push_back(pSect);
788 pSect = GetSectionInfoByName(".fini");
789 m_EntryPoint.push_back(pSect);
790 return m_EntryPoint;
795 // GetMainEntryPoint()
796 // Returns the entry point to main (this should be a label in elf binaries generated by compilers).
799 ADDRESS ElfBinaryFile::GetMainEntryPoint()
801 return GetAddressByName("main", true);
804 ADDRESS ElfBinaryFile::GetEntryPoint()
806 return (ADDRESS) elfRead4(&((Elf32_Ehdr*) m_pImage)->e_entry);
809 // FIXME: the below assumes a fixed delta
811 unsigned char *ElfBinaryFile::NativeToHostAddress(ADDRESS uNative)
813 if (m_iNumSections == 0) return NULL;
814 return m_pSections[1].uHostAddr - m_pSections[1].uNativeAddr + uNative;
817 ADDRESS ElfBinaryFile::GetRelocatedAddress(ADDRESS uNative)
819 // Not implemented yet. But we need the function to make it all link
820 return 0;
823 bool ElfBinaryFile::PostLoad(void* handle)
825 // This function is called after an archive member has been loaded by ElfArchiveFile
827 // Save the elf pointer
828 //m_elf = (Elf*) handle;
830 //return ProcessElfFile();
831 return false;
835 // Open this binaryfile for reading AND writing
837 bool ElfBinaryFile::Open(const char* sName)
839 return false;
842 void ElfBinaryFile::Close()
844 UnLoad();
847 LOAD_FMT ElfBinaryFile::GetFormat() const
849 return LOADFMT_ELF;
852 MACHINE ElfBinaryFile::GetMachine() const
854 int machine = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_machine);
855 if ((machine == EM_SPARC) || (machine == EM_SPARC32PLUS)) return MACHINE_SPARC;
856 else if (machine == EM_386) return MACHINE_PENTIUM;
857 else if (machine == EM_PA_RISC) return MACHINE_HPRISC;
858 else if (machine == EM_68K) return MACHINE_PALM; // Unlikely
859 else if (machine == EM_PPC) return MACHINE_PPC;
860 else if (machine == EM_ST20) return MACHINE_ST20;
861 else if (machine == EM_MIPS) return MACHINE_MIPS;
862 else if (machine == EM_X86_64)
864 std::cerr << "Error: ElfBinaryFile::GetMachine: The AMD x86-64 architecture is not supported yet\n";
865 return (MACHINE) - 1;
867 // An unknown machine type
868 std::cerr << "Error: ElfBinaryFile::GetMachine: Unsupported machine type: "
869 << machine << " (0x" << std::hex << machine << ")\n";
870 std::cerr << "(Please add a description for this type, thanks!)\n";
871 return (MACHINE) - 1;
874 bool ElfBinaryFile::isLibrary() const
876 int type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
877 return (type == ET_DYN);
880 std::list<const char *> ElfBinaryFile::getDependencyList()
882 std::list<const char *> result;
883 ADDRESS stringtab = NO_ADDRESS;
884 PSectionInfo dynsect = GetSectionInfoByName(".dynamic");
885 if (dynsect == NULL)
886 return result; /* no dynamic section = statically linked */
888 Elf32_Dyn *dyn;
889 for (dyn = (Elf32_Dyn *) dynsect->uHostAddr; dyn->d_tag != DT_NULL; dyn++)
891 if (dyn->d_tag == DT_STRTAB)
893 stringtab = (ADDRESS) dyn->d_un.d_ptr;
894 break;
898 if (stringtab == NO_ADDRESS) /* No string table = no names */
899 return result;
900 unsigned char *stringtabPtr = NativeToHostAddress(stringtab);
902 for (dyn = (Elf32_Dyn *) dynsect->uHostAddr; dyn->d_tag != DT_NULL; dyn++)
904 if (dyn->d_tag == DT_NEEDED)
906 const char *need = (char *) stringtabPtr + dyn->d_un.d_val;
907 if (need != NULL)
908 result.push_back(need);
911 return result;
914 ADDRESS ElfBinaryFile::getImageBase()
916 return m_uBaseAddr;
919 size_t ElfBinaryFile::getImageSize()
921 return m_uImageSize;
924 /*==============================================================================
925 * FUNCTION: ElfBinaryFile::GetImportStubs
926 * OVERVIEW: Get an array of addresses of imported function stubs
927 * This function relies on the fact that the symbols are sorted by address, and that Elf PLT
928 * entries have successive addresses beginning soon after m_PltMin
929 * PARAMETERS: numImports - reference to integer set to the number of these
930 * RETURNS: An array of native ADDRESSes
931 *============================================================================*/
932 ADDRESS* ElfBinaryFile::GetImportStubs(int& numImports)
934 ADDRESS a = m_uPltMin;
935 int n = 0;
936 std::map<ADDRESS, std::string>::iterator aa = m_SymTab.find(a);
937 std::map<ADDRESS, std::string>::iterator ff = aa;
938 bool delDummy = false;
939 if (aa == m_SymTab.end())
941 // Need to insert a dummy entry at m_uPltMin
942 delDummy = true;
943 m_SymTab[a] = std::string();
944 ff = m_SymTab.find(a);
945 aa = ff;
946 aa++;
948 while ((aa != m_SymTab.end()) && (a < m_uPltMax))
950 n++;
951 a = aa->first;
952 aa++;
954 // Allocate an array of ADDRESSESes
955 m_pImportStubs = new ADDRESS[n];
956 aa = ff; // Start at first
957 a = aa->first;
958 int i = 0;
959 while ((aa != m_SymTab.end()) && (a < m_uPltMax))
961 m_pImportStubs[i++] = a;
962 a = aa->first;
963 aa++;
965 if (delDummy)
966 m_SymTab.erase(ff); // Delete dummy entry
967 numImports = n;
968 return m_pImportStubs;
971 /*==============================================================================
972 * FUNCTION: ElfBinaryFile::GetDynamicGlobalMap
973 * OVERVIEW: Get a map from ADDRESS to const char*. This map contains the native addresses
974 * and symbolic names of global data items (if any) which are shared with dynamically
975 * linked libraries.
976 * Example: __iob (basis for stdout). The ADDRESS is the native address of a pointer
977 * to the real dynamic data object.
978 * NOTE: The caller should delete the returned map.
979 * PARAMETERS: None
980 * RETURNS: Pointer to a new map with the info, or 0 if none
981 *============================================================================*/
982 std::map<ADDRESS, const char*>* ElfBinaryFile::GetDynamicGlobalMap()
984 std::map<ADDRESS, const char*>* ret = new std::map<ADDRESS, const char*>;
985 SectionInfo* pSect = GetSectionInfoByName(".rel.bss");
986 if (pSect == 0)
987 pSect = GetSectionInfoByName(".rela.bss");
988 if (pSect == 0)
990 // This could easily mean that this file has no dynamic globals, and
991 // that is fine.
992 return ret;
994 int numEnt = pSect->uSectionSize / pSect->uSectionEntrySize;
995 SectionInfo* sym = GetSectionInfoByName(".dynsym");
996 if (sym == 0)
998 fprintf(stderr, "Could not find section .dynsym in source binary file");
999 return ret;
1001 Elf32_Sym* pSym = (Elf32_Sym*) sym->uHostAddr;
1002 int idxStr = GetSectionIndexByName(".dynstr");
1003 if (idxStr == -1)
1005 fprintf(stderr, "Could not find section .dynstr in source binary file");
1006 return ret;
1009 unsigned char *p = pSect->uHostAddr;
1010 for (int i = 0; i < numEnt; i++)
1012 // The ugly p[1] below is because it p might point to an Elf32_Rela struct, or an Elf32_Rel struct
1013 int sym = ELF32_R_SYM(((int*) p)[1]);
1014 int name = pSym[sym].st_name; // Index into string table
1015 const char* s = GetStrPtr(idxStr, name);
1016 ADDRESS val = ((int*) p)[0];
1017 (*ret)[val] = s; // Add the (val, s) mapping to ret
1018 p += pSect->uSectionEntrySize;
1021 return ret;
1024 /*==============================================================================
1025 * FUNCTION: ElfBinaryFile::elfRead2 and elfRead4
1026 * OVERVIEW: Read a 2 or 4 byte quantity from host address (C pointer) p
1027 * NOTE: Takes care of reading the correct endianness, set early on into m_elfEndianness
1028 * PARAMETERS: ps or pi: host pointer to the data
1029 * RETURNS: An integer representing the data
1030 *============================================================================*/
1031 int ElfBinaryFile::elfRead2(short* ps) const
1033 unsigned char* p = (unsigned char*) ps;
1034 if (m_elfEndianness)
1036 // Big endian
1037 return (int) ((p[0] << 8) + p[1]);
1039 else
1041 // Little endian
1042 return (int) (p[0] + (p[1] << 8));
1046 int ElfBinaryFile::elfRead4(int* pi) const
1048 short* p = (short*) pi;
1049 if (m_elfEndianness)
1051 return (int) ((elfRead2(p) << 16) + elfRead2(p + 1));
1053 else
1054 return (int) (elfRead2(p) + (elfRead2(p + 1) << 16));
1057 void ElfBinaryFile::elfWrite4(int* pi, int val)
1059 char* p = (char*) pi;
1060 if (m_elfEndianness)
1062 // Big endian
1063 *p++ = (char) (val >> 24);
1064 *p++ = (char) (val >> 16);
1065 *p++ = (char) (val >> 8);
1066 *p = (char) val;
1068 else
1070 *p++ = (char) val;
1071 *p++ = (char) (val >> 8);
1072 *p++ = (char) (val >> 16);
1073 *p = (char) (val >> 24);
1077 int ElfBinaryFile::readNative1(ADDRESS nat)
1079 PSectionInfo si = GetSectionInfoByAddr(nat);
1080 if (si == 0)
1082 si = GetSectionInfo(0);
1084 unsigned char *host = si->uHostAddr - si->uNativeAddr + nat;
1085 return *(char *) host;
1088 // Read 2 bytes from given native address
1090 int ElfBinaryFile::readNative2(ADDRESS nat)
1092 PSectionInfo si = GetSectionInfoByAddr(nat);
1093 if (si == 0) return 0;
1094 unsigned char *host = si->uHostAddr - si->uNativeAddr + nat;
1095 return elfRead2((short*) host);
1098 // Read 4 bytes from given native address
1100 int ElfBinaryFile::readNative4(ADDRESS nat)
1102 PSectionInfo si = GetSectionInfoByAddr(nat);
1103 if (si == 0) return 0;
1104 unsigned char *host = si->uHostAddr - si->uNativeAddr + nat;
1105 return elfRead4((int*) host);
1108 void ElfBinaryFile::writeNative4(ADDRESS nat, unsigned int n)
1110 PSectionInfo si = GetSectionInfoByAddr(nat);
1111 if (si == 0) return;
1112 unsigned char *host = si->uHostAddr - si->uNativeAddr + nat;
1113 if (m_elfEndianness)
1115 *(unsigned char*) host = (n >> 24) & 0xff;
1116 *(unsigned char*) (host + 1) = (n >> 16) & 0xff;
1117 *(unsigned char*) (host + 2) = (n >> 8) & 0xff;
1118 *(unsigned char*) (host + 3) = n & 0xff;
1120 else
1122 *(unsigned char*) (host + 3) = (n >> 24) & 0xff;
1123 *(unsigned char*) (host + 2) = (n >> 16) & 0xff;
1124 *(unsigned char*) (host + 1) = (n >> 8) & 0xff;
1125 *(unsigned char*) host = n & 0xff;
1129 // Read 8 bytes from given native address
1131 QWord ElfBinaryFile::readNative8(ADDRESS nat)
1133 int raw[2];
1134 #ifdef WORDS_BIGENDIAN // This tests the host machine
1135 if (m_elfEndianness) // This tests the source machine
1137 #else
1138 if (!m_elfEndianness)
1140 #endif // Balance }
1141 // Source and host are same endianness
1142 raw[0] = readNative4(nat);
1143 raw[1] = readNative4(nat + 4);
1145 else
1147 // Source and host are different endianness
1148 raw[1] = readNative4(nat);
1149 raw[0] = readNative4(nat + 4);
1151 //return reinterpret_cast<long long>(*raw); // Note: cast, not convert!!
1152 return *(QWord*) raw;
1155 // Read 4 bytes as a float
1157 float ElfBinaryFile::readNativeFloat4(ADDRESS nat)
1159 int raw = readNative4(nat);
1160 // Ugh! gcc says that reinterpreting from int to float is invalid!!
1161 //return reinterpret_cast<float>(raw); // Note: cast, not convert!!
1162 return *(float*) & raw; // Note: cast, not convert
1165 // Read 8 bytes as a float
1167 double ElfBinaryFile::readNativeFloat8(ADDRESS nat)
1169 int raw[2];
1170 #ifdef WORDS_BIGENDIAN // This tests the host machine
1171 if (m_elfEndianness) // This tests the source machine
1173 #else
1174 if (!m_elfEndianness)
1176 #endif // Balance }
1177 // Source and host are same endianness
1178 raw[0] = readNative4(nat);
1179 raw[1] = readNative4(nat + 4);
1181 else
1183 // Source and host are different endianness
1184 raw[1] = readNative4(nat);
1185 raw[0] = readNative4(nat + 4);
1187 //return reinterpret_cast<double>(*raw); // Note: cast, not convert!!
1188 return *(double*) raw;
1191 // This function is called via dlopen/dlsym; it returns a new BinaryFile derived concrete object.
1192 // After this object is returned, the virtual function call mechanism will call the rest of the code
1193 // in this library. It needs to be C linkage so that it its name is not mangled
1194 extern "C" {
1195 #ifdef _WIN32
1197 __declspec(dllexport)
1198 #endif
1199 BinaryFile* construct()
1201 return new ElfBinaryFile;
1205 void ElfBinaryFile::applyRelocations()
1207 int nextFakeLibAddr = -2; // See R_386_PC32 below; -1 sometimes used for main
1208 if (m_pImage == 0) return; // No file loaded
1209 int machine = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_machine);
1210 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
1211 switch (machine)
1213 case EM_SPARC:
1214 break; // Not implemented yet
1215 case EM_386:
1217 for (int i = 1; i < m_iNumSections; ++i)
1219 SectionInfo* ps = &m_pSections[i];
1220 if (ps->uType == SHT_REL)
1222 // A section such as .rel.dyn or .rel.plt (without an addend field).
1223 // Each entry has 2 words: r_offet and r_info. The r_offset is just the offset from the beginning
1224 // of the section (section given by the section header's sh_info) to the word to be modified.
1225 // r_info has the type in the bottom byte, and a symbol table index in the top 3 bytes.
1226 // A symbol table offset of 0 (STN_UNDEF) means use value 0. The symbol table involved comes from
1227 // the section header's sh_link field.
1228 int* pReloc = (int*) ps->uHostAddr;
1229 unsigned size = ps->uSectionSize;
1230 // NOTE: the r_offset is different for .o files (E_REL in the e_type header field) than for exe's
1231 // and shared objects!
1232 ADDRESS destNatOrigin = 0;
1233 unsigned char *destHostOrigin = NULL;
1234 if (e_type == E_REL)
1236 int destSection = m_sh_info[i];
1237 destNatOrigin = m_pSections[destSection].uNativeAddr;
1238 destHostOrigin = m_pSections[destSection].uHostAddr;
1240 int symSection = m_sh_link[i]; // Section index for the associated symbol table
1241 int strSection = m_sh_link[symSection]; // Section index for the string section assoc with this
1242 char* pStrSection = (char*) m_pSections[strSection].uHostAddr;
1243 Elf32_Sym* symOrigin = (Elf32_Sym*) m_pSections[symSection].uHostAddr;
1244 for (unsigned u = 0; u < size; u += 2 * sizeof (unsigned))
1246 unsigned r_offset = elfRead4(pReloc++);
1247 unsigned info = elfRead4(pReloc++);
1248 unsigned char relType = (unsigned char) info;
1249 unsigned symTabIndex = info >> 8;
1250 int* pRelWord; // Pointer to the word to be relocated
1251 if (e_type == E_REL)
1252 pRelWord = ((int*) (destHostOrigin + r_offset));
1253 else
1255 if (r_offset == 0) continue;
1256 SectionInfo* destSec = GetSectionInfoByAddr(r_offset);
1257 pRelWord = (int*) (destSec->uHostAddr - destSec->uNativeAddr + r_offset);
1258 destNatOrigin = 0;
1260 ADDRESS A, S = 0, P;
1261 int nsec;
1262 switch (relType)
1264 case 0: // R_386_NONE: just ignore (common)
1265 break;
1266 case 1: // R_386_32: S + A
1267 S = elfRead4((int*) & symOrigin[symTabIndex].st_value);
1268 if (e_type == E_REL)
1270 nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
1271 if (nsec >= 0 && nsec < m_iNumSections)
1272 S += GetSectionInfo(nsec)->uNativeAddr;
1274 A = elfRead4(pRelWord);
1275 elfWrite4(pRelWord, S + A);
1276 break;
1277 case 2: // R_386_PC32: S + A - P
1278 if (ELF32_ST_TYPE(symOrigin[symTabIndex].st_info) == STT_SECTION)
1280 nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
1281 if (nsec >= 0 && nsec < m_iNumSections)
1282 S = GetSectionInfo(nsec)->uNativeAddr;
1284 else
1286 S = elfRead4((int*) & symOrigin[symTabIndex].st_value);
1287 if (S == 0)
1289 // This means that the symbol doesn't exist in this module, and is not accessed
1290 // through the PLT, i.e. it will be statically linked, e.g. strcmp. We have the
1291 // name of the symbol right here in the symbol table entry, but the only way
1292 // to communicate with the loader is through the target address of the call.
1293 // So we use some very improbable addresses (e.g. -1, -2, etc) and give them entries
1294 // in the symbol table
1295 int nameOffset = elfRead4((int*) & symOrigin[symTabIndex].st_name);
1296 char* pName = pStrSection + nameOffset;
1297 // this is too slow, I'm just going to assume it is 0
1298 //S = GetAddressByName(pName);
1299 //if (S == (e_type == E_REL ? 0x8000000 : 0)) {
1300 S = nextFakeLibAddr--; // Allocate a new fake address
1301 AddSymbol(S, pName);
1304 else if (e_type == E_REL)
1306 nsec = elfRead2(&symOrigin[symTabIndex].st_shndx);
1307 if (nsec >= 0 && nsec < m_iNumSections)
1308 S += GetSectionInfo(nsec)->uNativeAddr;
1311 A = elfRead4(pRelWord);
1312 P = destNatOrigin + r_offset;
1313 elfWrite4(pRelWord, S + A - P);
1314 break;
1315 case 7:
1316 case 8: // R_386_RELATIVE
1317 break; // No need to do anything with these, if a shared object
1318 default:
1319 // std::cout << "Relocation type " << (int)relType << " not handled yet\n";
1326 default:
1327 break; // Not implemented
1331 bool ElfBinaryFile::IsRelocationAt(ADDRESS uNative)
1333 //int nextFakeLibAddr = -2; // See R_386_PC32 below; -1 sometimes used for main
1334 if (m_pImage == 0) return false; // No file loaded
1335 int machine = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_machine);
1336 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
1337 switch (machine)
1339 case EM_SPARC:
1340 break; // Not implemented yet
1341 case EM_386:
1343 for (int i = 1; i < m_iNumSections; ++i)
1345 SectionInfo* ps = &m_pSections[i];
1346 if (ps->uType == SHT_REL)
1348 // A section such as .rel.dyn or .rel.plt (without an addend field).
1349 // Each entry has 2 words: r_offet and r_info. The r_offset is just the offset from the beginning
1350 // of the section (section given by the section header's sh_info) to the word to be modified.
1351 // r_info has the type in the bottom byte, and a symbol table index in the top 3 bytes.
1352 // A symbol table offset of 0 (STN_UNDEF) means use value 0. The symbol table involved comes from
1353 // the section header's sh_link field.
1354 int* pReloc = (int*) ps->uHostAddr;
1355 unsigned size = ps->uSectionSize;
1356 // NOTE: the r_offset is different for .o files (E_REL in the e_type header field) than for exe's
1357 // and shared objects!
1358 ADDRESS destNatOrigin = 0;
1359 unsigned char *destHostOrigin;
1360 if (e_type == E_REL)
1362 int destSection = m_sh_info[i];
1363 destNatOrigin = m_pSections[destSection].uNativeAddr;
1364 destHostOrigin = m_pSections[destSection].uHostAddr;
1366 //int symSection = m_sh_link[i]; // Section index for the associated symbol table
1367 //int strSection = m_sh_link[symSection]; // Section index for the string section assoc with this
1368 //char* pStrSection = (char*)m_pSections[strSection].uHostAddr;
1369 //Elf32_Sym* symOrigin = (Elf32_Sym*) m_pSections[symSection].uHostAddr;
1370 for (unsigned u = 0; u < size; u += 2 * sizeof (unsigned))
1372 unsigned r_offset = elfRead4(pReloc++);
1373 //unsigned info = elfRead4(pReloc);
1374 pReloc++;
1375 //unsigned char relType = (unsigned char) info;
1376 //unsigned symTabIndex = info >> 8;
1377 ADDRESS pRelWord; // Pointer to the word to be relocated
1378 if (e_type == E_REL)
1379 pRelWord = destNatOrigin + r_offset;
1380 else
1382 if (r_offset == 0) continue;
1383 SectionInfo* destSec = GetSectionInfoByAddr(r_offset);
1384 pRelWord = destSec->uNativeAddr + r_offset;
1385 destNatOrigin = 0;
1387 if (uNative == pRelWord)
1388 return true;
1393 default:
1394 break; // Not implemented
1396 return false;
1399 const char *ElfBinaryFile::getFilenameSymbolFor(const char *sym)
1401 int i;
1402 int secIndex = 0;
1403 for (i = 1; i < m_iNumSections; ++i)
1405 unsigned uType = m_pSections[i].uType;
1406 if (uType == SHT_SYMTAB)
1408 secIndex = i;
1409 break;
1412 if (secIndex == 0)
1413 return NULL;
1415 //int e_type = elfRead2(&((Elf32_Ehdr*)m_pImage)->e_type);
1416 PSectionInfo pSect = &m_pSections[secIndex];
1417 // Calc number of symbols
1418 int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
1419 m_pSym = (Elf32_Sym*) pSect->uHostAddr; // Pointer to symbols
1420 int strIdx = m_sh_link[secIndex]; // sh_link points to the string table
1422 std::string filename;
1424 // Index 0 is a dummy entry
1425 for (int i = 1; i < nSyms; i++)
1427 //ADDRESS val = (ADDRESS) elfRead4((int*)&m_pSym[i].st_value);
1428 int name = elfRead4(&m_pSym[i].st_name);
1429 if (name == 0) /* Silly symbols with no names */ continue;
1430 std::string str(GetStrPtr(strIdx, name));
1431 // Hack off the "@@GLIBC_2.0" of Linux, if present
1432 size_t pos;
1433 if ((pos = str.find("@@")) != std::string::npos)
1434 str.erase(pos);
1435 if (ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FILE)
1437 filename = str;
1438 continue;
1440 if (str == sym)
1442 if (filename.length())
1443 return strdup(filename.c_str());
1444 return NULL;
1447 return NULL;
1450 void ElfBinaryFile::getFunctionSymbols(std::map<std::string, std::map<ADDRESS, std::string> > &syms_in_file)
1452 int i;
1453 int secIndex = 0;
1454 for (i = 1; i < m_iNumSections; ++i)
1456 unsigned uType = m_pSections[i].uType;
1457 if (uType == SHT_SYMTAB)
1459 secIndex = i;
1460 break;
1463 if (secIndex == 0)
1465 fprintf(stderr, "no symtab section? Assuming stripped, looking for dynsym.\n");
1467 for (i = 1; i < m_iNumSections; ++i)
1469 unsigned uType = m_pSections[i].uType;
1470 if (uType == SHT_DYNSYM)
1472 secIndex = i;
1473 break;
1477 if (secIndex == 0)
1479 fprintf(stderr, "no dynsyms either.. guess we're out of luck.\n");
1480 return;
1484 int e_type = elfRead2(&((Elf32_Ehdr*) m_pImage)->e_type);
1485 PSectionInfo pSect = &m_pSections[secIndex];
1486 // Calc number of symbols
1487 int nSyms = pSect->uSectionSize / pSect->uSectionEntrySize;
1488 m_pSym = (Elf32_Sym*) pSect->uHostAddr; // Pointer to symbols
1489 int strIdx = m_sh_link[secIndex]; // sh_link points to the string table
1491 std::string filename = "unknown.c";
1493 // Index 0 is a dummy entry
1494 for (int i = 1; i < nSyms; i++)
1496 int name = elfRead4(&m_pSym[i].st_name);
1497 if (name == 0) /* Silly symbols with no names */ continue;
1498 std::string str(GetStrPtr(strIdx, name));
1499 // Hack off the "@@GLIBC_2.0" of Linux, if present
1500 size_t pos;
1501 if ((pos = str.find("@@")) != std::string::npos)
1502 str.erase(pos);
1503 if (ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FILE)
1505 filename = str;
1506 continue;
1508 if (ELF32_ST_TYPE(m_pSym[i].st_info) == STT_FUNC)
1510 ADDRESS val = (ADDRESS) elfRead4((int*) & m_pSym[i].st_value);
1511 if (e_type == E_REL)
1513 int nsec = elfRead2(&m_pSym[i].st_shndx);
1514 if (nsec >= 0 && nsec < m_iNumSections)
1515 val += GetSectionInfo(nsec)->uNativeAddr;
1517 if (val == 0)
1519 // ignore plt for now
1521 else
1523 syms_in_file[filename][val] = str;
1529 // A map for extra symbols, those not in the usual Elf symbol tables
1531 void ElfBinaryFile::AddSymbol(ADDRESS uNative, const char *pName)
1533 m_SymTab[uNative] = pName;
1536 void ElfBinaryFile::dumpSymbols()
1538 std::map<ADDRESS, std::string>::iterator it;
1539 std::cerr << std::hex;
1540 for (it = m_SymTab.begin(); it != m_SymTab.end(); ++it)
1541 std::cerr << "0x" << it->first << " " << it->second << " ";
1542 std::cerr << std::dec << "\n";