From f068934ab1fede12aa23095981a552c58c4fc96e Mon Sep 17 00:00:00 2001 From: Ethereal Date: Thu, 24 Feb 2011 22:07:06 -0700 Subject: [PATCH] Implemented ELF parser; fixed a few bugs in RTree::splitNode(). --- SConstruct | 2 +- include/informer/PacketFormat.h | 10 ++- include/monitor/Coordinator.h | 3 + include/monitor/ElfParser.h | 62 ++++++++++++++ include/monitor/SymbolResolver.h | 36 ++++++++ include/storage/RTree.h | 62 ++++++++------ modules/informer/src/collector/Informer.c | 2 +- src/monitor/Coordinator.cpp | 3 + src/monitor/ElfParser.cpp | 132 ++++++++++++++++++++++++++++++ src/monitor/InformerMarshal.cpp | 1 + src/monitor/SymbolResolver.cpp | 47 +++++++++++ 11 files changed, 332 insertions(+), 28 deletions(-) create mode 100644 include/monitor/ElfParser.h create mode 100644 include/monitor/SymbolResolver.h create mode 100644 src/monitor/ElfParser.cpp create mode 100644 src/monitor/SymbolResolver.cpp diff --git a/SConstruct b/SConstruct index 2bcfd7b..af26bfb 100644 --- a/SConstruct +++ b/SConstruct @@ -53,7 +53,7 @@ env = Environment( 'HOME' : os.environ['HOME']}) env.Append(CPPPATH = ["#include/"]) -env.Append(CCFLAGS = ["-W", "-Wall", "-g", "-O3"]) +env.Append(CCFLAGS = ["-W", "-Wall", "-g"]) env.Append(LIBPATH = ["#.build"]) Export('env') diff --git a/include/informer/PacketFormat.h b/include/informer/PacketFormat.h index b0a7879..2406a5c 100644 --- a/include/informer/PacketFormat.h +++ b/include/informer/PacketFormat.h @@ -7,8 +7,8 @@ @file informer/PacketFormat.h */ -#ifndef AesalonInformerPacketFormat_H -#define AesalonInformerPacketFormat_H +#ifndef AesalonInformer_PacketFormat_H +#define AesalonInformer_PacketFormat_H #ifdef __cplusplus namespace Informer { @@ -24,6 +24,12 @@ enum PacketType { - name: 8-bit ASCII string. (length specified by packet header's dataSize - sizeof(ModuleID) - 1). */ ModuleLoaded, + /** A new file was mapped to an executable page; the symbol data should be loaded from it. + Format: + - Base address (base address of memory map) + - File offset (which portion of the file is mapped) + - filename: 8-bit NULL-terminated ASCII string. + */ FileLoaded, NewThread, NewProcess, diff --git a/include/monitor/Coordinator.h b/include/monitor/Coordinator.h index 1e66034..2169f1b 100644 --- a/include/monitor/Coordinator.h +++ b/include/monitor/Coordinator.h @@ -15,6 +15,7 @@ #include "config/Vault.h" #include "monitor/DataOutputController.h" #include "MarshalList.h" +#include "SymbolResolver.h" namespace Monitor { @@ -27,6 +28,7 @@ private: int m_returnValue; DataOutputController *m_dataOutputController; MarshalList *m_marshalList; + SymbolResolver *m_resolver; public: static Coordinator *instance() { return m_instance; } Coordinator(char **argv); @@ -39,6 +41,7 @@ public: void setReturnValue(int newValue) { m_returnValue = newValue; } DataOutputController *dataOutputController() const { return m_dataOutputController; } MarshalList *marshalList() const { return m_marshalList; } + SymbolResolver *resolver() const { return m_resolver; } void run(); private: diff --git a/include/monitor/ElfParser.h b/include/monitor/ElfParser.h new file mode 100644 index 0000000..47ca70c --- /dev/null +++ b/include/monitor/ElfParser.h @@ -0,0 +1,62 @@ +/** Aesalon, a tool to visualize program behaviour in real time. + Copyright (C) 2009-2011, Aesalon development team. + + Aesalon is distributed under the terms of the GNU GPLv3. See + the included file LICENSE for more information. + + @file include/monitor/ElfParser.h +*/ + +#ifndef AesalonMonitor_ElfParser_H +#define AesalonMonitor_ElfParser_H + +#include +#include + +#include "config/Vault.h" + +#define ELF32_TYPES Elf32_Ehdr, Elf32_Shdr, Elf32_Sym +#define ELF64_TYPES Elf64_Ehdr, Elf64_Shdr, Elf64_Sym + +namespace Monitor { + +class ElfParser { +public: + class Processor { + public: + virtual ~Processor() {} + + virtual void process(const char *symbolName, uint64_t symbolAddress, uint64_t symbolSize) = 0; + }; + int m_fd; + uint8_t *m_file; + uint32_t m_fileSize; + Processor *m_processor; + + enum ElfType { + ELF32, + ELF64 + } m_elfType; + + enum Encoding { + MSB_ENCODING, + LSB_ENCODING + } m_encoding; +public: + ElfParser(); + virtual ~ElfParser(); + + void parse(const std::string &filename, Processor *processor); +private: + bool identValid(); + + template + void parseElf(); + + template + void parseSymbols(SymbolHeader *symbols, int symbolCount, const char *stringTable); +}; + +} // namespace Monitor + +#endif diff --git a/include/monitor/SymbolResolver.h b/include/monitor/SymbolResolver.h new file mode 100644 index 0000000..f5f0870 --- /dev/null +++ b/include/monitor/SymbolResolver.h @@ -0,0 +1,36 @@ +/** Aesalon, a tool to visualize program behaviour in real time. + Copyright (C) 2009-2011, Aesalon development team. + + Aesalon is distributed under the terms of the GNU GPLv3. See + the included file LICENSE for more information. + + @file include/monitor/SymbolResolver.h +*/ + +#ifndef AesalonMonitor_SymbolResolver_H +#define AesalonMonitor_SymbolResolver_H + +#include +#include + +namespace Storage { +template class RTree; +} // namespace Storage + +namespace Monitor { + +class SymbolResolver { +protected: + typedef Storage::RTree RTree; +private: + RTree *m_rtree; +public: + SymbolResolver(); + ~SymbolResolver(); + + void parse(const std::string &filename); +}; + +} // namespace Monitor + +#endif diff --git a/include/storage/RTree.h b/include/storage/RTree.h index d88c9fd..2e16337 100644 --- a/include/storage/RTree.h +++ b/include/storage/RTree.h @@ -156,7 +156,7 @@ protected: Bound bound() const { Bound result; for(int i = 0; i < Dimensions; i ++) { - Key minimum, maximum; + Key minimum = 0, maximum = 0; for(int j = 0; j < branchCount(); j ++) { if(j == 0) { minimum = m_branches[j].bound.range(i).start(); @@ -436,10 +436,10 @@ typename RTree::Node * node->setBranchCount(0); nn->setBranchCount(0); - int highestStart[Dimensions]; - int lowestEnd[Dimensions]; - Key lowest[Dimensions]; - Key highest[Dimensions]; + int highestStart[Dimensions] = {0}; + int lowestEnd[Dimensions] = {0}; + Key lowest[Dimensions] = {0}; + Key highest[Dimensions] = {0}; for(int j = 0; j < Maximum+1; j ++) { for(int i = 0; i < Dimensions; i ++) { @@ -469,36 +469,50 @@ typename RTree::Node * /* Create normalized separations. */ for(int i = 0; i < Dimensions; i ++) { - Key separation = + Key separation; + if(highest[i]-lowest[i] == 0) separation = 0; + else separation = (list[lowestEnd[i]].bound.range(i).end() - list[highestStart[i]].bound.range(i).start()) / (highest[i]-lowest[i]); if(maxIndex == -1 || separation > maxSeparation) maxIndex = i, maxSeparation = separation; } - node->branch(0) = list[highestStart[maxIndex]]; - node->setBranchCount(1); - nn->branch(0) = list[lowestEnd[maxIndex]]; - nn->setBranchCount(1); + Bound nodeBound; + Bound nnBound; - Bound nodeBound = list[highestStart[maxIndex]].bound; - Bound nnBound = list[lowestEnd[maxIndex]].bound; + if(highestStart[maxIndex] != lowestEnd[maxIndex]) { + node->branch(0) = list[highestStart[maxIndex]]; + node->setBranchCount(1); + nn->branch(0) = list[lowestEnd[maxIndex]]; + nn->setBranchCount(1); - /* Do the removal in the correct order . . . */ - if(highestStart[maxIndex] > lowestEnd[maxIndex]) { - list[highestStart[maxIndex]] = list[listSize-1]; - list[lowestEnd[maxIndex]] = list[listSize-2]; - listSize -= 2; - } - else if(lowestEnd[maxIndex] > highestStart[maxIndex]) { - list[lowestEnd[maxIndex]] = list[listSize-1]; - list[highestStart[maxIndex]] = list[listSize-2]; - listSize -= 2; + nodeBound = list[highestStart[maxIndex]].bound; + nnBound = list[lowestEnd[maxIndex]].bound; + + /* Do the removal in the correct order . . . */ + if(highestStart[maxIndex] > lowestEnd[maxIndex]) { + list[highestStart[maxIndex]] = list[listSize-1]; + list[lowestEnd[maxIndex]] = list[listSize-2]; + } + else if(lowestEnd[maxIndex] > highestStart[maxIndex]) { + list[lowestEnd[maxIndex]] = list[listSize-1]; + list[highestStart[maxIndex]] = list[listSize-2]; + } } - /* They are one and the same . . . this should not happen! */ + /* They are one and the same . . . this only happens if all ranges are identical in this node. + Thus, any elements will do perfectly well. + */ else { - Message(Fatal, "R-tree: degeneracy case, all entries are the same. Support NYI."); + node->branch(0) = list[listSize-1]; + node->setBranchCount(1); + nn->branch(0) = list[listSize-2]; + nn->setBranchCount(1); + + nodeBound = list[listSize-1].bound; + nnBound = list[listSize-2].bound; } + listSize -= 2; while(true) { if(listSize == 0) break; diff --git a/modules/informer/src/collector/Informer.c b/modules/informer/src/collector/Informer.c index fb4fa4e..93a2a98 100644 --- a/modules/informer/src/collector/Informer.c +++ b/modules/informer/src/collector/Informer.c @@ -213,7 +213,7 @@ void AI_SendInitialFiles() { *(uint64_t *)AI_PacketSpace(8) = baseAddress; *(uint64_t *)AI_PacketSpace(8) = fileOffset; - char *packetFilename = AI_PacketSpace(strlen(filename)); + char *packetFilename = AI_PacketSpace(strlen(filename)+1); strcpy(packetFilename, filename); AI_EndPacket(); diff --git a/src/monitor/Coordinator.cpp b/src/monitor/Coordinator.cpp index 45ac281..998111c 100644 --- a/src/monitor/Coordinator.cpp +++ b/src/monitor/Coordinator.cpp @@ -52,6 +52,7 @@ void Coordinator::run() { } else { setupModuleIDs(); + m_resolver = new SymbolResolver(); MarshalList marshalList; DataOutputController doc; m_marshalList = &marshalList; @@ -59,6 +60,8 @@ void Coordinator::run() { Launcher launcher(m_argv + m_argcOffset); launcher.launch(); + + delete m_resolver; } } diff --git a/src/monitor/ElfParser.cpp b/src/monitor/ElfParser.cpp new file mode 100644 index 0000000..bf0d7dc --- /dev/null +++ b/src/monitor/ElfParser.cpp @@ -0,0 +1,132 @@ +/** Aesalon, a tool to visualize program behaviour in real time. + Copyright (C) 2009-2011, Aesalon development team. + + Aesalon is distributed under the terms of the GNU GPLv3. See + the included file LICENSE for more information. + + @file src/monitor/ElfParser.cpp +*/ + +#include +#include +#include +#include +#include +#include +#include + +#include "monitor/ElfParser.h" +#include "util/PathSanitizer.h" +#include "util/StreamAsString.h" + +namespace Monitor { + +ElfParser::ElfParser() { + +} + +ElfParser::~ElfParser() { + +} + +void ElfParser::parse(const std::string &filename, Processor *processor) { + m_processor = processor; + m_fd = open(filename.c_str(), O_RDONLY); + + m_fileSize = lseek(m_fd, 0, SEEK_END); + m_file = reinterpret_cast(mmap(NULL, m_fileSize, PROT_READ, MAP_PRIVATE, m_fd, 0)); + + if(identValid()) { + if(m_elfType == ELF32) { + parseElf(); + } + else if(m_elfType == ELF64) { + parseElf(); + } + } + close(m_fd); + munmap(m_file, m_fileSize); +} + +bool ElfParser::identValid() { + if(m_fileSize < EI_NIDENT) return false; + unsigned char *ident = reinterpret_cast(m_file); + if(strncmp(ELFMAG, (char *)ident, SELFMAG) != 0) return false; + + /* By this point, it's probably valid. */ + if(ident[EI_CLASS] == ELFCLASS32) m_elfType = ELF32; + else if(ident[EI_CLASS] == ELFCLASS64) m_elfType = ELF64; + /* But just in case . . . */ + else return false; + + if(ident[EI_DATA] == ELFDATA2LSB) m_encoding = LSB_ENCODING; + else if(ident[EI_DATA] == ELFDATA2MSB) m_encoding = MSB_ENCODING; + /* Only know how to handle MSB and LSB encoding . . . */ + else return false; + + /* If the execution has gotten this far, then it's a likely bet that + it's a valid ELF file. + */ + return true; +} + +template +void ElfParser::parseElf() { + if(m_encoding != LSB_ENCODING) { + std::cout << "Don't know how to handle non-LSB encoding at the moment." << std::endl; + return; + } + + lseek(m_fd, 0, SEEK_SET); + ELFHeader *eheader; + eheader = reinterpret_cast(m_file); + + SectionHeader *sections = reinterpret_cast(m_file + eheader->e_shoff); + + SectionHeader *shstrSection = sections + eheader->e_shstrndx; + const char *shstr = reinterpret_cast(m_file + shstrSection->sh_offset); + + SymbolHeader *symtab = NULL; + int symtabSize = 0; + const char *strtab = NULL; + SymbolHeader *dynsym = NULL; + int dynsymSize = 0; + const char *dynstr = NULL; + + for(int i = 0; i < eheader->e_shnum; i ++) { + SectionHeader *section = §ions[i]; + if(!strcmp(".symtab", shstr + section->sh_name)) { + symtab = reinterpret_cast(m_file + section->sh_offset); + symtabSize = section->sh_size / sizeof(SymbolHeader); + } + else if(!strcmp(".strtab", shstr + section->sh_name)) { + strtab = reinterpret_cast(m_file + section->sh_offset); + } + else if(!strcmp(".dynsym", shstr + section->sh_name)) { + dynsym = reinterpret_cast(m_file + section->sh_offset); + dynsymSize = section->sh_size / sizeof(SymbolHeader); + } + else if(!strcmp(".dynstr", shstr + section->sh_name)) { + dynstr = reinterpret_cast(m_file + section->sh_offset); + } + } + + parseSymbols(symtab, symtabSize, strtab); + parseSymbols(dynsym, dynsymSize, dynstr); +} + +template +void ElfParser::parseSymbols(SymbolHeader *symbols, int symbolCount, const char *stringTable) { + for(int i = 0; i < symbolCount; i ++) { + m_processor->process(stringTable + symbols[i].st_name, symbols[i].st_value, symbols[i].st_size); + /*std::cout << "Parsing symbol \"" << stringTable + symbols[i].st_name << "\" . . .\n";*/ + /*m_vault->set( + Util::StreamAsString() << "\"" << stringTable + symbols[i].st_name << "\":value", + Util::StreamAsString() << symbols[i].st_value); + m_vault->set( + Util::StreamAsString() << "\"" << stringTable + symbols[i].st_name << "\":size", + Util::StreamAsString() << symbols[i].st_size);*/ + } +} + +} // namespace Monitor diff --git a/src/monitor/InformerMarshal.cpp b/src/monitor/InformerMarshal.cpp index a72445f..f3d8052 100644 --- a/src/monitor/InformerMarshal.cpp +++ b/src/monitor/InformerMarshal.cpp @@ -73,6 +73,7 @@ void InformerMarshal::fileLoaded(Comm::Packet *packet) { uint64_t fileOffset = *reinterpret_cast(packet->data() + 9); std::string name = reinterpret_cast(packet->data() + 17); Message(Debug, "Filename: " << name); + Coordinator::instance()->resolver()->parse(name); } } // namespace Monitor diff --git a/src/monitor/SymbolResolver.cpp b/src/monitor/SymbolResolver.cpp new file mode 100644 index 0000000..12d928e --- /dev/null +++ b/src/monitor/SymbolResolver.cpp @@ -0,0 +1,47 @@ +/** Aesalon, a tool to visualize program behaviour in real time. + Copyright (C) 2009-2011, Aesalon development team. + + Aesalon is distributed under the terms of the GNU GPLv3. See + the included file LICENSE for more information. + + @file src/monitor/SymbolResolver.cpp +*/ + +#include "monitor/SymbolResolver.h" +#include "storage/RTree.h" +#include + +namespace Monitor { + +SymbolResolver::SymbolResolver() { + m_rtree = new RTree(); +} + +SymbolResolver::~SymbolResolver() { + delete m_rtree; +} + +void SymbolResolver::parse(const std::string &filename) { + Message(Debug, "Asked to parse \"" << filename << "\""); + class Processor : public ElfParser::Processor { + private: + RTree *m_rtree; + public: + Processor(RTree *rtree) : m_rtree(rtree) {} + virtual ~Processor() {} + + virtual void process(const char *symbolName, uint64_t symbolAddress, uint64_t symbolSize) { + RTree::Bound b; + b.setRange(RTree::Range(symbolAddress, symbolAddress + symbolSize), 0); + Message(Debug, "Processing symbol \"" << symbolName << "\""); + m_rtree->insert(b, ""); + } + }; + + Processor p(m_rtree); + + ElfParser ep; + ep.parse(filename, &p); +} + +} // namespace Monitor -- 2.11.4.GIT