Apply patch from Daniel Schürmann: https://sourceforge.net/p/boomerang/bugs/78/
[boomerang.git] / boomerang / boomerang.cpp
blob6ce4b4fe1c155b677eda75498d28baee82d6d363
1 /*
2 * Copyright (C) 2002-2006, Mike Van Emmerik and Trent Waddington
3 */
4 /*==============================================================================
5 * FILE: boomerang.cpp
6 * OVERVIEW: Command line processing for the Boomerang decompiler
7 *============================================================================*/
8 /*
9 * $Revision$ // 1.115.2.5
11 * 28 Jan 05 - G. Krol: Separated -h output into sections and neatened
12 * 02 Sep 06 - Mike: introduced USE_XML to make it easy to disable use of the expat library
15 #if __CYGWIN__
16 #define USE_XML 0 // Cygwin has a weird problem that causes libBinaryFile.dll not to load if the expat library
17 // is used. Note that other Windows versions require expat.
18 #else // For all platforms other than Cygwin:
19 #define USE_XML 0 // Set to 0 to not use the expat library for XML loading and saving
20 #endif
22 #include <iostream>
23 #include <fstream>
24 #include <ctime>
25 #include <cstring>
26 #include <cstdlib>
27 #ifdef _WIN32
28 #include <direct.h> // mkdir under Windows
29 #else
30 #include <sys/stat.h> // For mkdir
31 #include <unistd.h> // For unlink
32 #include <csignal>
33 #endif
34 #if defined(_MSC_VER) || defined(__MINGW32__)
35 #include <windows.h>
36 #endif
37 #include "prog.h"
38 #include "proc.h"
39 #include "BinaryFile.h"
40 #include "frontend.h"
41 #include "hllcode.h"
42 #include "codegen/chllcode.h"
43 //#include "transformer.h"
44 #include "boomerang.h"
45 #include "log.h"
46 #if USE_XML
47 #include "xmlprogparser.h"
48 #endif
49 #if defined(_MSC_VER) && _MSC_VER >= 1400
50 #pragma warning(disable:4996) // Warnings about e.g. _strdup deprecated in VS 2005
51 #endif
54 // For the -nG switch to disable the garbage collector
55 #ifndef NO_GARBAGE_COLLECTOR
56 #include "gc.h"
57 #endif
58 Boomerang *Boomerang::boomerang = NULL;
60 /**
61 * Initializes the Boomerang object.
62 * The default settings are:
63 * - All options disabled
64 * - Infinite propagations
65 * - A maximum memory depth of 99
66 * - The path to the executable is "./"
67 * - The output directory is "./output/"
69 Boomerang::Boomerang() : logger(NULL), vFlag(false), printRtl(false),
70 noBranchSimplify(false), noRemoveNull(false), noLocals(false),
71 noRemoveLabels(false), noDataflow(false), noDecompile(false), stopBeforeDecompile(false),
72 traceDecoder(false), dotFile(NULL), numToPropagate(-1),
73 noPromote(false), propOnlyToAll(false), debugGen(false),
74 maxMemDepth(99), debugSwitch(false), noParameterNames(false), debugLiveness(false),
75 stopAtDebugPoints(false), debugTA(false), decodeMain(true), printAST(false), dumpXML(false),
76 noRemoveReturns(false), debugDecoder(false), decodeThruIndCall(false), ofsIndCallReport(NULL),
77 noDecodeChildren(false), debugProof(false), debugUnused(false),
78 loadBeforeDecompile(false), saveBeforeDecompile(false),
79 noProve(false), noChangeSignatures(false), conTypeAnalysis(false), dfaTypeAnalysis(true),
80 propMaxDepth(3), generateCallGraph(false), generateSymbols(false), noGlobals(false), assumeABI(false),
81 experimental(false), minsToStopAfter(0)
83 progPath = "./";
84 outputPath = "./output/";
87 /**
88 * Returns the Log object associated with the object.
90 Log &Boomerang::log()
92 return *logger;
95 /**
96 * Sets the outputfile to be the file "log" in the default output directory.
98 FileLogger::FileLogger() : out((Boomerang::get()->getOutputPath() + "log").c_str())
102 * Returns the HLLCode for the given proc.
104 HLLCode *Boomerang::getHLLCode(UserProc *p)
106 return new CHLLCode(p);
110 * Prints a short usage statement.
112 void Boomerang::usage()
114 std::cout << "Usage: boomerang [ switches ] <program>" << std::endl;
115 std::cout << "boomerang -h for switch help" << std::endl;
116 exit(1);
120 * Prints help for the interactive mode.
122 void Boomerang::helpcmd()
124 // Column 98 of this source file is column 80 of output (don't use tabs)
125 // ____.____1____.____2____.____3____.____4____.____5____.____6____.____7____.____8
126 std::cout << "Available commands (for use with -k):\n";
127 std::cout << " decode : Loads and decodes the specified binary.\n";
128 std::cout << " decompile [proc] : Decompiles the program or specified proc.\n";
129 std::cout << " codegen [cluster] : Generates code for the program or a\n";
130 std::cout << " specified cluster.\n";
131 std::cout << " move proc <proc> <cluster> : Moves the specified proc to the specified\n";
132 std::cout << " cluster.\n";
133 std::cout << " move cluster <cluster> <parent> : Moves the specified cluster to the\n";
134 std::cout << " specified parent cluster.\n";
135 std::cout << " add cluster <cluster> [parent] : Adds a new cluster to the root/specified\n";
136 std::cout << " cluster.\n";
137 std::cout << " delete cluster <cluster> : Deletes an empty cluster.\n";
138 std::cout << " rename proc <proc> <newname> : Renames the specified proc.\n";
139 std::cout << " rename cluster <cluster> <newname> : Renames the specified cluster.\n";
140 std::cout << " info prog : Print info about the program.\n";
141 std::cout << " info cluster <cluster> : Print info about a cluster.\n";
142 std::cout << " info proc <proc> : Print info about a proc.\n";
143 std::cout << " print <proc> : Print the RTL for a proc.\n";
144 std::cout << " help : This help.\n";
145 std::cout << " exit : Quit the shell.\n";
149 * Prints help about the command line switches.
151 void Boomerang::help()
153 std::cout << "Symbols\n";
154 std::cout << " -s <addr> <name> : Define a symbol\n";
155 std::cout << " -sf <filename> : Read a symbol/signature file\n";
156 std::cout << "Decoding/decompilation options\n";
157 std::cout << " -e <addr> : Decode the procedure beginning at addr, and callees\n";
158 std::cout << " -E <addr> : Decode the procedure at addr, no callees\n";
159 std::cout << " Use -e and -E repeatedly for multiple entry points\n";
160 std::cout << " -ic : Decode through type 0 Indirect Calls\n";
161 std::cout << " -S <min> : Stop decompilation after specified number of minutes\n";
162 std::cout << " -t : Trace (print address of) every instruction decoded\n";
163 std::cout << " -Tc : Use old constraint-based type analysis\n";
164 std::cout << " -Td : Use data-flow-based type analysis\n";
165 #if USE_XML
166 std::cout << " -LD : Load before decompile (<program> becomes xml input file)\n";
167 std::cout << " -SD : Save before decompile\n";
168 #endif
169 std::cout << " -a : Assume ABI compliance\n";
170 std::cout << " -W : Windows specific decompilation mode (requires pdb information)\n";
171 // std::cout << " -pa : only propagate if can propagate to all\n";
172 std::cout << "Output\n";
173 std::cout << " -v : Verbose\n";
174 std::cout << " -h : This help\n";
175 std::cout << " -o <output path> : Where to generate output (defaults to ./output/)\n";
176 std::cout << " -x : Dump XML files\n";
177 std::cout << " -r : Print RTL for each proc to log before code generation\n";
178 std::cout << " -gd <dot file> : Generate a dotty graph of the program's CFG and DFG\n";
179 std::cout << " -gc : Generate a call graph (callgraph.out and callgraph.dot)\n";
180 std::cout << " -gs : Generate a symbol file (symbols.h)\n";
181 std::cout << " -iw : Write indirect call report to output/indirect.txt\n";
182 std::cout << "Misc.\n";
183 std::cout << " -k : Command mode, for available commands see -h cmd\n";
184 std::cout << " -P <path> : Path to Boomerang files, defaults to where you run\n";
185 std::cout << " Boomerang from\n";
186 std::cout << " -X : activate eXperimental code; errors likely\n";
187 std::cout << " -- : No effect (used for testing)\n";
188 std::cout << "Debug\n";
189 std::cout << " -da : Print AST before code generation\n";
190 std::cout << " -dc : Debug switch (Case) analysis\n";
191 std::cout << " -dd : Debug decoder to stdout\n";
192 std::cout << " -dg : Debug code Generation\n";
193 std::cout << " -dl : Debug liveness (from SSA) code\n";
194 std::cout << " -dp : Debug proof engine\n";
195 std::cout << " -ds : Stop at debug points for keypress\n";
196 std::cout << " -dt : Debug type analysis\n";
197 std::cout << " -du : Debug removing unused statements etc\n";
198 std::cout << "Restrictions\n";
199 std::cout << " -nb : No simplifications for branches\n";
200 std::cout << " -nc : No decode children in the call graph (callees)\n";
201 std::cout << " -nd : No (reduced) dataflow analysis\n";
202 std::cout << " -nD : No decompilation (at all!)\n";
203 std::cout << " -nl : No creation of local variables\n";
204 // std::cout << " -nm : No decoding of the 'main' procedure\n";
205 std::cout << " -ng : No replacement of expressions with Globals\n";
206 std::cout << " -nG : No garbage collection\n";
207 std::cout << " -nn : No removal of NULL and unused statements\n";
208 std::cout << " -np : No replacement of expressions with Parameter names\n";
209 std::cout << " -nP : No promotion of signatures (other than main/WinMain/\n";
210 std::cout << " DriverMain)\n";
211 std::cout << " -nr : No removal of unneeded labels\n";
212 std::cout << " -nR : No removal of unused Returns\n";
213 std::cout << " -l <depth> : Limit multi-propagations to expressions with depth <depth>\n";
214 std::cout << " -p <num> : Only do num propagations\n";
215 std::cout << " -m <num> : Max memory depth\n";
216 exit(1);
/**
 * Creates a directory, including any missing parent directories, and tests it by writing a
 * temporary file named "test.file" inside it.
 *
 * \param dir The name of the directory. A trailing '/' (or '\\') is optional.
 *
 * \retval true  The probe file could be created and written, so the directory is usable.
 * \retval false The probe file could not be opened or written.
 */
bool createDirectory(std::string dir)
{
	// Create every leading component in turn. The result of mkdir() is ignored on purpose:
	// it fails harmlessly when the component already exists, and a real failure is caught
	// by the write probe below.
	std::string path;
	size_t start = 0;
	size_t slash;
	while ((slash = dir.find('/', start)) != std::string::npos)
	{
		path = dir.substr(0, slash + 1);
		start = slash + 1;
#ifdef _WIN32
		mkdir(path.c_str());
#else
		mkdir(path.c_str(), 0777);
#endif
	}

	// Anything after the last '/' is the final directory component
	path = dir;
	if (start < dir.size())
	{
#ifdef _WIN32
		mkdir(path.c_str());
#else
		mkdir(path.c_str(), 0777);
#endif
		// Make sure the probe file is created inside the directory, not next to it
		char lastCh = path[path.size() - 1];
		if (lastCh != '/' && lastCh != '\\')
			path += '/';
	}

	// Now try to create a test file
	path += "test.file";
	std::ofstream test(path.c_str(), std::ios::out);
	if (!test.is_open())
		return false;						// A failed open only sets failbit, never badbit
	test << "testing\n";
	test.close();							// Flushes; a failed flush/close sets failbit
	bool pathOK = !test.fail();
	remove(path.c_str());					// The file was created, so always clean it up
	return pathOK;
}
261 * Prints a tree graph.
263 void Cluster::printTree(std::ostream &out)
265 out << "\t\t" << name << "\n";
266 for (uintptr_t i = 0; i < children.size(); i++)
267 children[i]->printTree(out);
270 typedef char *crazy_vc_bug;
273 * Splits a string up in different words.
274 * use like: argc = splitLine(line, &argv);
276 * \param[in] line the string to parse
277 * \param[out] pargc &argv
279 * \return The number of words found (argc).
281 int Boomerang::splitLine(char *line, char ***pargv)
283 int argc = 0;
284 *pargv = new crazy_vc_bug[100];
285 const char *p = strtok(line, " \r\n");
286 while (p)
288 (*pargv)[argc++] = (char*)p;
289 p = strtok(NULL, " \r\n");
291 return argc;
295 * Parse and execute a command supplied in interactive mode.
297 * \param argc The number of arguments.
298 * \param argv Pointers to the arguments.
300 * \return A value indicating what happened.
302 * \retval 0 Success
303 * \retval 1 Faillure
304 * \retval 2 The user exited with \a quit or \a exit
306 int Boomerang::parseCmd(int argc, const char **argv)
308 static Prog *prog = NULL;
309 if (!strcmp(argv[0], "decode"))
311 if (argc <= 1)
313 std::cerr << "not enough arguments for cmd\n";
314 return 1;
316 const char *fname = argv[1];
317 Prog *p = loadAndDecode(fname);
318 if (p == NULL)
320 std::cerr << "failed to load " << fname << "\n";
321 return 1;
323 prog = p;
324 #if USE_XML
326 else if (!strcmp(argv[0], "load"))
328 if (argc <= 1)
330 std::cerr << "not enough arguments for cmd\n";
331 return 1;
333 const char *fname = argv[1];
334 XMLProgParser *p = new XMLProgParser();
335 Prog *pr = p->parse(fname);
336 if (pr == NULL)
338 // try guessing
339 pr = p->parse((outputPath + fname + "/" + fname + ".xml").c_str());
340 if (pr == NULL)
342 std::cerr << "failed to read xml " << fname << "\n";
343 return 1;
346 prog = pr;
348 else if (!strcmp(argv[0], "save"))
350 if (prog == NULL)
352 std::cerr << "need to load or decode before save!\n";
353 return 1;
355 XMLProgParser *p = new XMLProgParser();
356 p->persistToXML(prog);
357 #endif
359 else if (!strcmp(argv[0], "decompile"))
361 if (argc > 1)
363 Proc *proc = prog->findProc(argv[1]);
364 if (proc == NULL)
366 std::cerr << "cannot find proc " << argv[1] << "\n";
367 return 1;
369 if (proc->isLib())
371 std::cerr << "cannot decompile a lib proc\n";
372 return 1;
374 int indent = 0;
375 ((UserProc*)proc)->decompile(new ProcList, indent);
377 else
379 prog->decompile();
382 else if (!strcmp(argv[0], "codegen"))
384 if (argc > 1 )
386 Cluster *cluster = prog->findCluster(argv[1]);
387 if (cluster == NULL)
389 std::cerr << "cannot find cluster " << argv[1] << "\n";
390 return 1;
392 prog->generateCode(cluster);
394 else
396 prog->generateCode();
399 else if (!strcmp(argv[0], "move"))
401 if (argc <= 1)
403 std::cerr << "not enough arguments for cmd\n";
404 return 1;
406 if (!strcmp(argv[1], "proc"))
408 if (argc <= 3)
410 std::cerr << "not enough arguments for cmd\n";
411 return 1;
414 Proc *proc = prog->findProc(argv[2]);
415 if (proc == NULL)
417 std::cerr << "cannot find proc " << argv[2] << "\n";
418 return 1;
421 Cluster *cluster = prog->findCluster(argv[3]);
422 if (cluster == NULL)
424 std::cerr << "cannot find cluster " << argv[3] << "\n";
425 return 1;
427 proc->setCluster(cluster);
429 else if (!strcmp(argv[1], "cluster"))
431 if (argc <= 3)
433 std::cerr << "not enough arguments for cmd\n";
434 return 1;
437 Cluster *cluster = prog->findCluster(argv[2]);
438 if (cluster == NULL)
440 std::cerr << "cannot find cluster " << argv[2] << "\n";
441 return 1;
444 Cluster *parent = prog->findCluster(argv[3]);
445 if (parent == NULL)
447 std::cerr << "cannot find cluster " << argv[3] << "\n";
448 return 1;
451 parent->addChild(cluster);
453 else
455 std::cerr << "don't know how to move a " << argv[1] << "\n";
456 return 1;
459 else if (!strcmp(argv[0], "add"))
461 if (argc <= 1)
463 std::cerr << "not enough arguments for cmd\n";
464 return 1;
466 if (!strcmp(argv[1], "cluster"))
468 if (argc <= 2)
470 std::cerr << "not enough arguments for cmd\n";
471 return 1;
474 Cluster *cluster = new Cluster(argv[2]);
475 if (cluster == NULL)
477 std::cerr << "cannot create cluster " << argv[2] << "\n";
478 return 1;
481 Cluster *parent = prog->getRootCluster();
482 if (argc > 3)
484 parent = prog->findCluster(argv[3]);
485 if (cluster == NULL)
487 std::cerr << "cannot find cluster " << argv[3] << "\n";
488 return 1;
492 parent->addChild(cluster);
494 else
496 std::cerr << "don't know how to add a " << argv[1] << "\n";
497 return 1;
500 else if (!strcmp(argv[0], "delete"))
502 if (argc <= 1)
504 std::cerr << "not enough arguments for cmd\n";
505 return 1;
507 if (!strcmp(argv[1], "cluster"))
509 if (argc <= 2)
511 std::cerr << "not enough arguments for cmd\n";
512 return 1;
515 Cluster *cluster = prog->findCluster(argv[2]);
516 if (cluster == NULL)
518 std::cerr << "cannot find cluster " << argv[2] << "\n";
519 return 1;
522 if (cluster->hasChildren() || cluster == prog->getRootCluster())
524 std::cerr << "cluster " << argv[2] << " is not empty\n";
525 return 1;
528 if (prog->clusterUsed(cluster))
530 std::cerr << "cluster " << argv[2] << " is not empty\n";
531 return 1;
534 unlink(cluster->getOutPath("xml"));
535 unlink(cluster->getOutPath("c"));
536 assert(cluster->getParent());
537 cluster->getParent()->removeChild(cluster);
539 else
541 std::cerr << "don't know how to delete a " << argv[1] << "\n";
542 return 1;
545 else if (!strcmp(argv[0], "rename"))
547 if (argc <= 1)
549 std::cerr << "not enough arguments for cmd\n";
550 return 1;
552 if (!strcmp(argv[1], "proc"))
554 if (argc <= 3)
556 std::cerr << "not enough arguments for cmd\n";
557 return 1;
560 Proc *proc = prog->findProc(argv[2]);
561 if (proc == NULL)
563 std::cerr << "cannot find proc " << argv[2] << "\n";
564 return 1;
567 Proc *nproc = prog->findProc(argv[3]);
568 if (nproc != NULL)
570 std::cerr << "proc " << argv[3] << " already exists\n";
571 return 1;
574 proc->setName(argv[3]);
576 else if (!strcmp(argv[1], "cluster"))
578 if (argc <= 3)
580 std::cerr << "not enough arguments for cmd\n";
581 return 1;
584 Cluster *cluster = prog->findCluster(argv[2]);
585 if (cluster == NULL)
587 std::cerr << "cannot find cluster " << argv[2] << "\n";
588 return 1;
591 Cluster *ncluster = prog->findCluster(argv[3]);
592 if (ncluster == NULL)
594 std::cerr << "cluster " << argv[3] << " already exists\n";
595 return 1;
598 cluster->setName(argv[3]);
600 else
602 std::cerr << "don't know how to rename a " << argv[1] << "\n";
603 return 1;
606 else if (!strcmp(argv[0], "info"))
608 if (argc <= 1)
610 std::cerr << "not enough arguments for cmd\n";
611 return 1;
613 if (!strcmp(argv[1], "prog"))
616 std::cout << "prog " << prog->getName() << ":\n";
617 std::cout << "\tclusters:\n";
618 prog->getRootCluster()->printTree(std::cout);
619 std::cout << "\n\tlibprocs:\n";
620 PROGMAP::const_iterator it;
621 for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
622 if (p->isLib())
623 std::cout << "\t\t" << p->getName() << "\n";
624 std::cout << "\n\tuserprocs:\n";
625 for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
626 if (!p->isLib())
627 std::cout << "\t\t" << p->getName() << "\n";
628 std::cout << "\n";
630 return 0;
632 else if (!strcmp(argv[1], "cluster"))
634 if (argc <= 2)
636 std::cerr << "not enough arguments for cmd\n";
637 return 1;
640 Cluster *cluster = prog->findCluster(argv[2]);
641 if (cluster == NULL)
643 std::cerr << "cannot find cluster " << argv[2] << "\n";
644 return 1;
647 std::cout << "cluster " << cluster->getName() << ":\n";
648 if (cluster->getParent())
649 std::cout << "\tparent = " << cluster->getParent()->getName() << "\n";
650 else
651 std::cout << "\troot cluster.\n";
652 std::cout << "\tprocs:\n";
653 PROGMAP::const_iterator it;
654 for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
655 if (p->getCluster() == cluster)
656 std::cout << "\t\t" << p->getName() << "\n";
657 std::cout << "\n";
659 return 0;
661 else if (!strcmp(argv[1], "proc"))
663 if (argc <= 2)
665 std::cerr << "not enough arguments for cmd\n";
666 return 1;
669 Proc *proc = prog->findProc(argv[2]);
670 if (proc == NULL)
672 std::cerr << "cannot find proc " << argv[2] << "\n";
673 return 1;
676 std::cout << "proc " << proc->getName() << ":\n";
677 std::cout << "\tbelongs to cluster " << proc->getCluster()->getName() << "\n";
678 std::cout << "\tnative address " << std::hex << proc->getNativeAddress() << std::dec << "\n";
679 if (proc->isLib())
680 std::cout << "\tis a library proc.\n";
681 else
683 std::cout << "\tis a user proc.\n";
684 UserProc *p = (UserProc*)proc;
685 if (p->isDecoded())
686 std::cout << "\thas been decoded.\n";
687 //if (p->isAnalysed())
688 // std::cout << "\thas been analysed.\n";
690 std::cout << "\n";
692 return 0;
694 else
696 std::cerr << "don't know how to print info about a " << argv[1] << "\n";
697 return 1;
700 else if (!strcmp(argv[0], "print"))
702 if (argc <= 1)
704 std::cerr << "not enough arguments for cmd\n";
705 return 1;
708 Proc *proc = prog->findProc(argv[1]);
709 if (proc == NULL)
711 std::cerr << "cannot find proc " << argv[1] << "\n";
712 return 1;
714 if (proc->isLib())
716 std::cerr << "cannot print a libproc.\n";
717 return 1;
720 ((UserProc*)proc)->print(std::cout);
721 std::cout << "\n";
722 return 0;
724 else if (!strcmp(argv[0], "exit"))
726 return 2;
728 else if (!strcmp(argv[0], "quit"))
730 return 2;
732 else if (!strcmp(argv[0], "help"))
734 helpcmd();
735 return 0;
737 else
739 std::cerr << "unknown cmd " << argv[0] << ".\n";
740 return 1;
743 return 0;
747 * Displays a command line and processes the commands entered.
749 * \retval 0 stdin was closed.
750 * \retval 2 The user typed exit or quit.
752 int Boomerang::cmdLine()
754 char line[1024];
755 printf("boomerang: ");
756 fflush(stdout);
757 while (fgets(line, sizeof(line), stdin))
759 char **argv;
760 int argc = splitLine(line, &argv);
761 if (parseCmd(argc, (const char **)argv) == 2)
762 return 2;
763 printf("boomerang: ");
764 fflush(stdout);
766 return 0;
770 * The main function for the command line mode. Parses switches and runs decompile(filename).
772 * \return Zero on success, nonzero on faillure.
774 int Boomerang::commandLine(int argc, const char **argv)
776 printf("Boomerang %s\n", VERSION); // Display a version and date (mainly for release versions)
777 if (argc < 2) usage();
778 progPath = argv[0];
779 size_t j = progPath.rfind('/'); // Chop off after the last slash
780 if (j == std::string::npos)
781 j = progPath.rfind('\\'); // .. or reverse slash
782 if (j != std::string::npos)
784 // Do the chop; keep the trailing slash or reverse slash
785 progPath = progPath.substr(0, j+1);
787 else
789 progPath = "./"; // Just assume the current directory
791 #ifdef _MSC_VER // For the console mode version; Windows GUI will override in windows.cpp
792 // As a special case for MSVC testing, make the program path the parent of the dir with the .exe
793 j = progPath.find("ebug\\", progPath.length() - (4+1));
794 if (j != std::string::npos)
795 j--; // Point to the 'd' or 'D'
796 if (j == std::string::npos)
798 j = progPath.rfind("elease\\", progPath.length() - (6+1));
799 if (j != std::string::npos)
800 j--; // Point to the 'r' or 'R'
802 if (j != std::string::npos)
803 progPath = progPath.substr(0, j); // Chop off "Release\" or "Debug\"
804 SetCurrentDirectoryA(progPath.c_str()); // Note: setcwd() doesn't seem to work
805 #endif
806 outputPath = progPath + "output/"; // Default output path (can be overridden with -o below)
808 // Parse switches on command line
809 if ((argc == 2) && (strcmp(argv[1], "-h") == 0))
811 help();
812 return 1;
814 if (argc == 3 && !strcmp(argv[1], "-h") && !strcmp(argv[2], "cmd"))
816 helpcmd();
817 return 1;
820 int kmd = 0;
822 for (int i=1; i < argc; i++)
824 if (argv[i][0] != '-' && i == argc - 1)
825 break;
826 if (argv[i][0] != '-')
827 usage();
828 switch (argv[i][1])
830 case '-':
831 break; // No effect: ignored
832 case 'h':
833 help();
834 break;
835 case 'v':
836 vFlag = true;
837 break;
838 case 'x':
839 dumpXML = true;
840 break;
841 case 'X':
842 experimental = true;
843 std::cout << "Warning: experimental code active!\n";
844 break;
845 case 'r':
846 printRtl = true;
847 break;
848 case 't':
849 traceDecoder = true;
850 break;
851 case 'T':
852 if (argv[i][2] == 'c')
854 conTypeAnalysis = true; // -Tc: use old constraint-based type analysis
855 dfaTypeAnalysis = false;
857 else if (argv[i][2] == 'd')
858 dfaTypeAnalysis = true; // -Td: use data-flow-based type analysis (now default)
859 break;
860 case 'g':
861 if (argv[i][2]=='d')
862 dotFile = argv[++i];
863 else if (argv[i][2]=='c')
864 generateCallGraph=true;
865 else if (argv[i][2]=='s')
867 generateSymbols=true;
868 stopBeforeDecompile=true;
870 break;
871 case 'o':
873 outputPath = argv[++i];
874 char lastCh = outputPath[outputPath.size()-1];
875 if (lastCh != '/' && lastCh != '\\')
876 outputPath += '/'; // Maintain the convention of a trailing slash
877 break;
879 case 'p':
880 if (argv[i][2] == 'a')
882 propOnlyToAll = true;
883 std::cerr << " * * Warning! -pa is not implemented yet!\n";
885 else
887 if (++i == argc)
889 usage();
890 return 1;
892 sscanf(argv[i], "%i", &numToPropagate);
894 break;
895 case 'n':
896 switch (argv[i][2])
898 case 'b':
899 noBranchSimplify = true;
900 break;
901 case 'c':
902 noDecodeChildren = true;
903 break;
904 case 'd':
905 noDataflow = true;
906 break;
907 case 'D':
908 noDecompile = true;
909 break;
910 case 'l':
911 noLocals = true;
912 break;
913 case 'n':
914 noRemoveNull = true;
915 break;
916 case 'P':
917 noPromote = true;
918 break;
919 case 'p':
920 noParameterNames = true;
921 break;
922 case 'r':
923 noRemoveLabels = true;
924 break;
925 case 'R':
926 noRemoveReturns = true;
927 break;
928 case 'g':
929 noGlobals = true;
930 break;
931 case 'G':
932 #ifndef NO_GARBAGE_COLLECTOR
933 GC_disable();
934 #endif
935 break;
936 default:
937 help();
939 break;
940 case 'E':
941 noDecodeChildren = true;
942 // Fall through
943 case 'e':
945 ADDRESS addr;
946 int n;
947 decodeMain = false;
948 if (++i == argc)
950 usage();
951 return 1;
953 if (argv[i][0] == '0' && argv[i+1][1] == 'x')
955 n = sscanf(argv[i], "0x%x", &addr);
957 else
959 n = sscanf(argv[i], "%i", &addr);
961 if (n != 1)
963 std::cerr << "bad address: " << argv[i] << std::endl;
964 exit(1);
966 entrypoints.push_back(addr);
968 break;
969 case 's':
971 if (argv[i][2] == 'f')
973 symbolFiles.push_back(argv[i+1]);
974 i++;
975 break;
977 ADDRESS addr;
978 int n;
979 if (++i == argc)
981 usage();
982 return 1;
984 if (argv[i][0] == '0' && argv[i+1][1] == 'x')
986 n = sscanf(argv[i], "0x%x", &addr);
988 else
990 n = sscanf(argv[i], "%i", &addr);
992 if (n != 1)
994 std::cerr << "bad address: " << argv[i+1] << std::endl;
995 exit(1);
997 const char *nam = argv[++i];
998 symbols[addr] = nam;
1000 break;
1001 case 'd':
1002 switch (argv[i][2])
1004 case 'a':
1005 printAST = true;
1006 break;
1007 case 'c':
1008 debugSwitch = true;
1009 break;
1010 case 'd':
1011 debugDecoder = true;
1012 break;
1013 case 'g':
1014 debugGen = true;
1015 break;
1016 case 'l':
1017 debugLiveness = true;
1018 break;
1019 case 'p':
1020 debugProof = true;
1021 break;
1022 case 's':
1023 stopAtDebugPoints = true;
1024 break;
1025 case 't': // debug type analysis
1026 debugTA = true;
1027 break;
1028 case 'u': // debug unused locations (including returns and parameters now)
1029 debugUnused = true;
1030 break;
1031 default:
1032 help();
1034 break;
1035 case 'm':
1036 if (++i == argc)
1038 usage();
1039 return 1;
1041 sscanf(argv[i], "%i", &maxMemDepth);
1042 break;
1043 case 'i':
1044 if (argv[i][2] == 'c')
1045 decodeThruIndCall = true; // -ic;
1046 if (argv[i][2] == 'w') // -iw
1047 if (ofsIndCallReport)
1049 std::string fname = getOutputPath() + "indirect.txt";
1050 ofsIndCallReport = new std::ofstream(fname.c_str());
1052 break;
1053 case 'L':
1054 if (argv[i][2] == 'D')
1055 #if USE_XML
1056 loadBeforeDecompile = true;
1057 #else
1058 std::cerr << "LD command not enabled since compiled without USE_XML\n";
1059 #endif
1060 break;
1061 case 'S':
1062 if (argv[i][2] == 'D')
1063 #if USE_XML
1064 saveBeforeDecompile = true;
1065 #else
1066 std::cerr << "SD command not enabled since compiled without USE_XML\n";
1067 #endif
1068 else
1070 sscanf(argv[++i], "%i", &minsToStopAfter);
1072 break;
1073 case 'k':
1074 kmd = 1;
1075 break;
1076 case 'P':
1077 progPath = argv[++i];
1078 if (progPath[progPath.length()-1] != '\\')
1079 progPath += "\\";
1080 break;
1081 case 'a':
1082 assumeABI = true;
1083 break;
1084 case 'l':
1085 if (++i == argc)
1087 usage();
1088 return 1;
1090 sscanf(argv[i], "%i", &propMaxDepth);
1091 break;
1092 default:
1093 help();
1097 setOutputDirectory(outputPath.c_str());
1099 if (kmd)
1100 return cmdLine();
1102 return decompile(argv[argc-1]);
1106 * Sets the directory in which Boomerang creates its output files. The directory will be created if it doesn't exist.
1108 * \param path the path to the directory
1110 * \retval true Success.
1111 * \retval false The directory could not be created.
1113 bool Boomerang::setOutputDirectory(const char *path)
1115 outputPath = path;
1116 // Create the output directory, if needed
1117 if (!createDirectory(outputPath))
1119 std::cerr << "Warning! Could not create path " << outputPath << "!\n";
1120 return false;
1122 if (logger == NULL)
1123 setLogger(new FileLogger());
1124 return true;
1128 * Adds information about functions and classes from Objective-C modules to the Prog object.
1130 * \param modules A map from name to the Objective-C modules.
1131 * \param prog The Prog object to add the information to.
1133 void Boomerang::objcDecode(std::map<std::string, ObjcModule> &modules, Prog *prog)
1135 if (VERBOSE)
1136 LOG << "Adding Objective-C information to Prog.\n";
1137 Cluster *root = prog->getRootCluster();
1138 for (std::map<std::string, ObjcModule>::iterator it = modules.begin(); it != modules.end(); it++)
1140 ObjcModule &mod = (*it).second;
1141 Module *module = new Module(mod.name.c_str());
1142 root->addChild(module);
1143 if (VERBOSE)
1144 LOG << "\tModule: " << mod.name.c_str() << "\n";
1145 for (std::map<std::string, ObjcClass>::iterator it1 = mod.classes.begin(); it1 != mod.classes.end(); it1++)
1147 ObjcClass &c = (*it1).second;
1148 Class *cl = new Class(c.name.c_str());
1149 root->addChild(cl);
1150 if (VERBOSE)
1151 LOG << "\t\tClass: " << c.name.c_str() << "\n";
1152 for (std::map<std::string, ObjcMethod>::iterator it2 = c.methods.begin(); it2 != c.methods.end(); it2++)
1154 ObjcMethod &m = (*it2).second;
1155 // TODO: parse :'s in names
1156 Proc *p = prog->newProc(m.name.c_str(), m.addr);
1157 p->setCluster(cl);
1158 // TODO: decode types in m.types
1159 if (VERBOSE)
1160 LOG << "\t\t\tMethod: " << m.name.c_str() << "\n";
1164 if (VERBOSE)
1165 LOG << "\n";
1169 * Loads the executable file and decodes it.
1171 * \param fname The name of the file to load.
1172 * \param pname How the Prog will be named.
1174 * \returns A Prog object.
1176 Prog *Boomerang::loadAndDecode(const char *fname, const char *pname)
1178 std::cout << "loading...\n";
1179 Prog *prog = new Prog();
1180 FrontEnd *fe = FrontEnd::Load(fname, prog);
1181 if (fe == NULL)
1183 std::cerr << "failed.\n";
1184 return NULL;
1186 prog->setFrontEnd(fe);
1188 // Add symbols from -s switch(es)
1189 for (std::map<ADDRESS, std::string>::iterator it = symbols.begin();
1190 it != symbols.end(); it++)
1192 fe->AddSymbol((*it).first, (*it).second.c_str());
1194 fe->readLibraryCatalog(); // Needed before readSymbolFile()
1196 for (uintptr_t i = 0; i < symbolFiles.size(); i++)
1198 std::cout << "reading symbol file " << symbolFiles[i].c_str() << "\n";
1199 prog->readSymbolFile(symbolFiles[i].c_str());
1202 std::map<std::string, ObjcModule> &objcmodules = fe->getBinaryFile()->getObjcModules();
1203 if (objcmodules.size())
1204 objcDecode(objcmodules, prog);
1206 // Entry points from -e (and -E) switch(es)
1207 for (uintptr_t i = 0; i < entrypoints.size(); i++)
1209 std::cout<< "decoding specified entrypoint " << std::hex << entrypoints[i] << "\n";
1210 prog->decodeEntryPoint(entrypoints[i]);
1213 if (entrypoints.size() == 0)
1215 // no -e or -E given
1216 if (decodeMain)
1217 std::cout << "decoding entry point...\n";
1218 fe->decode(prog, decodeMain, pname);
1220 if (!noDecodeChildren)
1222 // this causes any undecoded userprocs to be decoded
1223 std::cout << "decoding anything undecoded...\n";
1224 fe->decode(prog, NO_ADDRESS);
1228 std::cout << "finishing decode...\n";
1229 prog->finishDecode();
1231 Boomerang::get()->alert_end_decode();
1233 std::cout << "found " << std::dec << prog->getNumUserProcs() << " procs\n";
1235 // GK: The analysis which was performed was not exactly very "analysing", and so it has been moved to
1236 // prog::finishDecode, UserProc::assignProcsToCalls and UserProc::finalSimplify
1237 //std::cout << "analysing...\n";
1238 //prog->analyse();
1240 if (generateSymbols)
1242 prog->printSymbolsToFile();
1244 if (generateCallGraph)
1246 prog->printCallGraph();
1247 prog->printCallGraphXML();
1249 return prog;
#if defined(_WIN32) && !defined(__MINGW32__)
/**
 * Watchdog loop that polls the clock once a second and ends the process (exit code 1) once more
 * than Boomerang's minsToStopAfter minutes have passed since \a start.
 *
 * \param start The time at which the measured period began.
 */
DWORD WINAPI stopProcess(
	time_t start
)
{
	const int limitMins = Boomerang::get()->minsToStopAfter;
	for (;;)
	{
		time_t current;
		time(&current);
		if ((current - start) > limitMins * 60)
		{
			std::cerr << "\n\n Stopping process, timeout.\n";
			ExitProcess(1);
		}
		Sleep(1000);
	}
}
#else
/**
 * Reports a timeout on std::cerr and ends the process with exit code 1.
 *
 * \param n Not used.
 */
void stopProcess(int n)
{
	std::cerr << "\n\n Stopping process, timeout.\n";
	exit(1);
}
#endif
1279 * The program will be subsequently be loaded, decoded, decompiled and written to a source file.
1280 * After decompilation the elapsed time is printed to std::cerr.
1282 * \param fname The name of the file to load.
1283 * \param pname The name that will be given to the Proc.
1285 * \return Zero on success, nonzero on faillure.
1287 int Boomerang::decompile(const char *fname, const char *pname)
1289 Prog *prog;
1290 time_t start;
1291 time(&start);
1293 if (minsToStopAfter)
1295 std::cout << "stopping decompile after " << minsToStopAfter << " minutes.\n";
1296 #if defined(_WIN32) // Includes MinGW
1297 DWORD id;
1298 CreateThread(NULL, 0, (LPTHREAD_START_ROUTINE)stopProcess, (LPVOID)start, 0, &id);
1299 #else
1300 signal(SIGALRM, stopProcess);
1301 alarm(minsToStopAfter * 60);
1302 #endif
1305 // std::cout << "setting up transformers...\n";
1306 // ExpTransformer::loadAll();
1308 #if USE_XML
1309 if (loadBeforeDecompile)
1311 std::cout << "loading persisted state...\n";
1312 XMLProgParser *p = new XMLProgParser();
1313 prog = p->parse(fname);
1315 else
1316 #endif
1318 prog = loadAndDecode(fname, pname);
1319 if (prog == NULL)
1320 return 1;
1323 #if USE_XML
1324 if (saveBeforeDecompile)
1326 std::cout << "saving persistable state...\n";
1327 XMLProgParser *p = new XMLProgParser();
1328 p->persistToXML(prog);
1330 #endif
1332 if (stopBeforeDecompile)
1333 return 0;
1335 std::cout << "decompiling...\n";
1336 prog->decompile();
1338 if (dotFile)
1339 prog->generateDotFile();
1341 if (printAST)
1343 std::cout << "printing AST...\n";
1344 PROGMAP::const_iterator it;
1345 for (Proc *p = prog->getFirstProc(it); p; p = prog->getNextProc(it))
1346 if (!p->isLib())
1348 UserProc *u = (UserProc*)p;
1349 u->getCFG()->compressCfg();
1350 u->printAST();
1354 std::cout << "generating code...\n";
1355 prog->generateCode();
1357 std::cout << "output written to " << outputPath << prog->getRootCluster()->getName() << "\n";
1359 if (Boomerang::get()->ofsIndCallReport)
1360 ofsIndCallReport->close();
1362 time_t end;
1363 time(&end);
1364 int hours = (int)((end-start) / 60 / 60);
1365 int mins = (int)((end-start) / 60 - hours * 60);
1366 int secs = (int)((end-start) - hours * 60 * 60 - mins * 60);
1367 std::cout << "completed in " << std::dec;
1368 if (hours)
1369 std::cout << hours << " hours ";
1370 if (hours || mins)
1371 std::cout << mins << " mins ";
1372 std::cout << secs << " sec" << (secs == 1 ? "" : "s") << ".\n";
1374 return 0;
1377 #if USE_XML
1379 * Saves the state of the Prog object to a XML file.
1380 * \param prog The Prog object to save.
1382 void Boomerang::persistToXML(Prog *prog)
1384 LOG << "saving persistable state...\n";
1385 XMLProgParser *p = new XMLProgParser();
1386 p->persistToXML(prog);
1389 * Loads the state of a Prog object from a XML file.
1390 * \param fname The name of the XML file.
1391 * \return The loaded Prog object.
1393 Prog *Boomerang::loadFromXML(const char *fname)
1395 LOG << "loading persistable state...\n";
1396 XMLProgParser *p = new XMLProgParser();
1397 return p->parse(fname);
1399 #endif
1402 * Prints the last lines of the log file.
1404 void Boomerang::logTail()
1406 logger->tail();
1409 void Boomerang::alert_decompile_debug_point(UserProc *p, const char *description)
1411 if (stopAtDebugPoints)
1413 std::cout << "decompiling " << p->getName() << ": " << description << "\n";
1414 static char *stopAt = NULL;
1415 static std::set<Statement*> watches;
1416 if (stopAt == NULL || !strcmp(p->getName(), stopAt))
1418 // This is a mini command line debugger. Feel free to expand it.
1419 for (std::set<Statement*>::iterator it = watches.begin(); it != watches.end(); it++)
1421 (*it)->print(std::cout);
1422 std::cout << "\n";
1424 std::cout << " <press enter to continue> \n";
1425 char line[1024];
1426 while (1)
1428 *line = 0;
1429 fgets(line, 1024, stdin);
1430 if (!strncmp(line, "print", 5))
1431 p->print(std::cout);
1432 else if (!strncmp(line, "fprint", 6))
1434 std::ofstream of("out.proc");
1435 p->print(of);
1436 of.close();
1438 else if (!strncmp(line, "run ", 4))
1440 stopAt = strdup(line + 4);
1441 if (strchr(stopAt, '\n'))
1442 *strchr(stopAt, '\n') = 0;
1443 if (strchr(stopAt, ' '))
1444 *strchr(stopAt, ' ') = 0;
1445 break;
1447 else if (!strncmp(line, "watch ", 6))
1449 int n = atoi(line + 6);
1450 StatementList stmts;
1451 p->getStatements(stmts);
1452 StatementList::iterator it;
1453 for (it = stmts.begin(); it != stmts.end(); it++)
1454 if ((*it)->getNumber() == n)
1456 watches.insert(*it);
1457 std::cout << "watching " << *it << "\n";
1460 else
1461 break;
1465 for (std::set<Watcher*>::iterator it = watchers.begin(); it != watchers.end(); it++)
1466 (*it)->alert_decompile_debug_point(p, description);
1469 const char* Boomerang::getVersionStr()
1471 return VERSION;