Bug 470455 - test_database_sync_embed_visits.js leaks, r=sdwilsh
[wine-gecko.git] / tools / reorder / garope.cpp
blob082de6c53bc969bc484e5ee233c0e183bc77fa15
1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
12 * License.
14 * The Original Code is ``garope''
16 * The Initial Developer of the Original Code is Netscape
17 * Communications Corp. Portions created by the Initial Developer are
18 * Copyright (C) 2001 the Initial Developer. All Rights Reserved.
20 * Contributor(s):
21 * Chris Waterson <waterson@netscape.com>
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
35 * ***** END LICENSE BLOCK ***** */
39 A program that attempts to find an optimal function ordering for an
40 executable using a genetic algorithm whose fitness function is
41 computed using runtime profile information.
43 The fitness function was inspired by Nat Friedman's <nat@nat.org>
44 work on `grope':
46 _GNU Rope - A Subroutine Position Optimizer_
47 <http://www.hungry.com/~shaver/grope/grope.ps>
49 Brendan Eich <brendan@mozilla.org> told me tales about Scott Furman
50 doing something like this, which sort of made me want to try it.
52 As far as I can tell, it would take a lot of computers a lot of time
53 to actually find something useful on a non-trivial program using
54 this.
58 #include <assert.h>
59 #include <fstream>
60 #include <hash_map>
61 #include <vector>
62 #include <limits.h>
63 #include <unistd.h>
64 #include <stdio.h>
65 #include <fcntl.h>
67 #include "elf_symbol_table.h"
69 #define _GNU_SOURCE
70 #include <getopt.h>
72 #define PAGE_SIZE 4096
73 #define SYMBOL_ALIGN 4
75 //----------------------------------------------------------------------
77 class call_pair
79 public:
80 const Elf32_Sym *m_lo;
81 const Elf32_Sym *m_hi;
83 call_pair(const Elf32_Sym *site1, const Elf32_Sym *site2)
85 if (site1 < site2) {
86 m_lo = site1;
87 m_hi = site2;
89 else {
90 m_hi = site1;
91 m_lo = site2;
95 friend bool
96 operator==(const call_pair &lhs, const call_pair &rhs)
98 return (lhs.m_lo == rhs.m_lo) && (lhs.m_hi == rhs.m_hi);
102 // Straight outta plhash.c!
103 #define GOLDEN_RATIO 0x9E3779B9U
105 template<>
106 struct hash<call_pair>
108 size_t operator()(const call_pair &pair) const
110 size_t h = (reinterpret_cast<size_t>(pair.m_hi) >> 4);
111 h += (reinterpret_cast<size_t>(pair.m_lo) >> 4);
112 h *= GOLDEN_RATIO;
113 return h;
117 //----------------------------------------------------------------------
119 struct hash<const Elf32_Sym *>
121 size_t operator()(const Elf32_Sym *sym) const
123 return (reinterpret_cast<size_t>(sym) >> 4) * GOLDEN_RATIO;
127 //----------------------------------------------------------------------
129 typedef hash_map<call_pair, unsigned int> call_graph_t;
130 call_graph_t call_graph;
132 typedef hash_map<const Elf32_Sym *, unsigned int> histogram_t;
133 histogram_t histogram;
134 long long total_calls = 0;
136 elf_symbol_table symtab;
138 bool opt_debug = false;
139 int opt_generations = 10;
140 int opt_mutate = 0;
141 const char *opt_out = "order.out";
142 int opt_population_size = 100;
143 int opt_tick = 0;
144 bool opt_verbose = false;
145 int opt_window = 0;
147 static struct option long_options[] = {
148 { "debug", no_argument, 0, 'd' },
149 { "exe", required_argument, 0, 'e' },
150 { "generations", required_argument, 0, 'g' },
151 { "help", no_argument, 0, '?' },
152 { "mutate", required_argument, 0, 'm' },
153 { "out", required_argument, 0, 'o' },
154 { "population", required_argument, 0, 'p' },
155 { "seed", required_argument, 0, 's' },
156 { "tick", optional_argument, 0, 't' },
157 { "verbose", no_argument, 0, 'v' },
158 { "window", required_argument, 0, 'w' },
159 { 0, 0, 0, 0 }
162 //----------------------------------------------------------------------
164 static long long
165 llrand()
167 long long result;
168 result = (long long) rand();
169 result *= (long long) (unsigned int) (RAND_MAX + 1);
170 result += (long long) rand();
171 return result;
174 //----------------------------------------------------------------------
176 class symbol_order {
177 public:
178 typedef vector<const Elf32_Sym *> vector_t;
179 typedef long long score_t;
181 static const score_t max_score;
184 * A vector of symbols that is this ordering.
186 vector_t m_ordering;
189 * The symbol ordering's score.
191 score_t m_score;
193 symbol_order() : m_score(0) {}
196 * ``Shuffle'' a symbol ordering, randomizing it.
198 void shuffle();
201 * Initialize this symbol ordering by performing a crossover from
202 * two ``parent'' symbol orderings.
204 void crossover_from(const symbol_order *father, const symbol_order *mother);
207 * Randomly mutate this symbol ordering.
209 void mutate();
212 * Score a symbol ordering based on paginated locality.
214 score_t compute_score_page();
217 * Score a symbol ordering based on a sliding window.
219 score_t compute_score_window(int window_size);
221 static score_t compute_score(symbol_order &order);
224 * Use the symbol table to dump the ordered symbolic constants.
226 void dump_symbols() const;
228 friend ostream &
229 operator<<(ostream &out, const symbol_order &order);
232 const symbol_order::score_t
233 symbol_order::max_score = ~((symbol_order::score_t)1 << ((sizeof(symbol_order::score_t) * 8) - 1));
235 symbol_order::score_t
236 symbol_order::compute_score_page()
238 m_score = 0;
240 unsigned int off = 0; // XXX in reality, probably not page-aligned to start
242 vector_t::const_iterator end = m_ordering.end(),
243 last = end,
244 sym = m_ordering.begin();
246 while (sym != end) {
247 vector_t page;
249 // If we had a symbol that spilled over from the last page,
250 // then include it here.
251 if (last != end)
252 page.push_back(*last);
254 // Pack symbols into the page
255 do {
256 page.push_back(*sym);
258 int size = (*sym)->st_size;
259 size += SYMBOL_ALIGN - 1;
260 size &= ~(SYMBOL_ALIGN - 1);
262 off += size;
263 } while (++sym != end && off < PAGE_SIZE);
265 // Remember if there was spill-over.
266 off %= PAGE_SIZE;
267 last = (off != 0) ? sym : end;
269 // Now score the page as the count of all calls to symbols on
270 // the page, less calls between the symbols on the page.
271 vector_t::const_iterator page_end = page.end();
272 for (vector_t::const_iterator i = page.begin(); i != page_end; ++i) {
273 histogram_t::const_iterator func = histogram.find(*i);
274 if (func == histogram.end())
275 continue;
277 m_score += func->second;
279 vector_t::const_iterator j = i;
280 for (++j; j != page_end; ++j) {
281 call_graph_t::const_iterator call =
282 call_graph.find(call_pair(*i, *j));
284 if (call != call_graph.end())
285 m_score -= call->second;
290 assert(m_score >= 0);
292 // Integer reciprocal so we minimize instead of maximize.
293 if (m_score == 0)
294 m_score = 1;
296 m_score = (total_calls / m_score) + 1;
298 return m_score;
301 symbol_order::score_t
302 symbol_order::compute_score_window(int window_size)
304 m_score = 0;
306 vector_t::const_iterator *window = new vector_t::const_iterator[window_size];
307 int window_fill = 0;
309 vector_t::const_iterator end = m_ordering.end(),
310 sym = m_ordering.begin();
312 for (; sym != end; ++sym) {
313 histogram_t::const_iterator func = histogram.find(*sym);
314 if (func != histogram.end()) {
315 long long scale = ((long long) 1) << window_size;
317 m_score += func->second * scale * 2;
319 vector_t::const_iterator *limit = window + window_fill;
320 vector_t::const_iterator *iter;
321 for (iter = window ; iter < limit; ++iter) {
322 call_graph_t::const_iterator call =
323 call_graph.find(call_pair(*sym, **iter));
325 if (call != call_graph.end())
326 m_score -= (call->second * scale);
328 scale >>= 1;
332 // Slide the window.
333 vector_t::const_iterator *begin = window;
334 vector_t::const_iterator *iter;
335 for (iter = window + (window_size - 1); iter > begin; --iter)
336 *iter = *(iter - 1);
338 if (window_fill < window_size)
339 ++window_fill;
341 *window = sym;
344 delete[] window;
346 assert(m_score >= 0);
348 // Integer reciprocal so we minimize instead of maximize.
349 if (m_score == 0)
350 m_score = 1;
352 m_score = (total_calls / m_score) + 1;
354 return m_score;
357 symbol_order::score_t
358 symbol_order::compute_score(symbol_order &order)
360 if (opt_window)
361 return order.compute_score_window(opt_window);
363 return order.compute_score_page();
366 void
367 symbol_order::shuffle()
369 vector_t::iterator sym = m_ordering.begin();
370 vector_t::iterator end = m_ordering.end();
371 for (; sym != end; ++sym) {
372 int i = rand() % m_ordering.size();
373 const Elf32_Sym *temp = *sym;
374 *sym = m_ordering[i];
375 m_ordering[i] = temp;
379 void
380 symbol_order::crossover_from(const symbol_order *father, const symbol_order *mother)
382 histogram_t used;
384 m_ordering = vector_t(father->m_ordering.size(), 0);
386 vector_t::const_iterator parent_sym = father->m_ordering.begin();
387 vector_t::iterator sym = m_ordering.begin();
388 vector_t::iterator end = m_ordering.end();
390 for (; sym != end; ++sym, ++parent_sym) {
391 if (rand() % 2) {
392 *sym = *parent_sym;
393 used[*parent_sym] = 1;
397 parent_sym = mother->m_ordering.begin();
398 sym = m_ordering.begin();
400 for (; sym != end; ++sym) {
401 if (! *sym) {
402 while (used[*parent_sym])
403 ++parent_sym;
405 *sym = *parent_sym++;
410 void
411 symbol_order::mutate()
413 int i, j;
414 i = rand() % m_ordering.size();
415 j = rand() % m_ordering.size();
417 const Elf32_Sym *temp = m_ordering[i];
418 m_ordering[i] = m_ordering[j];
419 m_ordering[j] = temp;
422 void
423 symbol_order::dump_symbols() const
425 ofstream out(opt_out);
427 vector_t::const_iterator sym = m_ordering.begin();
428 vector_t::const_iterator end = m_ordering.end();
429 for (; sym != end; ++sym)
430 out << symtab.get_symbol_name(*sym) << endl;
432 out.close();
435 ostream &
436 operator<<(ostream &out, const symbol_order &order)
438 out << "symbol_order(" << order.m_score << ") ";
440 symbol_order::vector_t::const_iterator sym = order.m_ordering.begin();
441 symbol_order::vector_t::const_iterator end = order.m_ordering.end();
442 for (; sym != end; ++sym)
443 out.form("%08x ", *sym);
445 out << endl;
447 return out;
450 //----------------------------------------------------------------------
452 static void
453 usage(const char *name)
455 cerr << "usage: " << name << " [options] [<file> ...]" << endl;
456 cerr << " Options:" << endl;
457 cerr << " --debug, -d" << endl;
458 cerr << " Print lots of verbose debugging cruft." << endl;
459 cerr << " --exe=<image>, -e <image> (required)" << endl;
460 cerr << " Specify the executable image from which to read symbol information." << endl;
461 cerr << " --generations=<num>, -g <num>" << endl;
462 cerr << " Specify the number of generations to run the GA (default is 10)." << endl;
463 cerr << " --help, -?" << endl;
464 cerr << " Print this message and exit." << endl;
465 cerr << " --mutate=<num>, -m <num>" << endl;
466 cerr << " Mutate every <num>th individual, or zero for no mutation (default)." << endl;
467 cerr << " --out=<file>, -o <file>" << endl;
468 cerr << " Specify the output file to which to dump the symbol ordering of the" << endl;
469 cerr << " best individual (default is `order.out')." << endl;
470 cerr << " --population=<num>, -p <num>" << endl;
471 cerr << " Set the population size to <num> individuals (default is 100)." << endl;
472 cerr << " --seed=<num>, -s <num>" << endl;
473 cerr << " Specify a seed to srand()." << endl;
474 cerr << " --tick[=<num>], -t [<num>]" << endl;
475 cerr << " When reading address data, print a dot to stderr every <num>th" << endl;
476 cerr << " address processed from the call trace. If specified with no argument," << endl;
477 cerr << " a dot will be printed for every million addresses processed." << endl;
478 cerr << " --verbose, -v" << endl;
479 cerr << " Issue progress messages to stderr." << endl;
480 cerr << " --window=<num>, -w <num>" << endl;
481 cerr << " Use a sliding window instead of pagination to score orderings." << endl;
482 cerr << endl;
483 cerr << "This program uses a genetic algorithm to produce an `optimal' ordering for" << endl;
484 cerr << "an executable based on call patterns." << endl;
485 cerr << endl;
486 cerr << "Addresses from a call trace are read as binary data from the files" << endl;
487 cerr << "specified, or from stdin if no files are specified. These addresses" << endl;
488 cerr << "are used with the symbolic information from the executable to create" << endl;
489 cerr << "a call graph. This call graph is used to `score' arbitrary symbol" << endl;
490 cerr << "orderings, and provides the fitness function for the GA." << endl;
491 cerr << endl;
495 * Using the symbol table, map a stream of address references into a
496 * callgraph and a histogram.
498 static void
499 map_addrs(int fd)
501 const Elf32_Sym *last = 0;
502 unsigned int buf[128];
503 ssize_t cb;
505 unsigned int count = 0;
506 while ((cb = read(fd, buf, sizeof buf)) > 0) {
507 if (cb % sizeof buf[0])
508 fprintf(stderr, "unaligned read\n");
510 unsigned int *addr = buf;
511 unsigned int *limit = buf + (cb / 4);
513 for (; addr < limit; ++addr) {
514 const Elf32_Sym *sym = symtab.lookup(*addr);
516 if (last && sym && last != sym) {
517 ++total_calls;
518 ++histogram[sym];
519 ++call_graph[call_pair(last, sym)];
521 if (opt_tick && (++count % opt_tick == 0)) {
522 cerr << ".";
523 flush(cerr);
527 last = sym;
531 if (opt_tick)
532 cerr << endl;
534 cerr << "Total calls: " << total_calls << endl;
535 total_calls *= 1024;
537 if (opt_window)
538 total_calls <<= (opt_window + 1);
541 static symbol_order *
542 pick_parent(symbol_order *ordering, int max, int index)
544 while (1) {
545 index -= ordering->m_score;
546 if (index < 0)
547 break;
549 ++ordering;
552 return ordering;
556 * The main program
559 main(int argc, char *argv[])
561 const char *opt_exe = 0;
563 int c;
564 while (1) {
565 int option_index = 0;
566 c = getopt_long(argc, argv, "?de:g:m:o:p:s:t:vw:", long_options, &option_index);
568 if (c < 0)
569 break;
571 switch (c) {
572 case '?':
573 usage(argv[0]);
574 return 0;
576 case 'd':
577 opt_debug = true;
578 break;
580 case 'e':
581 opt_exe = optarg;
582 break;
584 case 'g':
585 opt_generations = atoi(optarg);
586 break;
588 case 'm':
589 opt_mutate = atoi(optarg);
590 break;
592 case 'o':
593 opt_out = optarg;
594 break;
596 case 'p':
597 opt_population_size = atoi(optarg);
598 break;
600 case 's':
601 srand(atoi(optarg));
602 break;
604 case 't':
605 opt_tick = optarg ? atoi(optarg) : 1000000;
606 break;
608 case 'v':
609 opt_verbose = true;
610 break;
612 case 'w':
613 opt_window = atoi(optarg);
614 if (opt_window < 0 || opt_window > 8) {
615 cerr << "invalid window size: " << opt_window << endl;
616 return 1;
619 break;
621 default:
622 usage(argv[0]);
623 return 1;
627 // Make sure an image was specified
628 if (! opt_exe) {
629 usage(argv[0]);
630 return 1;
633 // Read the sym table.
634 symtab.init(opt_exe);
636 // Process addresses to construct the call graph.
637 if (optind >= argc) {
638 map_addrs(STDIN_FILENO);
640 else {
641 do {
642 int fd = open(argv[optind], O_RDONLY);
643 if (fd < 0) {
644 perror(argv[optind]);
645 return 1;
648 map_addrs(fd);
649 close(fd);
650 } while (++optind < argc);
653 if (opt_debug) {
654 cerr << "Call graph:" << endl;
656 call_graph_t::const_iterator limit = call_graph.end();
657 call_graph_t::const_iterator i;
658 for (i = call_graph.begin(); i != limit; ++i) {
659 const call_pair& pair = i->first;
660 cerr.form("%08x %08x %10d\n",
661 pair.m_lo->st_value,
662 pair.m_hi->st_value,
663 i->second);
667 // Collect the symbols into a vector
668 symbol_order::vector_t ordering;
669 elf_symbol_table::const_iterator end = symtab.end();
670 for (elf_symbol_table::const_iterator sym = symtab.begin(); sym != end; ++sym) {
671 if (symtab.is_function(sym))
672 ordering.push_back(sym);
675 if (opt_verbose) {
676 symbol_order initial;
677 initial.m_ordering = ordering;
678 cerr << "created initial ordering, score=" << symbol_order::compute_score(initial) << endl;
680 if (opt_debug)
681 cerr << initial;
684 // Create a population.
685 if (opt_verbose)
686 cerr << "creating population" << endl;
688 symbol_order *population = new symbol_order[opt_population_size];
690 symbol_order::score_t total = 0, min = symbol_order::max_score, max = 0;
692 // Score it.
693 symbol_order *order = population;
694 symbol_order *limit = population + opt_population_size;
695 for (; order < limit; ++order) {
696 order->m_ordering = ordering;
697 order->shuffle();
699 symbol_order::score_t score = symbol_order::compute_score(*order);
701 if (opt_debug)
702 cerr << *order;
704 if (min > score)
705 min = score;
706 if (max < score)
707 max = score;
709 total += score;
712 if (opt_verbose) {
713 cerr << "Initial population";
714 cerr << ": min=" << min;
715 cerr << ", max=" << max;
716 cerr << " mean=" << (total / opt_population_size);
717 cerr << endl;
721 // Run the GA.
722 if (opt_verbose)
723 cerr << "begininng ga" << endl;
725 symbol_order::score_t best = 0;
727 for (int generation = 1; generation <= opt_generations; ++generation) {
728 // Create a new population.
729 symbol_order *offspring = new symbol_order[opt_population_size];
731 symbol_order *kid = offspring;
732 symbol_order *offspring_limit = offspring + opt_population_size;
733 for (; kid < offspring_limit; ++kid) {
734 // Pick parents.
735 symbol_order *father, *mother;
736 father = pick_parent(population, max, llrand() % total);
737 mother = pick_parent(population, max, llrand() % total);
739 // Create a kid.
740 kid->crossover_from(father, mother);
742 // Mutate, possibly.
743 if (opt_mutate) {
744 if (rand() % opt_mutate == 0)
745 kid->mutate();
749 delete[] population;
750 population = offspring;
752 // Score the new population.
753 total = 0;
754 min = symbol_order::max_score;
755 max = 0;
757 symbol_order *fittest = 0;
759 limit = offspring_limit;
760 for (order = population; order < limit; ++order) {
761 symbol_order::score_t score = symbol_order::compute_score(*order);
763 if (opt_debug)
764 cerr << *order;
766 if (min > score)
767 min = score;
769 if (max < score)
770 max = score;
772 if (best < score) {
773 best = score;
774 fittest = order;
777 total += score;
780 if (opt_verbose) {
781 cerr << "Generation " << generation;
782 cerr << ": min=" << min;
783 cerr << ", max=" << max;
784 if (fittest)
785 cerr << "*";
786 cerr << " mean=" << (total / opt_population_size);
787 cerr << endl;
790 // If we've found a new ``best'' individual, dump it.
791 if (fittest)
792 fittest->dump_symbols();
795 delete[] population;
796 return 0;