1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
14 * The Original Code is ``garope''
16 * The Initial Developer of the Original Code is Netscape
17 * Communications Corp. Portions created by the Initial Developer are
18 * Copyright (C) 2001 the Initial Developer. All Rights Reserved.
21 * Chris Waterson <waterson@netscape.com>
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
35 * ***** END LICENSE BLOCK ***** */
39 A program that attempts to find an optimal function ordering for an
40 executable using a genetic algorithm whose fitness function is
41 computed using runtime profile information.
43 The fitness function was inspired by Nat Friedman's <nat@nat.org>
46 _GNU Rope - A Subroutine Position Optimizer_
47 <http://www.hungry.com/~shaver/grope/grope.ps>
49 Brendan Eich <brendan@mozilla.org> told me tales about Scott Furman
50 doing something like this, which sort of made me want to try it.
52 As far as I can tell, it would take a lot of computers a lot of time
53 to actually find something useful on a non-trivial program using
67 #include "elf_symbol_table.h"
72 #define PAGE_SIZE 4096
73 #define SYMBOL_ALIGN 4
75 //----------------------------------------------------------------------
80 const Elf32_Sym
*m_lo
;
81 const Elf32_Sym
*m_hi
;
83 call_pair(const Elf32_Sym
*site1
, const Elf32_Sym
*site2
)
96 operator==(const call_pair
&lhs
, const call_pair
&rhs
)
98 return (lhs
.m_lo
== rhs
.m_lo
) && (lhs
.m_hi
== rhs
.m_hi
);
102 // Straight outta plhash.c!
103 #define GOLDEN_RATIO 0x9E3779B9U
106 struct hash
<call_pair
>
108 size_t operator()(const call_pair
&pair
) const
110 size_t h
= (reinterpret_cast<size_t>(pair
.m_hi
) >> 4);
111 h
+= (reinterpret_cast<size_t>(pair
.m_lo
) >> 4);
117 //----------------------------------------------------------------------
119 struct hash
<const Elf32_Sym
*>
121 size_t operator()(const Elf32_Sym
*sym
) const
123 return (reinterpret_cast<size_t>(sym
) >> 4) * GOLDEN_RATIO
;
127 //----------------------------------------------------------------------
129 typedef hash_map
<call_pair
, unsigned int> call_graph_t
;
130 call_graph_t call_graph
;
132 typedef hash_map
<const Elf32_Sym
*, unsigned int> histogram_t
;
133 histogram_t histogram
;
134 long long total_calls
= 0;
136 elf_symbol_table symtab
;
138 bool opt_debug
= false;
139 int opt_generations
= 10;
141 const char *opt_out
= "order.out";
142 int opt_population_size
= 100;
144 bool opt_verbose
= false;
147 static struct option long_options
[] = {
148 { "debug", no_argument
, 0, 'd' },
149 { "exe", required_argument
, 0, 'e' },
150 { "generations", required_argument
, 0, 'g' },
151 { "help", no_argument
, 0, '?' },
152 { "mutate", required_argument
, 0, 'm' },
153 { "out", required_argument
, 0, 'o' },
154 { "population", required_argument
, 0, 'p' },
155 { "seed", required_argument
, 0, 's' },
156 { "tick", optional_argument
, 0, 't' },
157 { "verbose", no_argument
, 0, 'v' },
158 { "window", required_argument
, 0, 'w' },
162 //----------------------------------------------------------------------
168 result
= (long long) rand();
169 result
*= (long long) (unsigned int) (RAND_MAX
+ 1);
170 result
+= (long long) rand();
174 //----------------------------------------------------------------------
178 typedef vector
<const Elf32_Sym
*> vector_t
;
179 typedef long long score_t
;
181 static const score_t max_score
;
184 * A vector of symbols that is this ordering.
189 * The symbol ordering's score.
193 symbol_order() : m_score(0) {}
196 * ``Shuffle'' a symbol ordering, randomizing it.
201 * Initialize this symbol ordering by performing a crossover from
202 * two ``parent'' symbol orderings.
204 void crossover_from(const symbol_order
*father
, const symbol_order
*mother
);
207 * Randomly mutate this symbol ordering.
212 * Score a symbol ordering based on paginated locality.
214 score_t
compute_score_page();
217 * Score a symbol ordering based on a sliding window.
219 score_t
compute_score_window(int window_size
);
221 static score_t
compute_score(symbol_order
&order
);
224 * Use the symbol table to dump the ordered symbolic constants.
226 void dump_symbols() const;
229 operator<<(ostream
&out
, const symbol_order
&order
);
232 const symbol_order::score_t
233 symbol_order::max_score
= ~((symbol_order::score_t
)1 << ((sizeof(symbol_order::score_t
) * 8) - 1));
235 symbol_order::score_t
236 symbol_order::compute_score_page()
240 unsigned int off
= 0; // XXX in reality, probably not page-aligned to start
242 vector_t::const_iterator end
= m_ordering
.end(),
244 sym
= m_ordering
.begin();
249 // If we had a symbol that spilled over from the last page,
250 // then include it here.
252 page
.push_back(*last
);
254 // Pack symbols into the page
256 page
.push_back(*sym
);
258 int size
= (*sym
)->st_size
;
259 size
+= SYMBOL_ALIGN
- 1;
260 size
&= ~(SYMBOL_ALIGN
- 1);
263 } while (++sym
!= end
&& off
< PAGE_SIZE
);
265 // Remember if there was spill-over.
267 last
= (off
!= 0) ? sym
: end
;
269 // Now score the page as the count of all calls to symbols on
270 // the page, less calls between the symbols on the page.
271 vector_t::const_iterator page_end
= page
.end();
272 for (vector_t::const_iterator i
= page
.begin(); i
!= page_end
; ++i
) {
273 histogram_t::const_iterator func
= histogram
.find(*i
);
274 if (func
== histogram
.end())
277 m_score
+= func
->second
;
279 vector_t::const_iterator j
= i
;
280 for (++j
; j
!= page_end
; ++j
) {
281 call_graph_t::const_iterator call
=
282 call_graph
.find(call_pair(*i
, *j
));
284 if (call
!= call_graph
.end())
285 m_score
-= call
->second
;
290 assert(m_score
>= 0);
292 // Integer reciprocal so we minimize instead of maximize.
296 m_score
= (total_calls
/ m_score
) + 1;
301 symbol_order::score_t
302 symbol_order::compute_score_window(int window_size
)
306 vector_t::const_iterator
*window
= new vector_t::const_iterator
[window_size
];
309 vector_t::const_iterator end
= m_ordering
.end(),
310 sym
= m_ordering
.begin();
312 for (; sym
!= end
; ++sym
) {
313 histogram_t::const_iterator func
= histogram
.find(*sym
);
314 if (func
!= histogram
.end()) {
315 long long scale
= ((long long) 1) << window_size
;
317 m_score
+= func
->second
* scale
* 2;
319 vector_t::const_iterator
*limit
= window
+ window_fill
;
320 vector_t::const_iterator
*iter
;
321 for (iter
= window
; iter
< limit
; ++iter
) {
322 call_graph_t::const_iterator call
=
323 call_graph
.find(call_pair(*sym
, **iter
));
325 if (call
!= call_graph
.end())
326 m_score
-= (call
->second
* scale
);
333 vector_t::const_iterator
*begin
= window
;
334 vector_t::const_iterator
*iter
;
335 for (iter
= window
+ (window_size
- 1); iter
> begin
; --iter
)
338 if (window_fill
< window_size
)
346 assert(m_score
>= 0);
348 // Integer reciprocal so we minimize instead of maximize.
352 m_score
= (total_calls
/ m_score
) + 1;
357 symbol_order::score_t
358 symbol_order::compute_score(symbol_order
&order
)
361 return order
.compute_score_window(opt_window
);
363 return order
.compute_score_page();
367 symbol_order::shuffle()
369 vector_t::iterator sym
= m_ordering
.begin();
370 vector_t::iterator end
= m_ordering
.end();
371 for (; sym
!= end
; ++sym
) {
372 int i
= rand() % m_ordering
.size();
373 const Elf32_Sym
*temp
= *sym
;
374 *sym
= m_ordering
[i
];
375 m_ordering
[i
] = temp
;
380 symbol_order::crossover_from(const symbol_order
*father
, const symbol_order
*mother
)
384 m_ordering
= vector_t(father
->m_ordering
.size(), 0);
386 vector_t::const_iterator parent_sym
= father
->m_ordering
.begin();
387 vector_t::iterator sym
= m_ordering
.begin();
388 vector_t::iterator end
= m_ordering
.end();
390 for (; sym
!= end
; ++sym
, ++parent_sym
) {
393 used
[*parent_sym
] = 1;
397 parent_sym
= mother
->m_ordering
.begin();
398 sym
= m_ordering
.begin();
400 for (; sym
!= end
; ++sym
) {
402 while (used
[*parent_sym
])
405 *sym
= *parent_sym
++;
411 symbol_order::mutate()
414 i
= rand() % m_ordering
.size();
415 j
= rand() % m_ordering
.size();
417 const Elf32_Sym
*temp
= m_ordering
[i
];
418 m_ordering
[i
] = m_ordering
[j
];
419 m_ordering
[j
] = temp
;
423 symbol_order::dump_symbols() const
425 ofstream
out(opt_out
);
427 vector_t::const_iterator sym
= m_ordering
.begin();
428 vector_t::const_iterator end
= m_ordering
.end();
429 for (; sym
!= end
; ++sym
)
430 out
<< symtab
.get_symbol_name(*sym
) << endl
;
436 operator<<(ostream
&out
, const symbol_order
&order
)
438 out
<< "symbol_order(" << order
.m_score
<< ") ";
440 symbol_order::vector_t::const_iterator sym
= order
.m_ordering
.begin();
441 symbol_order::vector_t::const_iterator end
= order
.m_ordering
.end();
442 for (; sym
!= end
; ++sym
)
443 out
.form("%08x ", *sym
);
450 //----------------------------------------------------------------------
453 usage(const char *name
)
455 cerr
<< "usage: " << name
<< " [options] [<file> ...]" << endl
;
456 cerr
<< " Options:" << endl
;
457 cerr
<< " --debug, -d" << endl
;
458 cerr
<< " Print lots of verbose debugging cruft." << endl
;
459 cerr
<< " --exe=<image>, -e <image> (required)" << endl
;
460 cerr
<< " Specify the executable image from which to read symbol information." << endl
;
461 cerr
<< " --generations=<num>, -g <num>" << endl
;
462 cerr
<< " Specify the number of generations to run the GA (default is 10)." << endl
;
463 cerr
<< " --help, -?" << endl
;
464 cerr
<< " Print this message and exit." << endl
;
465 cerr
<< " --mutate=<num>, -m <num>" << endl
;
466 cerr
<< " Mutate every <num>th individual, or zero for no mutation (default)." << endl
;
467 cerr
<< " --out=<file>, -o <file>" << endl
;
468 cerr
<< " Specify the output file to which to dump the symbol ordering of the" << endl
;
469 cerr
<< " best individual (default is `order.out')." << endl
;
470 cerr
<< " --population=<num>, -p <num>" << endl
;
471 cerr
<< " Set the population size to <num> individuals (default is 100)." << endl
;
472 cerr
<< " --seed=<num>, -s <num>" << endl
;
473 cerr
<< " Specify a seed to srand()." << endl
;
474 cerr
<< " --tick[=<num>], -t [<num>]" << endl
;
475 cerr
<< " When reading address data, print a dot to stderr every <num>th" << endl
;
476 cerr
<< " address processed from the call trace. If specified with no argument," << endl
;
477 cerr
<< " a dot will be printed for every million addresses processed." << endl
;
478 cerr
<< " --verbose, -v" << endl
;
479 cerr
<< " Issue progress messages to stderr." << endl
;
480 cerr
<< " --window=<num>, -w <num>" << endl
;
481 cerr
<< " Use a sliding window instead of pagination to score orderings." << endl
;
483 cerr
<< "This program uses a genetic algorithm to produce an `optimal' ordering for" << endl
;
484 cerr
<< "an executable based on call patterns." << endl
;
486 cerr
<< "Addresses from a call trace are read as binary data from the files" << endl
;
487 cerr
<< "specified, or from stdin if no files are specified. These addresses" << endl
;
488 cerr
<< "are used with the symbolic information from the executable to create" << endl
;
489 cerr
<< "a call graph. This call graph is used to `score' arbitrary symbol" << endl
;
490 cerr
<< "orderings, and provides the fitness function for the GA." << endl
;
495 * Using the symbol table, map a stream of address references into a
496 * callgraph and a histogram.
501 const Elf32_Sym
*last
= 0;
502 unsigned int buf
[128];
505 unsigned int count
= 0;
506 while ((cb
= read(fd
, buf
, sizeof buf
)) > 0) {
507 if (cb
% sizeof buf
[0])
508 fprintf(stderr
, "unaligned read\n");
510 unsigned int *addr
= buf
;
511 unsigned int *limit
= buf
+ (cb
/ 4);
513 for (; addr
< limit
; ++addr
) {
514 const Elf32_Sym
*sym
= symtab
.lookup(*addr
);
516 if (last
&& sym
&& last
!= sym
) {
519 ++call_graph
[call_pair(last
, sym
)];
521 if (opt_tick
&& (++count
% opt_tick
== 0)) {
534 cerr
<< "Total calls: " << total_calls
<< endl
;
538 total_calls
<<= (opt_window
+ 1);
541 static symbol_order
*
542 pick_parent(symbol_order
*ordering
, int max
, int index
)
545 index
-= ordering
->m_score
;
559 main(int argc
, char *argv
[])
561 const char *opt_exe
= 0;
565 int option_index
= 0;
566 c
= getopt_long(argc
, argv
, "?de:g:m:o:p:s:t:vw:", long_options
, &option_index
);
585 opt_generations
= atoi(optarg
);
589 opt_mutate
= atoi(optarg
);
597 opt_population_size
= atoi(optarg
);
605 opt_tick
= optarg
? atoi(optarg
) : 1000000;
613 opt_window
= atoi(optarg
);
614 if (opt_window
< 0 || opt_window
> 8) {
615 cerr
<< "invalid window size: " << opt_window
<< endl
;
627 // Make sure an image was specified
633 // Read the sym table.
634 symtab
.init(opt_exe
);
636 // Process addresses to construct the call graph.
637 if (optind
>= argc
) {
638 map_addrs(STDIN_FILENO
);
642 int fd
= open(argv
[optind
], O_RDONLY
);
644 perror(argv
[optind
]);
650 } while (++optind
< argc
);
654 cerr
<< "Call graph:" << endl
;
656 call_graph_t::const_iterator limit
= call_graph
.end();
657 call_graph_t::const_iterator i
;
658 for (i
= call_graph
.begin(); i
!= limit
; ++i
) {
659 const call_pair
& pair
= i
->first
;
660 cerr
.form("%08x %08x %10d\n",
667 // Collect the symbols into a vector
668 symbol_order::vector_t ordering
;
669 elf_symbol_table::const_iterator end
= symtab
.end();
670 for (elf_symbol_table::const_iterator sym
= symtab
.begin(); sym
!= end
; ++sym
) {
671 if (symtab
.is_function(sym
))
672 ordering
.push_back(sym
);
676 symbol_order initial
;
677 initial
.m_ordering
= ordering
;
678 cerr
<< "created initial ordering, score=" << symbol_order::compute_score(initial
) << endl
;
684 // Create a population.
686 cerr
<< "creating population" << endl
;
688 symbol_order
*population
= new symbol_order
[opt_population_size
];
690 symbol_order::score_t total
= 0, min
= symbol_order::max_score
, max
= 0;
693 symbol_order
*order
= population
;
694 symbol_order
*limit
= population
+ opt_population_size
;
695 for (; order
< limit
; ++order
) {
696 order
->m_ordering
= ordering
;
699 symbol_order::score_t score
= symbol_order::compute_score(*order
);
713 cerr
<< "Initial population";
714 cerr
<< ": min=" << min
;
715 cerr
<< ", max=" << max
;
716 cerr
<< " mean=" << (total
/ opt_population_size
);
723 cerr
<< "begininng ga" << endl
;
725 symbol_order::score_t best
= 0;
727 for (int generation
= 1; generation
<= opt_generations
; ++generation
) {
728 // Create a new population.
729 symbol_order
*offspring
= new symbol_order
[opt_population_size
];
731 symbol_order
*kid
= offspring
;
732 symbol_order
*offspring_limit
= offspring
+ opt_population_size
;
733 for (; kid
< offspring_limit
; ++kid
) {
735 symbol_order
*father
, *mother
;
736 father
= pick_parent(population
, max
, llrand() % total
);
737 mother
= pick_parent(population
, max
, llrand() % total
);
740 kid
->crossover_from(father
, mother
);
744 if (rand() % opt_mutate
== 0)
750 population
= offspring
;
752 // Score the new population.
754 min
= symbol_order::max_score
;
757 symbol_order
*fittest
= 0;
759 limit
= offspring_limit
;
760 for (order
= population
; order
< limit
; ++order
) {
761 symbol_order::score_t score
= symbol_order::compute_score(*order
);
781 cerr
<< "Generation " << generation
;
782 cerr
<< ": min=" << min
;
783 cerr
<< ", max=" << max
;
786 cerr
<< " mean=" << (total
/ opt_population_size
);
790 // If we've found a new ``best'' individual, dump it.
792 fittest
->dump_symbols();