1 /* ***** BEGIN LICENSE BLOCK *****
2 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
4 * The contents of this file are subject to the Mozilla Public License Version
5 * 1.1 (the "License"); you may not use this file except in compliance with
6 * the License. You may obtain a copy of the License at
7 * http://www.mozilla.org/MPL/
9 * Software distributed under the License is distributed on an "AS IS" basis,
10 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
11 * for the specific language governing rights and limitations under the
14 * The Original Code is ``grope''
16 * The Initial Developer of the Original Code is Netscape
17 * Communications Corp. Portions created by the Initial Developer are
18 * Copyright (C) 2001 the Initial Developer. All Rights Reserved.
21 * Chris Waterson <waterson@netscape.com>
23 * Alternatively, the contents of this file may be used under the terms of
24 * either the GNU General Public License Version 2 or later (the "GPL"), or
25 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
26 * in which case the provisions of the GPL or the LGPL are applicable instead
27 * of those above. If you wish to allow use of your version of this file only
28 * under the terms of either the GPL or the LGPL, and not to allow others to
29 * use your version of this file under the terms of the MPL, indicate your
30 * decision by deleting the provisions above and replace them with the notice
31 * and other provisions required by the GPL or the LGPL. If you do not delete
32 * the provisions above, a recipient may use your version of this file under
33 * the terms of any one of the MPL, the GPL or the LGPL.
35 * ***** END LICENSE BLOCK ***** */
39 A program that computes a function ordering for an executable based
40 on runtime profile information.
42 This program is directly based on work done by Roger Chickering
43 <rogc@netscape.com> in
45 <http://bugzilla.mozilla.org/show_bug.cgi?id=65845>
47 to implement Nat Friedman's <nat@nat.org> `grope',
49 _GNU Rope - A Subroutine Position Optimizer_
50 <http://www.hungry.com/~shaver/grope/grope.ps>
52 Specifically, it implements the procedure re-ordering algorithm
55 K. Pettis and R. Hansen. ``Profile-Guided Core Position.'' In
56 _Proceedings of the Int. Conference on Programming Language Design
57 and Implementation_, pages 16-27, June 1990.
73 #include "elf_symbol_table.h"
78 elf_symbol_table symtab
;
80 // Straight outta plhash.c!
81 #define GOLDEN_RATIO 0x9E3779B9U
83 struct hash
<const Elf32_Sym
*>
85 size_t operator()(const Elf32_Sym
*sym
) const {
86 return (reinterpret_cast<size_t>(sym
) >> 4) * GOLDEN_RATIO
; }
89 typedef unsigned int call_count_t
;
93 const Elf32_Sym
*m_from
;
94 const Elf32_Sym
*m_to
;
97 call_graph_arc(const Elf32_Sym
*left
, const Elf32_Sym
*right
, call_count_t count
= 0)
100 assert(left
!= right
);
113 operator==(const call_graph_arc
&lhs
, const call_graph_arc
&rhs
)
115 return (lhs
.m_from
== rhs
.m_from
) && (lhs
.m_to
== rhs
.m_to
);
119 operator<<(ostream
&out
, const call_graph_arc
&arc
)
122 out
.form("<(%s %s) %d>",
123 symtab
.get_symbol_name(arc
.m_from
),
124 symtab
.get_symbol_name(arc
.m_to
),
131 struct hash
<call_graph_arc
*>
134 operator()(const call_graph_arc
* arc
) const
137 result
= reinterpret_cast<unsigned long>(arc
->m_from
);
138 result
^= reinterpret_cast<unsigned long>(arc
->m_to
) >> 16;
139 result
^= reinterpret_cast<unsigned long>(arc
->m_to
) << 16;
140 result
*= GOLDEN_RATIO
;
145 struct equal_to
<call_graph_arc
*>
148 operator()(const call_graph_arc
* lhs
, const call_graph_arc
*rhs
) const
154 typedef hash_set
<call_graph_arc
*> arc_container_t
;
155 arc_container_t arcs
;
157 typedef multimap
<const Elf32_Sym
*, call_graph_arc
*> arc_map_t
;
161 struct less_call_graph_arc_count
164 operator()(const call_graph_arc
*lhs
, const call_graph_arc
*rhs
) const
166 if (lhs
->m_count
== rhs
->m_count
) {
167 if (lhs
->m_from
== rhs
->m_from
)
168 return lhs
->m_to
> rhs
->m_to
;
170 return lhs
->m_from
> rhs
->m_from
;
172 return lhs
->m_count
> rhs
->m_count
;
176 typedef set
<call_graph_arc
*, less_call_graph_arc_count
> arc_count_index_t
;
178 bool opt_debug
= false;
179 const char *opt_out
= "order.out";
181 bool opt_verbose
= false;
184 static struct option long_options
[] = {
185 { "debug", no_argument
, 0, 'd' },
186 { "exe", required_argument
, 0, 'e' },
187 { "help", no_argument
, 0, '?' },
188 { "out", required_argument
, 0, 'o' },
189 { "tick", optional_argument
, 0, 't' },
190 { "verbose", no_argument
, 0, 'v' },
191 { "window", required_argument
, 0, 'w' },
195 //----------------------------------------------------------------------
198 usage(const char *name
)
200 cerr
<< "usage: " << name
<< " [options] [<file> ...]" << endl
;
201 cerr
<< " Options:" << endl
;
202 cerr
<< " --debug, -d" << endl
;
203 cerr
<< " Print lots of verbose debugging cruft." << endl
;
204 cerr
<< " --exe=<image>, -e <image> (required)" << endl
;
205 cerr
<< " Specify the executable image from which to read symbol information." << endl
;
206 cerr
<< " --help, -?" << endl
;
207 cerr
<< " Print this message and exit." << endl
;
208 cerr
<< " --out=<file>, -o <file>" << endl
;
209 cerr
<< " Specify the output file to which to dump the symbol ordering of the" << endl
;
210 cerr
<< " best individual (default is `order.out')." << endl
;
211 cerr
<< " --tick[=<num>], -t [<num>]" << endl
;
212 cerr
<< " When reading address data, print a dot to stderr every <num>th" << endl
;
213 cerr
<< " address processed from the call trace. If specified with no argument," << endl
;
214 cerr
<< " a dot will be printed for every million addresses processed." << endl
;
215 cerr
<< " --verbose, -v" << endl
;
216 cerr
<< " Issue progress messages to stderr." << endl
;
217 cerr
<< " --window=<num>, -w <num>" << endl
;
218 cerr
<< " Use a sliding window instead of pagination to score orderings." << endl
;
222 * Using the symbol table, map a stream of address references into a
228 long long total_calls
= 0;
229 typedef list
<const Elf32_Sym
*> window_t
;
233 unsigned int buf
[128];
236 while ((cb
= read(fd
, buf
, sizeof buf
)) > 0) {
237 if (cb
% sizeof buf
[0])
238 fprintf(stderr
, "unaligned read\n");
240 unsigned int *addr
= buf
;
241 unsigned int *limit
= buf
+ (cb
/ 4);
243 for (; addr
< limit
; ++addr
) {
244 const Elf32_Sym
*sym
= symtab
.lookup(*addr
);
250 window
.push_front(sym
);
252 if (window_size
>= opt_window
)
257 window_t::const_iterator i
= window
.begin();
258 window_t::const_iterator end
= window
.end();
259 for (; i
!= end
; ++i
) {
262 call_graph_arc
key(sym
, *i
);
263 arc_container_t::iterator iter
= arcs
.find(&key
);
264 if (iter
== arcs
.end()) {
265 arc
= new call_graph_arc(sym
, *i
);
267 from_map
.insert(arc_map_t::value_type(arc
->m_from
, arc
));
268 to_map
.insert(arc_map_t::value_type(arc
->m_to
, arc
));
271 arc
= const_cast<call_graph_arc
*>(*iter
);
277 if (opt_verbose
&& opt_tick
&& (total_calls
% opt_tick
== 0)) {
288 cerr
<< "Total calls: " << total_calls
<< endl
;
293 remove_from(arc_map_t
& map
, const Elf32_Sym
*sym
, const call_graph_arc
*arc
)
295 pair
<arc_map_t::iterator
, arc_map_t::iterator
> p
296 = map
.equal_range(sym
);
298 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
299 if (i
->second
== arc
) {
310 main(int argc
, char *argv
[])
312 const char *opt_exe
= 0;
316 int option_index
= 0;
317 c
= getopt_long(argc
, argv
, "?de:o:t:vw:", long_options
, &option_index
);
340 opt_tick
= optarg
? atoi(optarg
) : 1000000;
348 opt_window
= atoi(optarg
);
349 if (opt_window
<= 0) {
350 cerr
<< "invalid window size: " << opt_window
<< endl
;
361 // Make sure an image was specified
367 // Read the sym table.
368 symtab
.init(opt_exe
);
370 // Process addresses to construct the weighted call graph.
371 if (optind
>= argc
) {
372 map_addrs(STDIN_FILENO
);
376 int fd
= open(argv
[optind
], O_RDONLY
);
378 perror(argv
[optind
]);
384 } while (++optind
< argc
);
387 // Emit the ordering.
388 ofstream
out(opt_out
);
390 // Collect all of the arcs into a sorted container, with arcs
391 // having the largest weight at the front.
392 arc_count_index_t
sorted_arcs(arcs
.begin(), arcs
.end());
394 while (sorted_arcs
.size()) {
396 cerr
<< "==========================================" << endl
<< endl
;
398 cerr
<< "Sorted Arcs:" << endl
;
399 for (arc_count_index_t::const_iterator iter
= sorted_arcs
.begin();
400 iter
!= sorted_arcs
.end();
402 cerr
<< **iter
<< endl
;
405 cerr
<< endl
<< "Arc Container:" << endl
;
406 for (arc_container_t::const_iterator iter
= arcs
.begin();
409 cerr
<< **iter
<< endl
;
412 cerr
<< endl
<< "From Map:" << endl
;
413 for (arc_map_t::const_iterator iter
= from_map
.begin();
414 iter
!= from_map
.end();
416 cerr
<< symtab
.get_symbol_name(iter
->first
) << ": " << *(iter
->second
) << endl
;
419 cerr
<< endl
<< "To Map:" << endl
;
420 for (arc_map_t::const_iterator iter
= to_map
.begin();
421 iter
!= to_map
.end();
423 cerr
<< symtab
.get_symbol_name(iter
->first
) << ": " << *(iter
->second
) << endl
;
429 // The first arc in the sorted set will have the largest
430 // weight. Pull it out, and emit its sink.
431 arc_count_index_t::iterator max
= sorted_arcs
.begin();
432 call_graph_arc
*arc
= const_cast<call_graph_arc
*>(*max
);
434 sorted_arcs
.erase(max
);
437 cerr
<< "pulling " << *arc
<< endl
;
440 remove_from(from_map
, arc
->m_from
, arc
);
441 remove_from(to_map
, arc
->m_to
, arc
);
443 out
<< symtab
.get_symbol_name(arc
->m_to
) << endl
;
445 // Merge arc->m_to into arc->m_from. First, modify anything
446 // that used to point to arc->m_to to point to arc->m_from.
447 typedef list
<call_graph_arc
*> arc_list_t
;
448 arc_list_t map_add_queue
;
450 pair
<arc_map_t::iterator
, arc_map_t::iterator
> p
;
452 // Find all the arcs that point to arc->m_to.
453 p
= to_map
.equal_range(arc
->m_to
);
454 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
455 // Remove the arc that points to arc->m_to (`doomed') from
456 // all of our indicies.
457 call_graph_arc
*doomed
= i
->second
;
458 const Elf32_Sym
*source
= doomed
->m_from
;
460 sorted_arcs
.erase(doomed
);
462 remove_from(from_map
, source
, doomed
);
463 // N.B. that `doomed' will be removed from the `to_map'
464 // after the loop completes.
466 // Create a new (or locate an existing) arc whose source
467 // was the doomed arc's source, and whose sink is
468 // arc->m_from (i.e., the node that subsumed arc->m_to).
469 call_graph_arc
*merge
;
470 call_graph_arc key
= call_graph_arc(source
, arc
->m_from
);
471 arc_container_t::iterator iter
= arcs
.find(&key
);
473 if (iter
== arcs
.end()) {
474 merge
= new call_graph_arc(source
, arc
->m_from
);
477 from_map
.insert(arc_map_t::value_type(merge
->m_from
, merge
));
478 map_add_queue
.push_back(merge
);
481 // We found an existing arc: temporarily remove it
482 // from the sorted index.
483 merge
= const_cast<call_graph_arc
*>(*iter
);
484 sorted_arcs
.erase(merge
);
487 // Include the doomed arc's weight in the merged arc, and
488 // (re-)insert it into the sorted index.
489 merge
->m_count
+= doomed
->m_count
;
490 sorted_arcs
.insert(merge
);
495 to_map
.erase(p
.first
, p
.second
);
497 for (arc_list_t::iterator merge
= map_add_queue
.begin();
498 merge
!= map_add_queue
.end();
499 map_add_queue
.erase(merge
++)) {
500 to_map
.insert(arc_map_t::value_type((*merge
)->m_to
, *merge
));
503 // Now, roll anything that arc->m_to used to point at into
504 // what arc->m_from now points at.
506 // Collect all of the arcs that originate at arc->m_to.
507 p
= from_map
.equal_range(arc
->m_to
);
508 for (arc_map_t::iterator i
= p
.first
; i
!= p
.second
; ++i
) {
509 // Remove the arc originating from arc->m_to (`doomed')
510 // from all of our indicies.
511 call_graph_arc
*doomed
= i
->second
;
512 const Elf32_Sym
*sink
= doomed
->m_to
;
514 // There shouldn't be any self-referential arcs.
515 assert(sink
!= arc
->m_to
);
517 sorted_arcs
.erase(doomed
);
519 remove_from(to_map
, sink
, doomed
);
520 // N.B. that `doomed' will be removed from the `from_map'
521 // once the loop completes.
523 // Create a new (or locate an existing) arc whose source
524 // is arc->m_from and whose sink was the removed arc's
526 call_graph_arc
*merge
;
527 call_graph_arc key
= call_graph_arc(arc
->m_from
, sink
);
528 arc_container_t::iterator iter
= arcs
.find(&key
);
530 if (iter
== arcs
.end()) {
531 merge
= new call_graph_arc(arc
->m_from
, sink
);
535 map_add_queue
.push_back(merge
);
536 to_map
.insert(arc_map_t::value_type(merge
->m_to
, merge
));
539 // We found an existing arc: temporarily remove it
540 // from the sorted index.
541 merge
= const_cast<call_graph_arc
*>(*iter
);
542 sorted_arcs
.erase(merge
);
545 // Include the doomed arc's weight in the merged arc, and
546 // (re-)insert it into the sorted index.
547 merge
->m_count
+= doomed
->m_count
;
548 sorted_arcs
.insert(merge
);
553 from_map
.erase(p
.first
, p
.second
);
555 for (arc_list_t::iterator merge
= map_add_queue
.begin();
556 merge
!= map_add_queue
.end();
557 map_add_queue
.erase(merge
++)) {
558 from_map
.insert(arc_map_t::value_type((*merge
)->m_from
, *merge
));