lldb/scripts/analyze-project-deps.py

   1 #!/usr/bin/env python
   2
   3 import argparse
   4 import itertools
   5 import os
   6 import re
   7 import sys
   8 from collections import defaultdict
   9
  10 from use_lldb_suite import lldb_root
  11
  12 parser = argparse.ArgumentParser(
  13     description='Analyze LLDB project #include dependencies.')
  14 parser.add_argument('--show-counts', default=False, action='store_true',
  15     help='When true, show the number of dependencies from each subproject')
  16 parser.add_argument('--discover-cycles', default=False, action='store_true',
  17     help='When true, find and display all project dependency cycles.  Note,'
  18          'this option is very slow')
  19
  20 args = parser.parse_args()
  21
  22 src_dir = os.path.join(lldb_root, "source")
  23 inc_dir = os.path.join(lldb_root, "include")
  24
  25 src_map = {}
  26
  27 include_regex = re.compile('#include \"((lldb|Plugins|clang)(.*/)+).*\"')
  28
  29 def is_sublist(small, big):
  30     it = iter(big)
  31     return all(c in it for c in small)
  32
  33 def normalize_host(str):
  34     if str.startswith("lldb/Host"):
  35         return "lldb/Host"
  36     if str.startswith("Plugins"):
  37         return "lldb/" + str
  38     if str.startswith("lldb/../../source"):
  39         return str.replace("lldb/../../source", "lldb")
  40     return str
  41
  42 def scan_deps(this_dir, file):
  43     global src_map
  44     deps = {}
  45     this_dir = normalize_host(this_dir)
  46     if this_dir in src_map:
  47         deps = src_map[this_dir]
  48
  49     with open(file) as f:
  50         for line in list(f):
  51             m = include_regex.match(line)
  52             if m is None:
  53                 continue
  54             relative = m.groups()[0].rstrip("/")
  55             if relative == this_dir:
  56                 continue
  57             relative = normalize_host(relative)
  58             if relative in deps:
  59                 deps[relative] += 1
  60             elif relative != this_dir:
  61                 deps[relative] = 1
  62     if this_dir not in src_map and len(deps) > 0:
  63         src_map[this_dir] = deps
  64
  65 for (base, dirs, files) in os.walk(inc_dir):
  66     dir = os.path.basename(base)
  67     relative = os.path.relpath(base, inc_dir)
  68     inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
  69     relative = relative.replace("\\", "/")
  70     for inc in inc_files:
  71         inc_path = os.path.join(base, inc)
  72         scan_deps(relative, inc_path)
  73
  74 for (base, dirs, files) in os.walk(src_dir):
  75     dir = os.path.basename(base)
  76     relative = os.path.relpath(base, src_dir)
  77     src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
  78     norm_base_path = os.path.normpath(os.path.join("lldb", relative))
  79     norm_base_path = norm_base_path.replace("\\", "/")
  80     for src in src_files:
  81         src_path = os.path.join(base, src)
  82         scan_deps(norm_base_path, src_path)
  83     pass
  84
  85 def is_existing_cycle(path, cycles):
  86     # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
  87     # then we don't just want to check for an occurrence of A -> B -> C in the
  88     # list of known cycles, but every possible rotation of A -> B -> C.  For
  89     # example, if we previously encountered B -> C -> A (with an implicit -> B
  90     # at the end), then A -> B -> C is also a cycle.  This is an important
  91     # optimization which reduces the search space by multiple orders of
  92     # magnitude.
  93     for i in range(0,len(path)):
  94         if any(is_sublist(x, path) for x in cycles):
  95             return True
  96         path = [path[-1]] + path[0:-1]
  97     return False
  98
  99 def expand(path_queue, path_lengths, cycles, src_map):
 100     # We do a breadth first search, to make sure we visit all paths in order
 101     # of ascending length.  This is an important optimization to make sure that
 102     # short cycles are discovered first, which will allow us to discard longer
 103     # cycles which grow the search space exponentially the longer they get.
 104     while len(path_queue) > 0:
 105         cur_path = path_queue.pop(0)
 106         if is_existing_cycle(cur_path, cycles):
 107             continue
 108
 109         next_len = path_lengths.pop(0) + 1
 110         last_component = cur_path[-1]
 111
 112         for item in src_map.get(last_component, []):
 113             if item.startswith("clang"):
 114                 continue
 115
 116             if item in cur_path:
 117                 # This is a cycle.  Minimize it and then check if the result is
 118                 # already in the list of cycles.  Insert it (or not) and then
 119                 # exit.
 120                 new_index = cur_path.index(item)
 121                 cycle = cur_path[new_index:]
 122                 if not is_existing_cycle(cycle, cycles):
 123                     cycles.append(cycle)
 124                 continue
 125
 126             path_lengths.append(next_len)
 127             path_queue.append(cur_path + [item])
 128     pass
 129
 130 cycles = []
 131
 132 path_queue = [[x] for x in iter(src_map)]
 133 path_lens = [1] * len(path_queue)
 134
 135 items = list(src_map.items())
 136 items.sort(key = lambda A : A[0])
 137
 138 for (path, deps) in items:
 139     print(path + ":")
 140     sorted_deps = list(deps.items())
 141     if args.show_counts:
 142         sorted_deps.sort(key = lambda A: (A[1], A[0]))
 143         for dep in sorted_deps:
 144             print("\t{} [{}]".format(dep[0], dep[1]))
 145     else:
 146         sorted_deps.sort(key = lambda A: A[0])
 147         for dep in sorted_deps:
 148             print("\t{}".format(dep[0]))
 149
 150 def iter_cycles(cycles):
 151     global src_map
 152     for cycle in cycles:
 153         cycle.append(cycle[0])
 154         zipper = list(zip(cycle[0:-1], cycle[1:]))
 155         result = [(x, src_map[x][y], y) for (x,y) in zipper]
 156         total = 0
 157         smallest = result[0][1]
 158         for (first, value, last) in result:
 159             total += value
 160             smallest = min(smallest, value)
 161         yield (total, smallest, result)
 162
 163 if args.discover_cycles:
 164     print("Analyzing cycles...")
 165
 166     expand(path_queue, path_lens, cycles, src_map)
 167
 168     average = sum([len(x)+1 for x in cycles]) / len(cycles)
 169
 170     print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
 171     counted = list(iter_cycles(cycles))
 172     if args.show_counts:
 173         counted.sort(key = lambda A: A[0])
 174         for (total, smallest, cycle) in counted:
 175             sys.stdout.write("{} deps to break: ".format(total))
 176             sys.stdout.write(cycle[0][0])
 177             for (first, count, last) in cycle:
 178                 sys.stdout.write(" [{}->] {}".format(count, last))
 179             sys.stdout.write("\n")
 180     else:
 181         for cycle in cycles:
 182             cycle.append(cycle[0])
 183             print(" -> ".join(cycle))
 184
 185     print("Analyzing islands...")
 186     islands = []
 187     outgoing_counts = defaultdict(int)
 188     incoming_counts = defaultdict(int)
 189     for (total, smallest, cycle) in counted:
 190         for (first, count, last) in cycle:
 191             outgoing_counts[first] += count
 192             incoming_counts[last] += count
 193     for cycle in cycles:
 194         this_cycle = set(cycle)
 195         disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
 196         overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
 197         islands = disjoints + [set.union(this_cycle, *overlaps)]
 198     print("Found {} disjoint cycle islands...".format(len(islands)))
 199     for island in islands:
 200         print("Island ({} elements)".format(len(island)))
 201         sorted = []
 202         for node in island:
 203             sorted.append((node, incoming_counts[node], outgoing_counts[node]))
 204         sorted.sort(key = lambda x: x[1]+x[2])
 205         for (node, inc, outg) in sorted:
 206             print("  {} [{} in, {} out]".format(node, inc, outg))
 207     sys.stdout.flush()
 208 pass