lldb/scripts/analyze-project-deps.py

   1 #!/usr/bin/env python
   2
   3 import argparse
   4 import itertools
   5 import os
   6 import re
   7 import sys
   8 from collections import defaultdict
   9
  10 from use_lldb_suite import lldb_root
  11
  12 parser = argparse.ArgumentParser(
  13     description="Analyze LLDB project #include dependencies."
  14 )
  15 parser.add_argument(
  16     "--show-counts",
  17     default=False,
  18     action="store_true",
  19     help="When true, show the number of dependencies from each subproject",
  20 )
  21 parser.add_argument(
  22     "--discover-cycles",
  23     default=False,
  24     action="store_true",
  25     help="When true, find and display all project dependency cycles.  Note,"
  26     "this option is very slow",
  27 )
  28
  29 args = parser.parse_args()
  30
  31 src_dir = os.path.join(lldb_root, "source")
  32 inc_dir = os.path.join(lldb_root, "include")
  33
  34 src_map = {}
  35
  36 include_regex = re.compile('#include "((lldb|Plugins|clang)(.*/)+).*"')
  37
  38
  39 def is_sublist(small, big):
  40     it = iter(big)
  41     return all(c in it for c in small)
  42
  43
  44 def normalize_host(str):
  45     if str.startswith("lldb/Host"):
  46         return "lldb/Host"
  47     if str.startswith("Plugins"):
  48         return "lldb/" + str
  49     if str.startswith("lldb/../../source"):
  50         return str.replace("lldb/../../source", "lldb")
  51     return str
  52
  53
  54 def scan_deps(this_dir, file):
  55     global src_map
  56     deps = {}
  57     this_dir = normalize_host(this_dir)
  58     if this_dir in src_map:
  59         deps = src_map[this_dir]
  60
  61     with open(file) as f:
  62         for line in list(f):
  63             m = include_regex.match(line)
  64             if m is None:
  65                 continue
  66             relative = m.groups()[0].rstrip("/")
  67             if relative == this_dir:
  68                 continue
  69             relative = normalize_host(relative)
  70             if relative in deps:
  71                 deps[relative] += 1
  72             elif relative != this_dir:
  73                 deps[relative] = 1
  74     if this_dir not in src_map and len(deps) > 0:
  75         src_map[this_dir] = deps
  76
  77
  78 for base, dirs, files in os.walk(inc_dir):
  79     dir = os.path.basename(base)
  80     relative = os.path.relpath(base, inc_dir)
  81     inc_files = [x for x in files if os.path.splitext(x)[1] in [".h"]]
  82     relative = relative.replace("\\", "/")
  83     for inc in inc_files:
  84         inc_path = os.path.join(base, inc)
  85         scan_deps(relative, inc_path)
  86
  87 for base, dirs, files in os.walk(src_dir):
  88     dir = os.path.basename(base)
  89     relative = os.path.relpath(base, src_dir)
  90     src_files = [x for x in files if os.path.splitext(x)[1] in [".cpp", ".h", ".mm"]]
  91     norm_base_path = os.path.normpath(os.path.join("lldb", relative))
  92     norm_base_path = norm_base_path.replace("\\", "/")
  93     for src in src_files:
  94         src_path = os.path.join(base, src)
  95         scan_deps(norm_base_path, src_path)
  96     pass
  97
  98
  99 def is_existing_cycle(path, cycles):
 100     # If we have a cycle like # A -> B -> C (with an implicit -> A at the end)
 101     # then we don't just want to check for an occurrence of A -> B -> C in the
 102     # list of known cycles, but every possible rotation of A -> B -> C.  For
 103     # example, if we previously encountered B -> C -> A (with an implicit -> B
 104     # at the end), then A -> B -> C is also a cycle.  This is an important
 105     # optimization which reduces the search space by multiple orders of
 106     # magnitude.
 107     for i in range(0, len(path)):
 108         if any(is_sublist(x, path) for x in cycles):
 109             return True
 110         path = [path[-1]] + path[0:-1]
 111     return False
 112
 113
 114 def expand(path_queue, path_lengths, cycles, src_map):
 115     # We do a breadth first search, to make sure we visit all paths in order
 116     # of ascending length.  This is an important optimization to make sure that
 117     # short cycles are discovered first, which will allow us to discard longer
 118     # cycles which grow the search space exponentially the longer they get.
 119     while len(path_queue) > 0:
 120         cur_path = path_queue.pop(0)
 121         if is_existing_cycle(cur_path, cycles):
 122             continue
 123
 124         next_len = path_lengths.pop(0) + 1
 125         last_component = cur_path[-1]
 126
 127         for item in src_map.get(last_component, []):
 128             if item.startswith("clang"):
 129                 continue
 130
 131             if item in cur_path:
 132                 # This is a cycle.  Minimize it and then check if the result is
 133                 # already in the list of cycles.  Insert it (or not) and then
 134                 # exit.
 135                 new_index = cur_path.index(item)
 136                 cycle = cur_path[new_index:]
 137                 if not is_existing_cycle(cycle, cycles):
 138                     cycles.append(cycle)
 139                 continue
 140
 141             path_lengths.append(next_len)
 142             path_queue.append(cur_path + [item])
 143     pass
 144
 145
 146 cycles = []
 147
 148 path_queue = [[x] for x in iter(src_map)]
 149 path_lens = [1] * len(path_queue)
 150
 151 items = list(src_map.items())
 152 items.sort(key=lambda A: A[0])
 153
 154 for path, deps in items:
 155     print(path + ":")
 156     sorted_deps = list(deps.items())
 157     if args.show_counts:
 158         sorted_deps.sort(key=lambda A: (A[1], A[0]))
 159         for dep in sorted_deps:
 160             print("\t{} [{}]".format(dep[0], dep[1]))
 161     else:
 162         sorted_deps.sort(key=lambda A: A[0])
 163         for dep in sorted_deps:
 164             print("\t{}".format(dep[0]))
 165
 166
 167 def iter_cycles(cycles):
 168     global src_map
 169     for cycle in cycles:
 170         cycle.append(cycle[0])
 171         zipper = list(zip(cycle[0:-1], cycle[1:]))
 172         result = [(x, src_map[x][y], y) for (x, y) in zipper]
 173         total = 0
 174         smallest = result[0][1]
 175         for first, value, last in result:
 176             total += value
 177             smallest = min(smallest, value)
 178         yield (total, smallest, result)
 179
 180
 181 if args.discover_cycles:
 182     print("Analyzing cycles...")
 183
 184     expand(path_queue, path_lens, cycles, src_map)
 185
 186     average = sum([len(x) + 1 for x in cycles]) / len(cycles)
 187
 188     print("Found {} cycles.  Average cycle length = {}.".format(len(cycles), average))
 189     counted = list(iter_cycles(cycles))
 190     if args.show_counts:
 191         counted.sort(key=lambda A: A[0])
 192         for total, smallest, cycle in counted:
 193             sys.stdout.write("{} deps to break: ".format(total))
 194             sys.stdout.write(cycle[0][0])
 195             for first, count, last in cycle:
 196                 sys.stdout.write(" [{}->] {}".format(count, last))
 197             sys.stdout.write("\n")
 198     else:
 199         for cycle in cycles:
 200             cycle.append(cycle[0])
 201             print(" -> ".join(cycle))
 202
 203     print("Analyzing islands...")
 204     islands = []
 205     outgoing_counts = defaultdict(int)
 206     incoming_counts = defaultdict(int)
 207     for total, smallest, cycle in counted:
 208         for first, count, last in cycle:
 209             outgoing_counts[first] += count
 210             incoming_counts[last] += count
 211     for cycle in cycles:
 212         this_cycle = set(cycle)
 213         disjoints = [x for x in islands if this_cycle.isdisjoint(x)]
 214         overlaps = [x for x in islands if not this_cycle.isdisjoint(x)]
 215         islands = disjoints + [set.union(this_cycle, *overlaps)]
 216     print("Found {} disjoint cycle islands...".format(len(islands)))
 217     for island in islands:
 218         print("Island ({} elements)".format(len(island)))
 219         sorted = []
 220         for node in island:
 221             sorted.append((node, incoming_counts[node], outgoing_counts[node]))
 222         sorted.sort(key=lambda x: x[1] + x[2])
 223         for node, inc, outg in sorted:
 224             print("  {} [{} in, {} out]".format(node, inc, outg))
 225     sys.stdout.flush()
 226 pass