third_party/pycoverage/coverage/data.py

   1 """Coverage data for Coverage."""
   2
   3 import os
   4
   5 from coverage.backward import iitems, pickle, sorted    # pylint: disable=W0622
   6 from coverage.files import PathAliases
   7 from coverage.misc import file_be_gone
   8
   9
  10 class CoverageData(object):
  11     """Manages collected coverage data, including file storage.
  12
  13     The data file format is a pickled dict, with these keys:
  14
  15         * collector: a string identifying the collecting software
  16
  17         * lines: a dict mapping filenames to sorted lists of line numbers
  18           executed:
  19             { 'file1': [17,23,45],  'file2': [1,2,3], ... }
  20
  21         * arcs: a dict mapping filenames to sorted lists of line number pairs:
  22             { 'file1': [(17,23), (17,25), (25,26)], ... }
  23
  24     """
  25
  26     def __init__(self, basename=None, collector=None, debug=None):
  27         """Create a CoverageData.
  28
  29         `basename` is the name of the file to use for storing data.
  30
  31         `collector` is a string describing the coverage measurement software.
  32
  33         `debug` is a `DebugControl` object for writing debug messages.
  34
  35         """
  36         self.collector = collector or 'unknown'
  37         self.debug = debug
  38
  39         self.use_file = True
  40
  41         # Construct the filename that will be used for data file storage, if we
  42         # ever do any file storage.
  43         self.filename = basename or ".coverage"
  44         self.filename = os.path.abspath(self.filename)
  45
  46         # A map from canonical Python source file name to a dictionary in
  47         # which there's an entry for each line number that has been
  48         # executed:
  49         #
  50         #   {
  51         #       'filename1.py': { 12: None, 47: None, ... },
  52         #       ...
  53         #       }
  54         #
  55         self.lines = {}
  56
  57         # A map from canonical Python source file name to a dictionary with an
  58         # entry for each pair of line numbers forming an arc:
  59         #
  60         #   {
  61         #       'filename1.py': { (12,14): None, (47,48): None, ... },
  62         #       ...
  63         #       }
  64         #
  65         self.arcs = {}
  66
  67     def usefile(self, use_file=True):
  68         """Set whether or not to use a disk file for data."""
  69         self.use_file = use_file
  70
  71     def read(self):
  72         """Read coverage data from the coverage data file (if it exists)."""
  73         if self.use_file:
  74             self.lines, self.arcs = self._read_file(self.filename)
  75         else:
  76             self.lines, self.arcs = {}, {}
  77
  78     def write(self, suffix=None):
  79         """Write the collected coverage data to a file.
  80
  81         `suffix` is a suffix to append to the base file name. This can be used
  82         for multiple or parallel execution, so that many coverage data files
  83         can exist simultaneously.  A dot will be used to join the base name and
  84         the suffix.
  85
  86         """
  87         if self.use_file:
  88             filename = self.filename
  89             if suffix:
  90                 filename += "." + suffix
  91             self.write_file(filename)
  92
  93     def erase(self):
  94         """Erase the data, both in this object, and from its file storage."""
  95         if self.use_file:
  96             if self.filename:
  97                 file_be_gone(self.filename)
  98         self.lines = {}
  99         self.arcs = {}
 100
 101     def line_data(self):
 102         """Return the map from filenames to lists of line numbers executed."""
 103         return dict(
 104             [(f, sorted(lmap.keys())) for f, lmap in iitems(self.lines)]
 105             )
 106
 107     def arc_data(self):
 108         """Return the map from filenames to lists of line number pairs."""
 109         return dict(
 110             [(f, sorted(amap.keys())) for f, amap in iitems(self.arcs)]
 111             )
 112
 113     def write_file(self, filename):
 114         """Write the coverage data to `filename`."""
 115
 116         # Create the file data.
 117         data = {}
 118
 119         data['lines'] = self.line_data()
 120         arcs = self.arc_data()
 121         if arcs:
 122             data['arcs'] = arcs
 123
 124         if self.collector:
 125             data['collector'] = self.collector
 126
 127         if self.debug and self.debug.should('dataio'):
 128             self.debug.write("Writing data to %r" % (filename,))
 129
 130         # Write the pickle to the file.
 131         fdata = open(filename, 'wb')
 132         try:
 133             pickle.dump(data, fdata, 2)
 134         finally:
 135             fdata.close()
 136
 137     def read_file(self, filename):
 138         """Read the coverage data from `filename`."""
 139         self.lines, self.arcs = self._read_file(filename)
 140
 141     def raw_data(self, filename):
 142         """Return the raw pickled data from `filename`."""
 143         if self.debug and self.debug.should('dataio'):
 144             self.debug.write("Reading data from %r" % (filename,))
 145         fdata = open(filename, 'rb')
 146         try:
 147             data = pickle.load(fdata)
 148         finally:
 149             fdata.close()
 150         return data
 151
 152     def _read_file(self, filename):
 153         """Return the stored coverage data from the given file.
 154
 155         Returns two values, suitable for assigning to `self.lines` and
 156         `self.arcs`.
 157
 158         """
 159         lines = {}
 160         arcs = {}
 161         try:
 162             data = self.raw_data(filename)
 163             if isinstance(data, dict):
 164                 # Unpack the 'lines' item.
 165                 lines = dict([
 166                     (f, dict.fromkeys(linenos, None))
 167                         for f, linenos in iitems(data.get('lines', {}))
 168                     ])
 169                 # Unpack the 'arcs' item.
 170                 arcs = dict([
 171                     (f, dict.fromkeys(arcpairs, None))
 172                         for f, arcpairs in iitems(data.get('arcs', {}))
 173                     ])
 174         except Exception:
 175             pass
 176         return lines, arcs
 177
 178     def combine_parallel_data(self, aliases=None):
 179         """Combine a number of data files together.
 180
 181         Treat `self.filename` as a file prefix, and combine the data from all
 182         of the data files starting with that prefix plus a dot.
 183
 184         If `aliases` is provided, it's a `PathAliases` object that is used to
 185         re-map paths to match the local machine's.
 186
 187         """
 188         aliases = aliases or PathAliases()
 189         data_dir, local = os.path.split(self.filename)
 190         localdot = local + '.'
 191         for f in os.listdir(data_dir or '.'):
 192             if f.startswith(localdot):
 193                 full_path = os.path.join(data_dir, f)
 194                 new_lines, new_arcs = self._read_file(full_path)
 195                 for filename, file_data in iitems(new_lines):
 196                     filename = aliases.map(filename)
 197                     self.lines.setdefault(filename, {}).update(file_data)
 198                 for filename, file_data in iitems(new_arcs):
 199                     filename = aliases.map(filename)
 200                     self.arcs.setdefault(filename, {}).update(file_data)
 201                 if f != local:
 202                     os.remove(full_path)
 203
 204     def add_line_data(self, line_data):
 205         """Add executed line data.
 206
 207         `line_data` is { filename: { lineno: None, ... }, ...}
 208
 209         """
 210         for filename, linenos in iitems(line_data):
 211             self.lines.setdefault(filename, {}).update(linenos)
 212
 213     def add_arc_data(self, arc_data):
 214         """Add measured arc data.
 215
 216         `arc_data` is { filename: { (l1,l2): None, ... }, ...}
 217
 218         """
 219         for filename, arcs in iitems(arc_data):
 220             self.arcs.setdefault(filename, {}).update(arcs)
 221
 222     def touch_file(self, filename):
 223         """Ensure that `filename` appears in the data, empty if needed."""
 224         self.lines.setdefault(filename, {})
 225
 226     def measured_files(self):
 227         """A list of all files that had been measured."""
 228         return list(self.lines.keys())
 229
 230     def executed_lines(self, filename):
 231         """A map containing all the line numbers executed in `filename`.
 232
 233         If `filename` hasn't been collected at all (because it wasn't executed)
 234         then return an empty map.
 235
 236         """
 237         return self.lines.get(filename) or {}
 238
 239     def executed_arcs(self, filename):
 240         """A map containing all the arcs executed in `filename`."""
 241         return self.arcs.get(filename) or {}
 242
 243     def add_to_hash(self, filename, hasher):
 244         """Contribute `filename`'s data to the Md5Hash `hasher`."""
 245         hasher.update(self.executed_lines(filename))
 246         hasher.update(self.executed_arcs(filename))
 247
 248     def summary(self, fullpath=False):
 249         """Return a dict summarizing the coverage data.
 250
 251         Keys are based on the filenames, and values are the number of executed
 252         lines.  If `fullpath` is true, then the keys are the full pathnames of
 253         the files, otherwise they are the basenames of the files.
 254
 255         """
 256         summ = {}
 257         if fullpath:
 258             filename_fn = lambda f: f
 259         else:
 260             filename_fn = os.path.basename
 261         for filename, lines in iitems(self.lines):
 262             summ[filename_fn(filename)] = len(lines)
 263         return summ
 264
 265     def has_arcs(self):
 266         """Does this data have arcs?"""
 267         return bool(self.arcs)
 268
 269
 270 if __name__ == '__main__':
 271     # Ad-hoc: show the raw data in a data file.
 272     import pprint, sys
 273     covdata = CoverageData()
 274     if sys.argv[1:]:
 275         fname = sys.argv[1]
 276     else:
 277         fname = covdata.filename
 278     pprint.pprint(covdata.raw_data(fname))