# pyx/graph/data.py
# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, ConfigParser, struct, warnings
from pyx import text, pycompat
import style

builtinlist = list


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
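
# Usage sketch (illustrative only, not part of the original module):
# splitatvalue maps a value onto an axis section, e.g. for split axes. With a
# single split point the section is 0 or 1; with several split points they act
# pairwise, and values falling between a pair give section None (a gap):
#
#   splitatvalue(0.5, 1)       # -> (0, 0.5)
#   splitatvalue(1.5, 1)       # -> (1, 1.5)
#   splitatvalue(1.5, 1, 2)    # -> (None, 1.5)
#   splitatvalue(2.5, 1, 2)    # -> (1, 2.5)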


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is None,
    the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data does
        not need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(columns.values()):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = columns.keys()
        self.title = title
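
# Usage sketch (illustrative only, not part of the original module; assumes
# the usual "from pyx import graph" and a graph instance g):
#
#   g.plot(graph.data.values(x=[1, 2, 3, 4], y=[1, 4, 9, 16]))
#
# Every keyword argument becomes a column of that name; all columns must have
# the same number of values, otherwise ValueError is raised.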


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            self.columns = dict([(key, []) for key, i in columns.items()])
        self.columnnames = self.columns.keys()
        self.title = title
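
# Usage sketch (illustrative only): column numbers count positions within each
# point starting at 1, since addlinenumbers=1 prepends the line number as
# column 0:
#
#   g.plot(graph.data.points([(1, 0.2), (2, 0.9), (3, 0.3)], x=1, y=2))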


def list(*args, **kwargs):
    warnings.warn("graph.data.list is deprecated. Use graph.data.points instead.")
    return points(*args, **kwargs)


class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)

class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be scrambled unpredictably)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = self.orgdata.columns.items()[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in xrange(count):
                        self.columncallbackcount = i
                        for key, values in self.orgdata.columns.items():
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
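
# Usage sketch (illustrative only; d stands for any existing data instance,
# e.g. one created by graph.data.file below): a keyword value may name an
# existing column, give a columndata index, or be a mathematical expression;
# "$n" is rewritten into a call of the column callback and thus refers to
# columndata[n]:
#
#   d2 = graph.data.data(d, x="x", y="2*sqrt(abs($2))")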


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return map(float, line.split())
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
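
# Usage sketch (illustrative only; the file name "input.dat" is hypothetical
# and expected to hold whitespace-separated columns, with "#" starting
# comment lines):
#
#   g.plot(graph.data.file("input.dat", x=1, y=2))
#
# Column numbers start at 1; column 0 holds the line number within the file.
# A comment line preceding the data may provide column titles, which can then
# be used as column names instead of numbers.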


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see the
          config module description)
        - the columns for each row are defined by the lines in the section;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                conffilecache[filename] = readfile(open(filename), filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
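
# Usage sketch (illustrative only; the file name and contents are
# hypothetical). Each section becomes one row, each option a named column:
#
#   samples.cfg:
#       [first]
#       x = 1
#       y = 2.5
#       [second]
#       x = 2
#       y = 3.7
#
#   d = graph.data.conffile("samples.cfg")   # exposes columns "x" and "y"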


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
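
# Usage sketch (illustrative only; the file name is hypothetical): cbdfile
# reads binary .cbd outline data (segments of latitude/longitude stored in
# seconds of arc) and provides them in degrees, with (None, None) rows
# separating the segments; columndata[1] and columndata[2] hold longitude and
# latitude:
#
#   d = graph.data.cbdfile("coastline.cbd", minrank=1, maxrank=2, x=1, y=2)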


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
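
# Usage sketch (illustrative only): the expression must be of the form
# "y(x)=..."; the x range is taken from min/max or, when not given, from the
# graph's x axis at plot time:
#
#   g.plot(graph.data.function("y(x)=sin(x)/x", min=0.01, max=15))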


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
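
# Usage sketch (illustrative only): functionxy wraps a Python callable instead
# of an expression string:
#
#   g.plot(graph.data.functionxy(lambda x: x**3 - 2*x, min=-2, max=2))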


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = self.columns.keys()
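
# Usage sketch (illustrative only): a parametric curve; the expression assigns
# a tuple to the listed column names for every value of the parameter. Inside
# the expression the _mathglobals names (sin, cos, pi, ...) are available,
# while min and max are plain Python numbers:
#
#   g.plot(graph.data.paramfunction("k", 0, 2*math.pi,
#                                   "x, y = cos(3*k), sin(5*k)"))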


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)
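
# Usage sketch (illustrative only): paramfunctionxy takes a callable returning
# the tuple of column values, here for the columns x and y (math refers to the
# standard module, imported in the caller's namespace):
#
#   g.plot(graph.data.paramfunctionxy(lambda t: (math.cos(t), math.sin(t)),
#                                     0, 2*math.pi))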


class _nodefaultstyles:
    pass


class join(_data):
    "creates a new data set by joining a list of data; it does however *not* combine points, but fills missing data with None where necessary"

    def merge_lists(self, lists):
        "merges list items w/o duplications, resulting order is arbitrary"
        result = pycompat.set()
        for l in lists:
            result.update(pycompat.set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([d.keys() for d in dicts])
        empties = []
        for d in dicts:
            if len(d.keys()) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = d.values()
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed),
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
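
# Usage sketch (illustrative only; d1 and d2 stand for any data instances,
# e.g. created by graph.data.file above): join concatenates the rows of
# several data sets; columns missing from one of them are padded with None:
#
#   g.plot(graph.data.join([d1, d2]))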