# pyx/graph/data.py
# -*- encoding: utf-8 -*-
#
#
# Copyright (C) 2002-2004 Jörg Lehmann <joergl@users.sourceforge.net>
# Copyright (C) 2003-2004 Michael Schindler <m-schindler@users.sourceforge.net>
# Copyright (C) 2002-2012 André Wobst <wobsta@users.sourceforge.net>
#
# This file is part of PyX (http://pyx.sourceforge.net/).
#
# PyX is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# PyX is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with PyX; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA

import math, re, ConfigParser, struct, warnings
from pyx import text, pycompat
import style

builtinlist = list


def splitatvalue(value, *splitpoints):
    section = 0
    while section < len(splitpoints) and splitpoints[section] < value:
        section += 1
    if len(splitpoints) > 1:
        if section % 2:
            section = None
        else:
            section >>= 1
    return (section, value)
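
# Usage sketch (illustrative only, not part of the original module):
# splitatvalue maps a value onto an axis section, e.g. for split axes. With a
# single split point the section is 0 or 1; with several split points they act
# pairwise, and values falling between a pair give section None (a gap):
#
#   splitatvalue(0.5, 1)       # -> (0, 0.5)
#   splitatvalue(1.5, 1)       # -> (1, 1.5)
#   splitatvalue(1.5, 1, 2)    # -> (None, 1.5)
#   splitatvalue(2.5, 1, 2)    # -> (1, 2.5)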


_mathglobals = {"neg": lambda x: -x,
                "abs": lambda x: x < 0 and -x or x,
                "sgn": lambda x: x < 0 and -1 or 1,
                "sqrt": math.sqrt,
                "exp": math.exp,
                "log": math.log,
                "sin": math.sin,
                "cos": math.cos,
                "tan": math.tan,
                "asin": math.asin,
                "acos": math.acos,
                "atan": math.atan,
                "sind": lambda x: math.sin(math.pi/180*x),
                "cosd": lambda x: math.cos(math.pi/180*x),
                "tand": lambda x: math.tan(math.pi/180*x),
                "asind": lambda x: 180/math.pi*math.asin(x),
                "acosd": lambda x: 180/math.pi*math.acos(x),
                "atand": lambda x: 180/math.pi*math.atan(x),
                "norm": lambda x, y: math.hypot(x, y),
                "splitatvalue": splitatvalue,
                "pi": math.pi,
                "e": math.e}


class _data:
    """graph data interface

    Graph data consists of columns, where each column might be identified by a
    string or an integer. Each row in the resulting table refers to a data
    point.

    All methods except for the constructor should consider self and its
    attributes to be readonly, since the data instance might be shared between
    several graphs simultaneously.

    The instance variable columns is a dictionary mapping column names to the
    data of the column (i.e. to a list). Only static columns (known at
    construction time) are contained in that dictionary. For data with numbered
    columns the column data is also available via the list columndata.
    Otherwise the columndata list should be missing and an access to a column
    number will fail.

    The names of all columns (static and dynamic) must be fixed by the
    constructor and stated in the columnnames list.

    The instance variables title and defaultstyles contain the data title and
    the default styles (a list of styles), respectively. If defaultstyles is None,
    the data cannot be plotted without user provided styles.
    """

    def dynamiccolumns(self, graph, axisnames):
        """create and return dynamic columns data

        Returns dynamic data matching the given axes (the axes range and other
        data might be used). The return value is a dictionary similar to the
        columns instance variable. However, the static and dynamic data does
        not need to be correlated in any way, i.e. the number of data points in
        self.columns might differ from the number of data points represented by
        the return value of the dynamiccolumns method.
        """
        return {}


defaultsymbols = [style.symbol()]
defaultlines = [style.line()]


class values(_data):

    defaultstyles = defaultsymbols

    def __init__(self, title="user provided values", **columns):
        for i, values in enumerate(columns.values()):
            if i and len(values) != l:
                raise ValueError("different number of values")
            else:
                l = len(values)
        self.columns = columns
        self.columnnames = columns.keys()
        self.title = title
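
# Usage sketch (illustrative only, not part of the original module; assumes
# the usual "from pyx import graph" and a graph instance g):
#
#   g.plot(graph.data.values(x=[1, 2, 3, 4], y=[1, 4, 9, 16]))
#
# Every keyword argument becomes a column of that name; all columns must have
# the same number of values, otherwise ValueError is raised.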


class points(_data):
    "Graph data from a list of points"

    defaultstyles = defaultsymbols

    def __init__(self, points, title="user provided points", addlinenumbers=1, **columns):
        if len(points):
            l = len(points[0])
            self.columndata = [[x] for x in points[0]]
            for point in points[1:]:
                if l != len(point):
                    raise ValueError("different number of columns per point")
                for i, x in enumerate(point):
                    self.columndata[i].append(x)
            for v in columns.values():
                if abs(v) > l or (not addlinenumbers and abs(v) == l):
                    raise ValueError("column number bigger than number of columns")
            if addlinenumbers:
                self.columndata = [range(1, len(points) + 1)] + self.columndata
            self.columns = dict([(key, self.columndata[i]) for key, i in columns.items()])
        else:
            self.columns = dict([(key, []) for key, i in columns.items()])
        self.columnnames = self.columns.keys()
        self.title = title
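
# Usage sketch (illustrative only): column numbers count positions within each
# point starting at 1, since addlinenumbers=1 prepends the line number as
# column 0:
#
#   g.plot(graph.data.points([(1, 0.2), (2, 0.9), (3, 0.3)], x=1, y=2))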


def list(*args, **kwargs):
    warnings.warn("graph.data.list is deprecated. Use graph.data.points instead.")
    return points(*args, **kwargs)


class _notitle:
    pass

_columnintref = re.compile(r"\$(-?\d+)", re.IGNORECASE)

class data(_data):
    "creates a new data set out of an existing data set"

    def __init__(self, data, title=_notitle, context={}, copy=1,
                 replacedollar=1, columncallback="__column__", **columns):
        # build a nice title
        if title is _notitle:
            items = columns.items()
            items.sort() # we want sorted items (otherwise they would be scrambled unpredictably)
            self.title = "%s: %s" % (text.escapestring(data.title or "unknown source"),
                                     ", ".join(["%s=%s" % (text.escapestring(key),
                                                           text.escapestring(str(value)))
                                                for key, value in items]))
        else:
            self.title = title

        self.orgdata = data
        self.defaultstyles = self.orgdata.defaultstyles

        # analyse the **columns argument
        self.columns = {}
        for columnname, value in columns.items():
            # search in the columns dictionary
            try:
                self.columns[columnname] = self.orgdata.columns[value]
            except KeyError:
                # search in the columndata list
                try:
                    self.columns[columnname] = self.orgdata.columndata[value]
                except (AttributeError, TypeError):
                    # value was not a valid column identifier,
                    # i.e. take it as a mathematical expression
                    if replacedollar:
                        m = _columnintref.search(value)
                        while m:
                            value = "%s%s(%s)%s" % (value[:m.start()], columncallback, m.groups()[0], value[m.end():])
                            m = _columnintref.search(value)
                        value = value.replace("$", columncallback)
                    expression = compile(value.strip(), __file__, "eval")
                    context = context.copy()
                    context[columncallback] = self.columncallback
                    if self.orgdata.columns:
                        key, columndata = self.orgdata.columns.items()[0]
                        count = len(columndata)
                    elif self.orgdata.columndata:
                        count = len(self.orgdata.columndata[0])
                    else:
                        count = 0
                    newdata = []
                    for i in xrange(count):
                        self.columncallbackcount = i
                        for key, values in self.orgdata.columns.items():
                            context[key] = values[i]
                        try:
                            newdata.append(eval(expression, _mathglobals, context))
                        except (ArithmeticError, ValueError):
                            newdata.append(None)
                    self.columns[columnname] = newdata

        if copy:
            # copy other, non-conflicting column names
            for columnname, columndata in self.orgdata.columns.items():
                if not self.columns.has_key(columnname):
                    self.columns[columnname] = columndata

        self.columnnames = self.columns.keys()

    def columncallback(self, value):
        try:
            return self.orgdata.columndata[value][self.columncallbackcount]
        except:
            return self.orgdata.columns[value][self.columncallbackcount]
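
# Usage sketch (illustrative only; d stands for any existing data instance,
# e.g. one created by graph.data.file below): a keyword value may name an
# existing column, give a columndata index, or be a mathematical expression;
# "$n" is rewritten into a call of the column callback and thus refers to
# columndata[n]:
#
#   d2 = graph.data.data(d, x="x", y="2*sqrt(abs($2))")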


filecache = {}


class file(data):

    defaultcommentpattern = re.compile(r"(#+|!+|%+)\s*")
    defaultstringpattern = re.compile(r"\"(.*?)\"(\s+|$)")
    defaultcolumnpattern = re.compile(r"(.*?)(\s+|$)")

    def splitline(self, line, stringpattern, columnpattern, tofloat=1):
        """returns a list created out of the string line
        - matches stringpattern and columnpattern, adds the first group of that
          match to the result and removes those matches until the line is empty
        - when stringpattern matched, the result is always kept as a string
        - when columnpattern matched and tofloat is true, a conversion to a float
          is tried; when this conversion fails, the string is kept"""
        result = []
        # try to gain speed by skipping the regular expression matching
        if line.find('"')!=-1 or \
           stringpattern is not self.defaultstringpattern or \
           columnpattern is not self.defaultcolumnpattern:
            while len(line):
                match = stringpattern.match(line)
                if match:
                    result.append(match.groups()[0])
                    line = line[match.end():]
                else:
                    match = columnpattern.match(line)
                    if tofloat:
                        try:
                            result.append(float(match.groups()[0]))
                        except (TypeError, ValueError):
                            result.append(match.groups()[0])
                    else:
                        result.append(match.groups()[0])
                    line = line[match.end():]
        else:
            if tofloat:
                try:
                    return map(float, line.split())
                except (TypeError, ValueError):
                    result = []
                    for r in line.split():
                        try:
                            result.append(float(r))
                        except (TypeError, ValueError):
                            result.append(r)
            else:
                return line.split()
        return result

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename,
                 commentpattern=defaultcommentpattern,
                 stringpattern=defaultstringpattern,
                 columnpattern=defaultcolumnpattern,
                 skiphead=0, skiptail=0, every=1,
                 **kwargs):

        def readfile(file, title, self=self, commentpattern=commentpattern, stringpattern=stringpattern, columnpattern=columnpattern, skiphead=skiphead, skiptail=skiptail, every=every):
            columns = []
            columndata = []
            linenumber = 0
            maxcolumns = 0
            for line in file.readlines():
                line = line.strip()
                match = commentpattern.match(line)
                if match:
                    if not len(columndata):
                        columns = self.splitline(line[match.end():], stringpattern, columnpattern, tofloat=0)
                else:
                    linedata = []
                    for value in self.splitline(line, stringpattern, columnpattern, tofloat=1):
                        linedata.append(value)
                    if len(linedata):
                        if linenumber >= skiphead and not ((linenumber - skiphead) % every):
                            linedata = [linenumber + 1] + linedata
                            if len(linedata) > maxcolumns:
                                maxcolumns = len(linedata)
                            columndata.append(linedata)
                        linenumber += 1
            if skiptail >= every:
                skip, x = divmod(skiptail, every)
                del columndata[-skip:]
            for i in xrange(len(columndata)):
                if len(columndata[i]) != maxcolumns:
                    columndata[i].extend([None]*(maxcolumns-len(columndata[i])))
            return points(columndata, title=title, addlinenumbers=0,
                          **dict([(column, i+1) for i, column in enumerate(columns[:maxcolumns-1])]))

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, commentpattern, stringpattern, columnpattern, skiphead, skiptail, every)
            if not filecache.has_key(cachekey):
                filecache[cachekey] = readfile(open(filename), filename)
            data.__init__(self, filecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
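
# Usage sketch (illustrative only; the file name "input.dat" is hypothetical
# and expected to hold whitespace-separated columns, with "#" starting
# comment lines):
#
#   g.plot(graph.data.file("input.dat", x=1, y=2))
#
# Column numbers start at 1; column 0 holds the line number within the file.
# A comment line preceding the data may provide column titles, which can then
# be used as column names instead of numbers.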


conffilecache = {}


class conffile(data):

    def __init__(self, filename, **kwargs):
        """read data from a config-like file
        - filename is a string
        - each row is defined by a section in the config-like file (see the
          config module description)
        - the columns for each row are defined by the lines in the section;
          the option entries identify and name the columns
        - further keyword arguments are passed to the constructor of data,
          keyword arguments data and title excluded"""

        def readfile(file, title):
            config = ConfigParser.ConfigParser()
            config.optionxform = str
            config.readfp(file)
            sections = config.sections()
            sections.sort()
            columndata = [None]*len(sections)
            maxcolumns = 1
            columns = {}
            for i in xrange(len(sections)):
                point = [sections[i]] + [None]*(maxcolumns-1)
                for option in config.options(sections[i]):
                    value = config.get(sections[i], option)
                    try:
                        value = float(value)
                    except:
                        pass
                    try:
                        index = columns[option]
                    except KeyError:
                        columns[option] = maxcolumns
                        point.append(value)
                        maxcolumns += 1
                    else:
                        point[index] = value
                columndata[i] = point
            # wrap result into a data instance to remove column numbers
            result = data(points(columndata, addlinenumbers=0, **columns), title=title)
            # ... but reinsert sections as linenumbers
            result.columndata = [[x[0] for x in columndata]]
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            if not conffilecache.has_key(filename):
                conffilecache[filename] = readfile(open(filename), filename)
            data.__init__(self, conffilecache[filename], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
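
# Usage sketch (illustrative only; the file name and contents are
# hypothetical). Each section becomes one row, each option a named column:
#
#   samples.cfg:
#       [first]
#       x = 1
#       y = 2.5
#       [second]
#       x = 2
#       y = 3.7
#
#   d = graph.data.conffile("samples.cfg")   # exposes columns "x" and "y"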


cbdfilecache = {}


class cbdfile(data):

    defaultstyles = defaultlines

    def getcachekey(self, *args):
        return ":".join([str(x) for x in args])

    def __init__(self, filename, minrank=None, maxrank=None, **kwargs):

        class cbdhead:

            def __init__(self, file):
                (self.magic,
                 self.dictaddr,
                 self.segcount,
                 self.segsize,
                 self.segmax,
                 self.fill) = struct.unpack("<5i20s", file.read(40))
                if self.magic != 0x20770002:
                    raise ValueError("bad magic number")

        class segdict:

            def __init__(self, file, i):
                self.index = i
                (self.segid,
                 self.maxlat,
                 self.minlat,
                 self.maxlong,
                 self.minlong,
                 self.absaddr,
                 self.nbytes,
                 self.rank) = struct.unpack("<6i2h", file.read(28))

        class segment:

            def __init__(self, file, sd):
                file.seek(sd.absaddr)
                (self.orgx,
                 self.orgy,
                 self.id,
                 self.nstrokes,
                 self.dummy) = struct.unpack("<3i2h", file.read(16))
                oln, olt = self.orgx, self.orgy
                self.points = [(olt, oln)]
                for i in range(self.nstrokes):
                    c1, c2 = struct.unpack("2c", file.read(2))
                    if ord(c2) & 0x40:
                        if c1 > "\177":
                            dy = ord(c1) - 256
                        else:
                            dy = ord(c1)
                        if c2 > "\177":
                            dx = ord(c2) - 256
                        else:
                            dx = ord(c2) - 64
                    else:
                        c3, c4, c5, c6, c7, c8 = struct.unpack("6c", file.read(6))
                        if c2 > "\177":
                            c2 = chr(ord(c2) | 0x40)
                        dx, dy = struct.unpack("<2i", c3+c4+c1+c2+c7+c8+c5+c6)
                    oln += dx
                    olt += dy
                    self.points.append((olt, oln))
                sd.nstrokes = self.nstrokes

        def readfile(file, title):
            h = cbdhead(file)
            file.seek(h.dictaddr)
            sds = [segdict(file, i+1) for i in range(h.segcount)]
            sbs = [segment(file, sd) for sd in sds]

            # remove jumps at long +/- 180
            for sd, sb in zip(sds, sbs):
                if sd.minlong < -150*3600 and sd.maxlong > 150*3600:
                    for i, (lat, long) in enumerate(sb.points):
                        if long < 0:
                            sb.points[i] = lat, long + 360*3600

            columndata = []
            for sd, sb in zip(sds, sbs):
                if ((minrank is None or sd.rank >= minrank) and
                    (maxrank is None or sd.rank <= maxrank)):
                    if columndata:
                        columndata.append((None, None))
                    columndata.extend([(long/3600.0, lat/3600.0)
                                       for lat, long in sb.points])

            result = points(columndata, title=title)
            result.defaultstyles = self.defaultstyles
            return result

        try:
            filename.readlines
        except:
            # not a file-like object -> open it
            cachekey = self.getcachekey(filename, minrank, maxrank)
            if not cbdfilecache.has_key(cachekey):
                cbdfilecache[cachekey] = readfile(open(filename, "rb"), filename)
            data.__init__(self, cbdfilecache[cachekey], **kwargs)
        else:
            data.__init__(self, readfile(filename, "user provided file-like object"), **kwargs)
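
# Usage sketch (illustrative only; the file name is hypothetical): cbdfile
# reads binary .cbd outline data (segments of latitude/longitude stored in
# seconds of arc) and provides them in degrees, with (None, None) rows
# separating the segments; columndata[1] and columndata[2] hold longitude and
# latitude:
#
#   d = graph.data.cbdfile("coastline.cbd", minrank=1, maxrank=2, x=1, y=2)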


class function(_data):

    defaultstyles = defaultlines

    assignmentpattern = re.compile(r"\s*([a-z_][a-z0-9_]*)\s*\(\s*([a-z_][a-z0-9_]*)\s*\)\s*=", re.IGNORECASE)

    def __init__(self, expression, title=_notitle, min=None, max=None,
                 points=100, context={}):

        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        self.min = min
        self.max = max
        self.numberofpoints = points
        self.context = context.copy() # be safe on late evaluations
        m = self.assignmentpattern.match(expression)
        if m:
            self.yname, self.xname = m.groups()
            expression = expression[m.end():]
        else:
            raise ValueError("y(x)=... or similar expected")
        if context.has_key(self.xname):
            raise ValueError("xname in context")
        self.expression = compile(expression.strip(), __file__, "eval")
        self.columns = {}
        self.columnnames = [self.xname, self.yname]

    def dynamiccolumns(self, graph, axisnames):
        dynamiccolumns = {self.xname: [], self.yname: []}

        xaxis = graph.axes[axisnames.get(self.xname, self.xname)]
        from pyx.graph.axis import logarithmic
        logaxis = isinstance(xaxis.axis, logarithmic)
        if self.min is not None:
            min = self.min
        else:
            min = xaxis.data.min
        if self.max is not None:
            max = self.max
        else:
            max = xaxis.data.max
        if logaxis:
            min = math.log(min)
            max = math.log(max)
        for i in range(self.numberofpoints):
            x = min + (max-min)*i / (self.numberofpoints-1.0)
            if logaxis:
                x = math.exp(x)
            dynamiccolumns[self.xname].append(x)
            self.context[self.xname] = x
            try:
                y = eval(self.expression, _mathglobals, self.context)
            except (ArithmeticError, ValueError):
                y = None
            dynamiccolumns[self.yname].append(y)
        return dynamiccolumns
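
# Usage sketch (illustrative only): the expression must be of the form
# "y(x)=..."; the x range is taken from min/max or, when not given, from the
# graph's x axis at plot time:
#
#   g.plot(graph.data.function("y(x)=sin(x)/x", min=0.01, max=15))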


class functionxy(function):

    def __init__(self, f, min=None, max=None, **kwargs):
        function.__init__(self, "y(x)=f(x)", context={"f": f}, min=min, max=max, **kwargs)
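
# Usage sketch (illustrative only): functionxy wraps a Python callable instead
# of an expression string:
#
#   g.plot(graph.data.functionxy(lambda x: x**3 - 2*x, min=-2, max=2))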


class paramfunction(_data):

    defaultstyles = defaultlines

    def __init__(self, varname, min, max, expression, title=_notitle, points=100, context={}):
        if context.has_key(varname):
            raise ValueError("varname in context")
        if title is _notitle:
            self.title = expression
        else:
            self.title = title
        varlist, expression = expression.split("=")
        expression = compile(expression.strip(), __file__, "eval")
        keys = [key.strip() for key in varlist.split(",")]
        self.columns = dict([(key, []) for key in keys])
        context = context.copy()
        for i in range(points):
            param = min + (max-min)*i / (points-1.0)
            context[varname] = param
            values = eval(expression, _mathglobals, context)
            for key, value in zip(keys, values):
                self.columns[key].append(value)
            if len(keys) != len(values):
                raise ValueError("unpack tuple of wrong size")
        self.columnnames = self.columns.keys()
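
# Usage sketch (illustrative only): a parametric curve; the expression assigns
# a tuple to the listed column names for every value of the parameter. Inside
# the expression the _mathglobals names (sin, cos, pi, ...) are available,
# while min and max are plain Python numbers:
#
#   g.plot(graph.data.paramfunction("k", 0, 2*math.pi,
#                                   "x, y = cos(3*k), sin(5*k)"))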


class paramfunctionxy(paramfunction):

    def __init__(self, f, min, max, **kwargs):
        paramfunction.__init__(self, "t", min, max, "x, y = f(t)", context={"f": f}, **kwargs)
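
# Usage sketch (illustrative only): paramfunctionxy takes a callable returning
# the tuple of column values, here for the columns x and y (math refers to the
# standard module, imported in the caller's namespace):
#
#   g.plot(graph.data.paramfunctionxy(lambda t: (math.cos(t), math.sin(t)),
#                                     0, 2*math.pi))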


class _nodefaultstyles:
    pass


class join(_data):
    "creates a new data set by joining a list of data; it does however *not* combine points, but fills missing data with None where necessary"

    def merge_lists(self, lists):
        "merges list items w/o duplications, resulting order is arbitrary"
        result = pycompat.set()
        for l in lists:
            result.update(pycompat.set(l))
        return builtinlist(result)

    def merge_dicts(self, dicts):
        """merge dicts containing lists as values (with equal number of items
        per list in each dict), missing data is padded by None"""
        keys = self.merge_lists([d.keys() for d in dicts])
        empties = []
        for d in dicts:
            if len(d.keys()) == len(keys):
                empties.append(None) # won't be needed later on
            else:
                values = d.values()
                if len(values):
                    empties.append([None]*len(values[0]))
                else:
                    # has no data at all -> do not add anything
                    empties.append([])
        result = {}
        for key in keys:
            result[key] = []
            for d, e in zip(dicts, empties):
                result[key].extend(d.get(key, e))
        return result

    def __init__(self, data, title=_notitle, defaultstyles=_nodefaultstyles):
        """takes a list of data, a title (if it should not be autoconstructed),
        and a defaultstyles list if there is no common defaultstyles setting
        in the provided data"""
        assert len(data)
        self.data = data
        self.columnnames = self.merge_lists([d.columnnames for d in data])
        self.columns = self.merge_dicts([d.columns for d in data])
        if title is _notitle:
            self.title = " + ".join([d.title for d in data])
        else:
            self.title = title
        if defaultstyles is _nodefaultstyles:
            self.defaultstyles = data[0].defaultstyles
            for d in data[1:]:
                if d.defaultstyles is not self.defaultstyles:
                    self.defaultstyles = None
                    break
        else:
            self.defaultstyles = defaultstyles

    def dynamiccolumns(self, graph, axisnames):
        return self.merge_dicts([d.dynamiccolumns(graph, axisnames) for d in self.data])
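
# Usage sketch (illustrative only; d1 and d2 stand for any data instances,
# e.g. created by graph.data.file above): join concatenates the rows of
# several data sets; columns missing from one of them are padded with None:
#
#   g.plot(graph.data.join([d1, d2]))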