take into account kerning and inter-character spacing in bounding box
[PyX.git] / pyx / reader.py
blob839000e04e34811be5b9857058652b59f0b263a3
1 # -*- encoding: utf-8 -*-
4 # Copyright (C) 2007-2011 Jörg Lehmann <joergl@users.sourceforge.net>
5 # Copyright (C) 2007-2011 André Wobst <wobsta@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
24 import io, struct
class reader:
    """Sequential reader for big-endian binary data stored in a file.

    The underlying binary file object is available as the attribute file.
    Instances can be used as context managers; leaving the with block
    delegates to the __exit__ method of the underlying file object.
    """

    def __init__(self, filename):
        """open the file named filename for reading in binary mode"""
        self.file = open(filename, "rb")

    def tell(self):
        """return the current position in the underlying file"""
        return self.file.tell()

    def eof(self):
        """return True when no further data can be read

        Python file objects do not provide an eof method. Instead a single
        byte is read as a probe and the file position is restored afterwards,
        so calling this method does not consume any data. The underlying file
        object needs to support seek for that.
        """
        pos = self.file.tell()
        at_end = not self.file.read(1)
        self.file.seek(pos)
        return at_end

    def read(self, bytes):
        """read and return up to bytes bytes of raw data"""
        return self.file.read(bytes)

    def readint(self, bytes=4, signed=0):
        """read a big-endian integer made up of bytes bytes

        When signed is set, the most significant byte is interpreted as a
        signed value (two's complement), otherwise the result is unsigned.
        """
        first = 1
        result = 0
        while bytes:
            value = ord(self.file.read(1))
            # only the leading byte carries the sign
            if first and signed and value > 127:
                value -= 256
            first = 0
            result = 256 * result + value
            bytes -= 1
        return result

    def readint32(self):
        """read a signed big-endian 32 bit integer"""
        return struct.unpack(">l", self.file.read(4))[0]

    def readuint32(self):
        """read an unsigned big-endian 32 bit integer"""
        return struct.unpack(">L", self.file.read(4))[0]

    def readint24(self):
        """read a signed big-endian 24 bit integer"""
        data = self.file.read(3)
        # Extend the three bytes to four bytes by replicating the sign bit.
        # Padding with a zero byte unconditionally would turn every negative
        # value into a positive one.
        padding = b"\xff" if data[:1] >= b"\x80" else b"\0"
        return struct.unpack(">l", padding + data)[0]

    def readuint24(self):
        """read an unsigned big-endian 24 bit integer"""
        return struct.unpack(">L", b"\0"+self.file.read(3))[0]

    def readint16(self):
        """read a signed big-endian 16 bit integer"""
        return struct.unpack(">h", self.file.read(2))[0]

    def readuint16(self):
        """read an unsigned big-endian 16 bit integer"""
        return struct.unpack(">H", self.file.read(2))[0]

    def readchar(self):
        """read a single byte as a signed integer"""
        return struct.unpack("b", self.file.read(1))[0]

    def readuchar(self):
        """read a single byte as an unsigned integer"""
        return struct.unpack("B", self.file.read(1))[0]

    def readstring(self, bytes):
        """read a length-prefixed string stored in a field of bytes bytes

        The first byte of the field contains the length of the string. The
        remaining bytes-1 bytes are always consumed, but only the leading
        part of the given length is returned.
        """
        l = self.readuchar()
        assert l <= bytes-1, "inconsistency in file: string too long"
        return self.file.read(bytes-1)[:l]

    def close(self):
        """close the underlying file"""
        self.file.close()

    def __enter__(self):
        return self

    def __exit__(self, exc_type, exc_value, traceback):
        # delegate cleanup (and exception handling) to the file object
        return self.file.__exit__(exc_type, exc_value, traceback)
class bytesreader(reader):
    """reader operating on a bytes object held in memory instead of a file"""

    def __init__(self, b):
        """wrap the bytes b into an in-memory binary stream"""
        stream = io.BytesIO(b)
        self.file = stream
98 class PStokenizer:
99 """cursor to read a string token by token"""
101 def __init__(self, data, startstring=None, eattokensep=1,
102 tokenseps=" \t\r\n", tokenstarts="()<>[]{}/%",
103 commentchar="%", newlinechars="\r\n"):
104 """creates a cursor for the string data
106 startstring is a string at which the cursor should start at. The first
107 ocurance of startstring is used. When startstring is not in data, an
108 exception is raised, otherwise the cursor is set to the position right
109 after the startstring. When eattokenseps is set, startstring must be
110 followed by a tokensep and this first tokensep is also consumed.
111 tokenseps is a string containing characters to be used as token
112 separators. tokenstarts is a string containing characters which
113 directly (even without intermediate token separator) start a new token.
115 self.data = data
116 if startstring is not None:
117 self.pos = self.data.index(startstring) + len(startstring)
118 else:
119 self.pos = 0
120 self.tokenseps = tokenseps
121 self.tokenstarts = tokenstarts
122 self.commentchar = commentchar
123 self.newlinechars = newlinechars
124 if eattokensep:
125 if self.data[self.pos] not in self.tokenstarts:
126 if self.data[self.pos] not in self.tokenseps:
127 raise ValueError("cursor initialization string is not followed by a token separator")
128 self.pos += 1
130 def gettoken(self):
131 """get the next token
133 Leading token separators and comments are silently consumed. The first token
134 separator after the token is also silently consumed."""
135 while self.data[self.pos] in self.tokenseps:
136 self.pos += 1
137 # ignore comments including subsequent whitespace characters
138 while self.data[self.pos] == self.commentchar:
139 while self.data[self.pos] not in self.newlinechars:
140 self.pos += 1
141 while self.data[self.pos] in self.tokenseps:
142 self.pos += 1
143 startpos = self.pos
144 while self.data[self.pos] not in self.tokenseps:
145 # any character in self.tokenstarts ends the token
146 if self.pos>startpos and self.data[self.pos] in self.tokenstarts:
147 break
148 self.pos += 1
149 result = self.data[startpos:self.pos]
150 if self.data[self.pos] in self.tokenseps:
151 self.pos += 1 # consume a single tokensep
152 return result
154 def getint(self):
155 """get the next token as an integer"""
156 return int(self.gettoken())
158 def getbytes(self, count):
159 """get the next count bytes"""
160 startpos = self.pos
161 self.pos += count
162 return self.data[startpos: self.pos]
class PSbytes_tokenizer(PStokenizer):
    """PStokenizer whose default character sets are given as bytes objects"""

    def __init__(self, data, startstring=None, eattokensep=1,
                 tokenseps=b" \t\r\n", tokenstarts=b"()<>[]{}/%",
                 commentchar=b"%", newlinechars=b"\r\n"):
        """creates a cursor for data, handing all arguments to PStokenizer"""
        super().__init__(
            data,
            startstring=startstring,
            eattokensep=eattokensep,
            tokenseps=tokenseps,
            tokenstarts=tokenstarts,
            commentchar=commentchar,
            newlinechars=newlinechars,
        )