t1 font stripping: update maxsubr to include all the flexhint subrs
[PyX.git] / font / t1file.py
blobacca07dba6464b7ac6027cd6c35d1d4d0cc54b1b
1 # -*- encoding: utf-8 -*-
4 # Copyright (C) 2005-2011 André Wobst <wobsta@users.sourceforge.net>
5 # Copyright (C) 2006-2011 Jörg Lehmann <joergl@users.sourceforge.net>
7 # This file is part of PyX (http://pyx.sourceforge.net/).
9 # PyX is free software; you can redistribute it and/or modify
10 # it under the terms of the GNU General Public License as published by
11 # the Free Software Foundation; either version 2 of the License, or
12 # (at your option) any later version.
14 # PyX is distributed in the hope that it will be useful,
15 # but WITHOUT ANY WARRANTY; without even the implied warranty of
16 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 # GNU General Public License for more details.
19 # You should have received a copy of the GNU General Public License
20 # along with PyX; if not, write to the Free Software
21 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA
23 import array, binascii, io, logging, math, re
24 try:
25 import zlib
26 haszlib = True
27 except ImportError:
28 haszlib = False
30 logger = logging.getLogger("pyx")
32 from pyx import trafo, reader, writer
33 from pyx.path import path, moveto_pt, lineto_pt, curveto_pt, closepath
35 try:
36 from _t1code import *
37 except:
38 from .t1code import *
41 adobestandardencoding = [None, None, None, None, None, None, None, None,
42 None, None, None, None, None, None, None, None,
43 None, None, None, None, None, None, None, None,
44 None, None, None, None, None, None, None, None,
45 "space", "exclam", "quotedbl", "numbersign", "dollar", "percent", "ampersand", "quoteright",
46 "parenleft", "parenright", "asterisk", "plus", "comma", "hyphen", "period", "slash",
47 "zero", "one", "two", "three", "four", "five", "six", "seven",
48 "eight", "nine", "colon", "semicolon", "less", "equal", "greater", "question",
49 "at", "A", "B", "C", "D", "E", "F", "G",
50 "H", "I", "J", "K", "L", "M", "N", "O",
51 "P", "Q", "R", "S", "T", "U", "V", "W",
52 "X", "Y", "Z", "bracketleft", "backslash", "bracketright", "asciicircum", "underscore",
53 "quoteleft", "a", "b", "c", "d", "e", "f", "g",
54 "h", "i", "j", "k", "l", "m", "n", "o",
55 "p", "q", "r", "s", "t", "u", "v", "w",
56 "x", "y", "z", "braceleft", "bar", "braceright", "asciitilde", None,
57 None, None, None, None, None, None, None, None,
58 None, None, None, None, None, None, None, None,
59 None, None, None, None, None, None, None, None,
60 None, None, None, None, None, None, None, None,
61 None, "exclamdown", "cent", "sterling", "fraction", "yen", "florin", "section",
62 "currency", "quotesingle", "quotedblleft", "guillemotleft", "guilsinglleft", "guilsinglright", "fi", "fl",
63 None, "endash", "dagger", "daggerdbl", "periodcentered", None, "paragraph", "bullet",
64 "quotesinglbase", "quotedblbase", "quotedblright", "guillemotright", "ellipsis", "perthousand", None, "questiondown",
65 None, "grave", "acute", "circumflex", "tilde", "macron", "breve", "dotaccent",
66 "dieresis", None, "ring", "cedilla", None, "hungarumlaut", "ogonek", "caron",
67 "emdash", None, None, None, None, None, None, None,
68 None, None, None, None, None, None, None, None,
69 None, "AE", None, "ordfeminine", None, None, None, None,
70 "Lslash", "Oslash", "OE", "ordmasculine", None, None, None, None,
71 None, "ae", None, None, None, "dotlessi", None, None,
72 "lslash", "oslash", "oe", "germandbls", None, None, None, None]
class T1context:
    """Mutable interpreter state shared by the T1 commands of one evaluation."""

    def __init__(self, t1font, flex=True):
        """Create a fresh context for evaluating T1 commands of t1font.

        flex is stored unchanged as the ``flex`` attribute.
        """
        self.t1font = t1font
        self.flex = flex

        # current point and glyph width; unset until a command provides them
        self.x = self.y = None
        self.wx = self.wy = None

        # two independent stacks: the charstring operand stack and the
        # stack filled by callothersubr and emptied by pop
        self.t1stack = []
        self.psstack = []
90 ######################################################################
91 # T1 commands
92 # Note that the T1 commands are variable-free except for plain numbers,
93 # which are stored as integers. All other T1 commands exist as a single
94 # instance only.
# registries filled by T1cmd.__init__: command code -> command instance
T1cmds = {}      # plain commands
T1subcmds = {}   # escaped commands (those created with a true subcmd)


class T1cmd:
    """Base class of all T1 commands; instances register themselves by code."""

    def __init__(self, code, subcmd=0):
        """Store code and subcmd and register the instance.

        A true subcmd registers the instance in T1subcmds, otherwise it is
        registered in T1cmds.
        """
        self.code = code
        self.subcmd = subcmd
        registry = T1subcmds if subcmd else T1cmds
        registry[code] = self

    def __str__(self):
        """returns a string representation of the T1 command"""
        raise NotImplementedError

    def updatepath(self, path, trafo, context):
        """update path instance applying trafo to the points"""
        raise NotImplementedError

    def gathercalls(self, seacglyphs, subrs, context):
        """gather dependency information

        subrs is the set of called subrs; commands calling a subr add its
        number to it. Similarly, seacglyphs collects the names of all glyphs
        used in composite characters (the subrs needed by those glyphs are
        gathered as well).

        Implementations may not properly update all information in the
        context (especially consuming values from the stack) and skip
        various tests for performance reasons. For most T1 commands nothing
        needs to be done, which is what this default does.
        """
        pass
134 # commands for starting and finishing
class _T1endchar(T1cmd):
    """endchar (code 14): leaves the path untouched."""

    def __init__(self):
        super().__init__(14)

    def __str__(self):
        return "endchar"

    def updatepath(self, path, trafo, context):
        # nothing to draw
        return None

T1endchar = _T1endchar()
class _T1hsbw(T1cmd):
    """hsbw (code 13): set the horizontal sidebearing and the width."""

    def __init__(self):
        super().__init__(13)

    def __str__(self):
        return "hsbw"

    def updatepath(self, path, trafo, context):
        """Take sbx and wx from the bottom of the stack and move to (sbx, 0)."""
        stack = context.t1stack
        sbx = stack.pop(0)
        wx = stack.pop(0)
        start = trafo.apply_pt(sbx, 0)
        path.append(moveto_pt(*start))
        context.x, context.y = sbx, 0
        context.wx, context.wy = wx, 0

T1hsbw = _T1hsbw()
class _T1seac(T1cmd):
    """seac (escaped code 6): build a glyph out of two other glyphs.

    Both glyphs are given as character codes looked up in
    adobestandardencoding.
    """

    def __init__(self):
        super().__init__(6, subcmd=1)

    def __str__(self):
        return "seac"

    def updatepath(self, path, atrafo, context):
        """Draw glyph bchar, then glyph achar shifted by (adx-sab, ady)."""
        stack = context.t1stack
        sab = stack.pop(0)
        adx = stack.pop(0)
        ady = stack.pop(0)
        bchar = stack.pop(0)
        achar = stack.pop(0)
        aglyph = adobestandardencoding[achar]
        bglyph = adobestandardencoding[bchar]
        font = context.t1font
        font.updateglyphpath(bglyph, path, atrafo, context)
        # the parameter is named atrafo, so trafo is the module here
        shifted = atrafo * trafo.translate_pt(adx-sab, ady)
        font.updateglyphpath(aglyph, path, shifted, context)

    def gathercalls(self, seacglyphs, subrs, context):
        """Record both component glyphs and gather their own dependencies."""
        # only the two character codes on top of the stack are consumed
        achar = context.t1stack.pop()
        bchar = context.t1stack.pop()
        aglyph = adobestandardencoding[achar]
        bglyph = adobestandardencoding[bchar]
        seacglyphs.add(aglyph)
        seacglyphs.add(bglyph)
        for component in (bglyph, aglyph):
            context.t1font.gatherglyphcalls(component, seacglyphs, subrs, context)

T1seac = _T1seac()
class _T1sbw(T1cmd):
    """sbw (escaped code 7): set the sidebearing point and the width vector."""

    def __init__(self):
        super().__init__(7, subcmd=1)

    def __str__(self):
        return "sbw"

    def updatepath(self, path, trafo, context):
        """Take sbx, sby, wx, wy from the bottom of the stack and move to (sbx, sby)."""
        stack = context.t1stack
        sbx = stack.pop(0)
        sby = stack.pop(0)
        wx = stack.pop(0)
        wy = stack.pop(0)
        start = trafo.apply_pt(sbx, sby)
        path.append(moveto_pt(*start))
        context.x, context.y = sbx, sby
        context.wx, context.wy = wx, wy

T1sbw = _T1sbw()
225 # path construction commands
class _T1closepath(T1cmd):
    """closepath (code 9): close the current subpath."""

    def __init__(self):
        super().__init__(9)

    def __str__(self):
        return "closepath"

    def updatepath(self, path, trafo, context):
        path.append(closepath())
        # Unlike the PostScript operator, the T1 closepath does *not* modify
        # the current position. An explicit moveto to the context's current
        # point restores it for the commands that follow.
        current = trafo.apply_pt(context.x, context.y)
        path.append(moveto_pt(*current))

T1closepath = _T1closepath()
class _T1hlineto(T1cmd):
    """hlineto (code 6): horizontal line relative to the current point."""

    def __init__(self):
        super().__init__(6)

    def __str__(self):
        return "hlineto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y)
        path.append(lineto_pt(*target))
        context.x += dx

T1hlineto = _T1hlineto()
class _T1hmoveto(T1cmd):
    """hmoveto (code 22): horizontal move relative to the current point."""

    def __init__(self):
        super().__init__(22)

    def __str__(self):
        return "hmoveto"

    def updatepath(self, path, trafo, context):
        dx = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y)
        path.append(moveto_pt(*target))
        context.x += dx

T1hmoveto = _T1hmoveto()
class _T1hvcurveto(T1cmd):
    """hvcurveto (code 31): curve starting horizontally and ending vertically."""

    def __init__(self):
        super().__init__(31)

    def __str__(self):
        return "hvcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dy3 = stack.pop(0)
        x0, y0 = context.x, context.y
        # each apply_pt result is a pair; the three pairs are concatenated
        control1 = trafo.apply_pt(x0 + dx1, y0)
        control2 = trafo.apply_pt(x0 + dx1 + dx2, y0 + dy2)
        endpoint = trafo.apply_pt(x0 + dx1 + dx2, y0 + dy2 + dy3)
        path.append(curveto_pt(*(control1 + control2 + endpoint)))
        context.x += dx1+dx2
        context.y += dy2+dy3

T1hvcurveto = _T1hvcurveto()
class _T1rlineto(T1cmd):
    """rlineto (code 5): line relative to the current point."""

    def __init__(self):
        super().__init__(5)

    def __str__(self):
        return "rlineto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx = stack.pop(0)
        dy = stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y + dy)
        path.append(lineto_pt(*target))
        context.x += dx
        context.y += dy

T1rlineto = _T1rlineto()
class _T1rmoveto(T1cmd):
    """rmoveto (code 21): move relative to the current point."""

    def __init__(self):
        super().__init__(21)

    def __str__(self):
        return "rmoveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx = stack.pop(0)
        dy = stack.pop(0)
        target = trafo.apply_pt(context.x + dx, context.y + dy)
        path.append(moveto_pt(*target))
        context.x += dx
        context.y += dy

T1rmoveto = _T1rmoveto()
class _T1rrcurveto(T1cmd):
    """rrcurveto (code 8): curve given by three successive relative offsets."""

    def __init__(self):
        super().__init__(8)

    def __str__(self):
        return "rrcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dx1 = stack.pop(0)
        dy1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dx3 = stack.pop(0)
        dy3 = stack.pop(0)
        x0, y0 = context.x, context.y
        # each apply_pt result is a pair; the three pairs are concatenated
        control1 = trafo.apply_pt(x0 + dx1, y0 + dy1)
        control2 = trafo.apply_pt(x0 + dx1 + dx2, y0 + dy1 + dy2)
        endpoint = trafo.apply_pt(x0 + dx1 + dx2 + dx3, y0 + dy1 + dy2 + dy3)
        path.append(curveto_pt(*(control1 + control2 + endpoint)))
        context.x += dx1+dx2+dx3
        context.y += dy1+dy2+dy3

T1rrcurveto = _T1rrcurveto()
class _T1vlineto(T1cmd):
    """vlineto (code 7): vertical line relative to the current point."""

    def __init__(self):
        super().__init__(7)

    def __str__(self):
        return "vlineto"

    def updatepath(self, path, trafo, context):
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x, context.y + dy)
        path.append(lineto_pt(*target))
        context.y += dy

T1vlineto = _T1vlineto()
class _T1vmoveto(T1cmd):
    """vmoveto (code 4): vertical move relative to the current point."""

    def __init__(self):
        super().__init__(4)

    def __str__(self):
        return "vmoveto"

    def updatepath(self, path, trafo, context):
        dy = context.t1stack.pop(0)
        target = trafo.apply_pt(context.x, context.y + dy)
        path.append(moveto_pt(*target))
        context.y += dy

T1vmoveto = _T1vmoveto()
class _T1vhcurveto(T1cmd):
    """vhcurveto (code 30): curve starting vertically and ending horizontally."""

    def __init__(self):
        super().__init__(30)

    def __str__(self):
        return "vhcurveto"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        dy1 = stack.pop(0)
        dx2 = stack.pop(0)
        dy2 = stack.pop(0)
        dx3 = stack.pop(0)
        x0, y0 = context.x, context.y
        # each apply_pt result is a pair; the three pairs are concatenated
        control1 = trafo.apply_pt(x0, y0 + dy1)
        control2 = trafo.apply_pt(x0 + dx2, y0 + dy1 + dy2)
        endpoint = trafo.apply_pt(x0 + dx2 + dx3, y0 + dy1 + dy2)
        path.append(curveto_pt(*(control1 + control2 + endpoint)))
        context.x += dx2+dx3
        context.y += dy1+dy2

T1vhcurveto = _T1vhcurveto()
413 # hint commands
class _T1dotsection(T1cmd):
    """dotsection (escaped code 0): hint command, leaves the path untouched."""

    def __init__(self):
        super().__init__(0, subcmd=1)

    def __str__(self):
        return "dotsection"

    def updatepath(self, path, trafo, context):
        # hints do not contribute to the path
        return None

T1dotsection = _T1dotsection()
class _T1hstem(T1cmd):
    """hstem (code 1): hint command; its two operands (y, dy) are discarded."""

    def __init__(self):
        super().__init__(1)

    def __str__(self):
        return "hstem"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        stack.pop(0)  # y
        stack.pop(0)  # dy

T1hstem = _T1hstem()
class _T1hstem3(T1cmd):
    """hstem3 (escaped code 2): hint command; its six operands are discarded."""

    def __init__(self):
        super().__init__(2, subcmd=1)

    def __str__(self):
        return "hstem3"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        # y0, dy0, y1, dy1, y2, dy2
        for _ in range(6):
            stack.pop(0)

T1hstem3 = _T1hstem3()
class _T1vstem(T1cmd):
    """vstem (code 3): hint command; its two operands (x, dx) are discarded."""

    def __init__(self):
        super().__init__(3)

    def __str__(self):
        return "vstem"

    def updatepath(self, path, trafo, context):
        stack = context.t1stack
        stack.pop(0)  # x
        stack.pop(0)  # dx

T1vstem = _T1vstem()
class _T1vstem3(T1cmd):
    """vstem3 (escaped code 1): hint command taking six operands."""

    def __init__(self):
        super().__init__(1, subcmd=1)

    def __str__(self):
        return "vstem3"

    def updatepath(self, path, trafo, context):
        """Take six operands from the bottom of the stack.

        The values are stored as attributes on this command instance (not on
        the context); the path is left untouched.
        """
        stack = context.t1stack
        for attribute in ("x0", "dx0", "x1", "dx1", "x2", "dx2"):
            setattr(self, attribute, stack.pop(0))

T1vstem3 = _T1vstem3()
497 # arithmetic command
class _T1div(T1cmd):
    """div (escaped code 12): replace the two topmost numbers by their quotient."""

    def __init__(self):
        super().__init__(12, subcmd=1)

    def __str__(self):
        return "div"

    def _divide(self, context):
        """Pop num2 and num1 and push the floored quotient of num1 by num2."""
        stack = context.t1stack
        num2 = stack.pop()
        num1 = stack.pop()
        quotient, _ = divmod(num1, num2)
        stack.append(quotient)

    def updatepath(self, path, trafo, context):
        self._divide(context)

    def gathercalls(self, seacglyphs, subrs, context):
        # the stack is kept up to date here too, exactly as in updatepath
        self._divide(context)

T1div = _T1div()
520 # subroutine commands
class _T1callothersubr(T1cmd):
    """callothersubr (escaped code 16): call one of the othersubrs."""

    def __init__(self):
        super().__init__(16, subcmd=1)

    def __str__(self):
        return "callothersubr"

    def updatepath(self, path, trafo, context):
        """Move n arguments to the psstack and handle othersubrs 0, 1 and 2.

        The othersubr number and the argument count n are taken from the top
        of the t1stack; the n arguments are taken from its bottom. Other
        othersubr numbers only transfer their arguments.
        """
        t1stack = context.t1stack
        othersubrnumber = t1stack.pop()
        n = t1stack.pop()
        for _ in range(n):
            context.psstack.append(t1stack.pop(0))

        if othersubrnumber == 0:
            # end of a flex section
            flex_size, x, y = context.psstack[-3:]
            if context.flex:
                # two curves, each given by six consecutive psstack values
                for first in (2, 8):
                    x1, y1, x2, y2, x3, y3 = context.psstack[first:first + 6]
                    x1, y1 = trafo.apply_pt(x1, y1)
                    x2, y2 = trafo.apply_pt(x2, y2)
                    x3, y3 = trafo.apply_pt(x3, y3)
                    path.append(curveto_pt(x1, y1, x2, y2, x3, y3))
            else:
                path.append(lineto_pt(*trafo.apply_pt(x, y)))
            # leave the end point for the following two pop commands
            context.psstack = [y, x]
        elif othersubrnumber == 1:
            # start of a flex section: nothing to do
            pass
        elif othersubrnumber == 2:
            # drop the path item added last and record the current point
            path.pathitems.pop()
            context.psstack.append(context.x)
            context.psstack.append(context.y)

    def gathercalls(self, seacglyphs, subrs, context):
        """Move the n topmost arguments to the psstack, keeping their order."""
        t1stack = context.t1stack
        othersubrnumber = t1stack.pop()
        n = t1stack.pop()
        arguments = [t1stack.pop() for _ in range(n)]
        arguments.reverse()
        context.psstack.extend(arguments)

T1callothersubr = _T1callothersubr()
class _T1callsubr(T1cmd):
    """callsubr (code 10): execute the subr whose number is on top of the stack."""

    def __init__(self):
        super().__init__(10)

    def __str__(self):
        return "callsubr"

    def updatepath(self, path, trafo, context):
        number = context.t1stack.pop()
        context.t1font.updatesubrpath(number, path, trafo, context)

    def gathercalls(self, seacglyphs, subrs, context):
        """Record the called subr and gather the calls made inside it."""
        number = context.t1stack.pop()
        subrs.add(number)
        context.t1font.gathersubrcalls(number, seacglyphs, subrs, context)

T1callsubr = _T1callsubr()
class _T1pop(T1cmd):
    """pop (escaped code 17): move the top of the psstack onto the t1stack."""

    def __init__(self):
        super().__init__(17, subcmd=1)

    def __str__(self):
        return "pop"

    def _transfer(self, context):
        """Pop one value from the psstack and push it onto the t1stack."""
        value = context.psstack.pop()
        context.t1stack.append(value)

    def updatepath(self, path, trafo, context):
        self._transfer(context)

    def gathercalls(self, seacglyphs, subrs, context):
        self._transfer(context)

T1pop = _T1pop()
class _T1return(T1cmd):
    """return (code 11): leaves the path untouched."""

    def __init__(self):
        super().__init__(11)

    def __str__(self):
        return "return"

    def updatepath(self, path, trafo, context):
        # nothing to draw
        return None

T1return = _T1return()
class _T1setcurrentpoint(T1cmd):
    """setcurrentpoint (escaped code 33): set the context's current point."""

    def __init__(self):
        super().__init__(33, subcmd=1)

    def __str__(self):
        return "setcurrentpoint"

    def updatepath(self, path, trafo, context):
        # x first, then y, both taken from the bottom of the stack
        stack = context.t1stack
        context.x = stack.pop(0)
        context.y = stack.pop(0)

T1setcurrentpoint = _T1setcurrentpoint()
632 ######################################################################
class T1file:
    """A Type 1 font consisting of two clear text parts and an eexec part."""

    # initial keys passed to the decoder/encoder for the eexec part and for
    # the charstrings
    eexecr = 55665
    charstringr = 4330

    # Raw string literals: "\s" in a normal string literal is an invalid
    # escape sequence, for which Python emits a warning.
    # one group: the font name
    fontnamepattern = re.compile(r"/FontName\s+/(.*?)\s+def\s+")
    # six number groups (the matrix entries) and an optional "readonly" group
    fontmatrixpattern = re.compile(r"/FontMatrix\s*\[\s*(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s+(-?[0-9.]+)\s*\]\s*(readonly\s+)?def")
def __init__(self, data1, data2eexec, data3):
    """initializes a t1font instance

    data1 and data3 are the two clear text data parts and data2eexec is
    the eexec-encoded binary data part. The font name and the font matrix
    are read from data1."""
    self.data1 = data1
    self.data3 = data3

    # Encoded and decoded form of the binary part. setsubrcmds and
    # setglyphcmds set _data2eexec to None, which *also* marks _data2 as
    # out-of-date. Both are therefore private; use getdata2 and
    # getdata2eexec, which resolve the current state of decoding.
    self._data2eexec = data2eexec
    self._data2 = None

    # filled on demand by _encoding
    self.encoding = None

    (self.name,) = self.fontnamepattern.search(self.data1).groups()
    matrixmatch = self.fontmatrixpattern.search(self.data1)
    m11, m12, m21, m22, v1, v2 = [float(entry) for entry in matrixmatch.groups()[:6]]
    self.fontmatrix = trafo.trafo_pt(matrix=((m11, m12), (m21, m22)), vector=(v1, v2))
def _eexecdecode(self, code):
    """Return code decoded with the eexec key, dropping 4 leading bytes."""
    decoded = decoder(code, self.eexecr, 4)
    return decoded
def _charstringdecode(self, code):
    """Return code decoded with the charstring key, dropping lenIV leading bytes."""
    decoded = decoder(code, self.charstringr, self.lenIV)
    return decoded
def _eexecencode(self, data):
    """Return data encoded with the eexec key, using b"PyX!" as leading bytes."""
    encoded = encoder(data, self.eexecr, b"PyX!")
    return encoded
def _charstringencode(self, data):
    """Return data encoded with the charstring key.

    The first lenIV bytes of b"PyX!" are used as leading bytes."""
    leading = b"PyX!"[:self.lenIV]
    return encoder(data, self.charstringr, leading)
def _encoding(self):
    """helper method to lookup the encoding in the font

    Sets self.encoding to adobestandardencoding or to a list of 256 glyph
    names (None for unassigned codes). In the latter case the positions of
    the encoding entries in data1 are kept in encodingstart/encodingend."""
    c = reader.PStokenizer(self.data1, "/Encoding")
    token1 = c.gettoken()
    token2 = c.gettoken()
    if token1 == "StandardEncoding" and token2 == "def":
        self.encoding = adobestandardencoding
        return

    self.encoding = [None]*256
    # skip everything up to the first "dup", remembering where it starts
    while True:
        self.encodingstart = c.pos
        if c.gettoken() == "dup":
            break
    # read the "dup <code> /<glyph> put" entries
    while True:
        i = c.getint()
        glyph = c.gettoken()
        if 0 <= i < 256:
            # strip the leading slash of the name
            self.encoding[i] = glyph[1:]
        token = c.gettoken()
        assert token == "put"
        self.encodingend = c.pos
        token = c.gettoken()
        if token in ("readonly", "def"):
            break
        assert token == "dup"
# Matches the lenIV entry in the decoded binary part (group 1: its value).
# A raw bytes literal is used because "\s" and "\d" are invalid escape
# sequences in a normal literal.
lenIVpattern = re.compile(rb"/lenIV\s+(\d+)\s+def\s+")
# Command lists the first four subrs are compared against to detect the
# flex and hint replacement subrs.
flexhintsubrs = [
    [3, 0, T1callothersubr, T1pop, T1pop, T1setcurrentpoint, T1return],  # subr 0
    [0, 1, T1callothersubr, T1return],                                   # subr 1
    [0, 2, T1callothersubr, T1return],                                   # subr 2
    [T1return],                                                          # subr 3
]
def _data2decode(self):
    """decodes data2eexec to the data2 string and the subr and glyphs dictionary

    It doesn't make sense to call this method twice -- check the content of
    data2 before calling. The method also keeps the subrs and charstrings
    start and end positions for later use."""
    self._data2 = self._eexecdecode(self._data2eexec)

    # lenIV defaults to 4 when the font does not define it
    lenIVmatch = self.lenIVpattern.search(self._data2)
    self.lenIV = int(lenIVmatch.group(1)) if lenIVmatch else 4

    self.emptysubr = self._charstringencode(b"\x0b") # 11, i.e. return

    # extract Subrs
    c = reader.PSbytes_tokenizer(self._data2, b"/Subrs")
    self.subrsstart = c.pos
    arraycount = c.getint()
    token = c.gettoken()
    assert token == b"array"
    self.subrs = []
    for i in range(arraycount):
        is_first = not i
        token = c.gettoken()
        assert token == b"dup"
        token = c.getint()
        assert token == i
        size = c.getint()
        # the tokens in front of and behind the binary data are taken from
        # the first subr; all further subrs have to use the same ones
        if is_first:
            self.subrrdtoken = c.gettoken()
        else:
            token = c.gettoken()
            assert token == self.subrrdtoken
        self.subrs.append(c.getbytes(size))
        token = c.gettoken()
        if token == b"noaccess":
            token = token + b" " + c.gettoken()
        if is_first:
            self.subrnptoken = token
        else:
            assert token == self.subrnptoken
    self.subrsend = c.pos

    # hasflexhintsubrs is a boolean indicating that the font uses flex or
    # hint replacement subrs as specified by Adobe (tm). When it does, the
    # first 4 subrs should all be copied except when none of them are used
    # in the stripped version of the font since we then get a font not
    # using flex or hint replacement subrs at all.
    flexcount = len(self.flexhintsubrs)
    self.hasflexhintsubrs = (arraycount >= flexcount and
                             [self.getsubrcmds(i) for i in range(flexcount)] == self.flexhintsubrs)

    # extract glyphs
    self.glyphs = {}
    self.glyphlist = [] # we want to keep the order of the glyph names
    c = reader.PSbytes_tokenizer(self._data2, b"/CharStrings")
    self.charstringsstart = c.pos
    c.getint()  # the size of the dict is not needed
    for expected in (b"dict", b"dup", b"begin"):
        token = c.gettoken()
        assert token == expected
    first = True
    while True:
        chartoken = c.gettoken().decode("ascii")
        if chartoken == "end":
            break
        assert chartoken[0] == "/"
        glyphname = chartoken[1:]
        size = c.getint()
        # as for the subrs, the first glyph defines the surrounding tokens
        if first:
            self.glyphrdtoken = c.gettoken()
        else:
            token = c.gettoken()
            assert token == self.glyphrdtoken
        self.glyphlist.append(glyphname)
        self.glyphs[glyphname] = c.getbytes(size)
        if first:
            self.glyphndtoken = c.gettoken()
        else:
            token = c.gettoken()
            assert token == self.glyphndtoken
        first = False
    self.charstringsend = c.pos
    assert not self.subrs or self.subrrdtoken == self.glyphrdtoken
791 def _cmds(self, code):
792 """return a list of T1cmd's for encoded charstring data in code"""
793 code = array.array("B", self._charstringdecode(code))
794 cmds = []
795 while code:
796 x = code.pop(0)
797 if x == 12: # this starts an escaped cmd
798 cmds.append(T1subcmds[code.pop(0)])
799 elif 0 <= x < 32: # those are cmd's
800 cmds.append(T1cmds[x])
801 elif 32 <= x <= 246: # short ints
802 cmds.append(x-139)
803 elif 247 <= x <= 250: # mid size ints
804 cmds.append(((x - 247)*256) + code.pop(0) + 108)
805 elif 251 <= x <= 254: # mid size ints
806 cmds.append(-((x - 251)*256) - code.pop(0) - 108)
807 else: # x = 255, i.e. full size ints
808 y = ((code.pop(0)*256+code.pop(0))*256+code.pop(0))*256+code.pop(0)
809 if y > (1 << 31):
810 cmds.append(y - (1 << 32))
811 else:
812 cmds.append(y)
813 return cmds
815 def _code(self, cmds):
816 """return an encoded charstring data for list of T1cmd's in cmds"""
817 code = array.array("B")
818 for cmd in cmds:
819 try:
820 if cmd.subcmd:
821 code.append(12)
822 code.append(cmd.code)
823 except AttributeError:
824 if -107 <= cmd <= 107:
825 code.append(cmd+139)
826 elif 108 <= cmd <= 1131:
827 a, b = divmod(cmd-108, 256)
828 code.append(a+247)
829 code.append(b)
830 elif -1131 <= cmd <= -108:
831 a, b = divmod(-cmd-108, 256)
832 code.append(a+251)
833 code.append(b)
834 else:
835 if cmd < 0:
836 cmd += 1 << 32
837 cmd, x4 = divmod(cmd, 256)
838 cmd, x3 = divmod(cmd, 256)
839 x1, x2 = divmod(cmd, 256)
840 code.append(255)
841 code.append(x1)
842 code.append(x2)
843 code.append(x3)
844 code.append(x4)
845 return self._charstringencode(code.tobytes())
def getsubrcmds(self, subr):
    """return a list of T1cmd's for subr subr

    The binary part of the font is decoded first when necessary."""
    if not self._data2:
        self._data2decode()
    charstring = self.subrs[subr]
    return self._cmds(charstring)
def getglyphcmds(self, glyph):
    """return a list of T1cmd's for glyph glyph

    The binary part of the font is decoded first when necessary."""
    if not self._data2:
        self._data2decode()
    charstring = self.glyphs[glyph]
    return self._cmds(charstring)
def setsubrcmds(self, subr, cmds):
    """replaces the T1cmd's by the list cmds for subr subr"""
    if not self._data2:
        self._data2decode()
    # the encoded binary part no longer reflects the subrs
    self._data2eexec = None
    encoded = self._code(cmds)
    self.subrs[subr] = encoded
def setglyphcmds(self, glyph, cmds):
    """replaces the T1cmd's by the list cmds for glyph glyph"""
    if not self._data2:
        self._data2decode()
    # the encoded binary part no longer reflects the glyphs
    self._data2eexec = None
    encoded = self._code(cmds)
    self.glyphs[glyph] = encoded
def updatepath(self, cmds, path, trafo, context):
    """Evaluate cmds: numbers are pushed on the t1stack, commands update path."""
    push = context.t1stack.append
    for cmd in cmds:
        if not isinstance(cmd, T1cmd):
            push(cmd)
            continue
        cmd.updatepath(path, trafo, context)
def updatesubrpath(self, subr, path, trafo, context):
    """Evaluate the commands of subr subr to update path."""
    cmds = self.getsubrcmds(subr)
    self.updatepath(cmds, path, trafo, context)
def updateglyphpath(self, glyph, path, trafo, context):
    """Evaluate the commands of glyph glyph to update path."""
    cmds = self.getglyphcmds(glyph)
    self.updatepath(cmds, path, trafo, context)
def gathercalls(self, cmds, seacglyphs, subrs, context):
    """Gather dependencies of cmds: numbers are pushed on the t1stack,
    commands get their gathercalls method called."""
    push = context.t1stack.append
    for cmd in cmds:
        if not isinstance(cmd, T1cmd):
            push(cmd)
            continue
        cmd.gathercalls(seacglyphs, subrs, context)
def gathersubrcalls(self, subr, seacglyphs, subrs, context):
    """Gather the dependencies of the commands of subr subr."""
    cmds = self.getsubrcmds(subr)
    self.gathercalls(cmds, seacglyphs, subrs, context)
def gatherglyphcalls(self, glyph, seacglyphs, subrs, context):
    """Gather the dependencies of the commands of glyph glyph."""
    cmds = self.getglyphcmds(glyph)
    self.gathercalls(cmds, seacglyphs, subrs, context)
def getglyphpath_pt(self, x_pt, y_pt, glyph, size_pt, convertcharcode=False, flex=True):
    """return an object containing the PyX path, wx_pt and wy_pt for glyph named glyph

    With convertcharcode set, glyph is a character code which is translated
    to a glyph name by the encoding of the font. The path is scaled by
    size_pt and positioned at (x_pt, y_pt); the width is scaled only."""

    class glyphpath:
        def __init__(self, p, wx_pt, wy_pt):
            self.path = p
            self.wx_pt = wx_pt
            self.wy_pt = wy_pt

    if convertcharcode:
        if not self.encoding:
            self._encoding()
        glyph = self.encoding[glyph]

    scaling = self.fontmatrix.scaled(size_pt)
    placement = scaling.translated_pt(x_pt, y_pt)
    context = T1context(self, flex=flex)
    p = path()
    self.updateglyphpath(glyph, p, placement, context)
    wx_pt, wy_pt = scaling.apply_pt(context.wx, context.wy)
    return glyphpath(p, wx_pt, wy_pt)
def getdata2(self, subrs=None, glyphs=None):
    """makes a data2 string

    subrs is a set containing the numbers of those subrs which are to be
    contained in the subrs string to be created. Subrs below the largest
    selected number which are not selected are replaced by an empty subr.
    If subrs is None, all subrs in self.subrs will be used.
    The subrs set might be modified *in place*.

    glyphs is a container of those glyph names which are to be contained
    in the charstrings string to be created.
    If glyphs is None, all glyphs in self.glyphs will be used."""
    # the positions and charstrings used below only exist after decoding
    if not self._data2:
        self._data2decode()
    w = writer.writer(io.BytesIO())

    def addsubrs(subrs):
        if subrs is None:
            # select all subrs
            subrs = set(range(len(self.subrs)))
            subrsmax = len(self.subrs) - 1
        elif subrs:
            if self.hasflexhintsubrs and min(subrs) < len(self.flexhintsubrs):
                # According to the spec we need to keep all the flex and hint subrs
                # as long as any of it is used.
                for subr in range(len(self.flexhintsubrs)):
                    subrs.add(subr)
            # taken after the adjustment to include all the flexhint subrs
            subrsmax = max(subrs)
        else:
            subrsmax = -1

        # build the string from all selected subrs
        w.write("%d array\n" % (subrsmax + 1))
        for subr in range(subrsmax+1):
            code = self.subrs[subr] if subr in subrs else self.emptysubr
            w.write("dup %d %d " % (subr, len(code)))
            w.write_bytes(self.subrrdtoken)
            w.write_bytes(b" ")
            w.write_bytes(code)
            w.write_bytes(b" ")
            w.write_bytes(self.subrnptoken)
            w.write_bytes(b"\n")

    def addcharstrings(glyphs):
        # a conditional expression instead of "and ... or": the latter
        # evaluates len(None) for a font without any glyph
        count = len(self.glyphlist) if glyphs is None else len(glyphs)
        w.write("%d dict dup begin\n" % count)
        for glyph in self.glyphlist:
            if glyphs is None or glyph in glyphs:
                w.write("/%s %d " % (glyph, len(self.glyphs[glyph])))
                w.write_bytes(self.glyphrdtoken)
                w.write_bytes(b" ")
                w.write_bytes(self.glyphs[glyph])
                w.write_bytes(b" ")
                w.write_bytes(self.glyphndtoken)
                w.write_bytes(b"\n")
        w.write("end\n")

    # copy the decoded data, replacing the Subrs and the CharStrings
    # sections in the order in which they occur in the font
    if self.subrsstart < self.charstringsstart:
        w.write_bytes(self._data2[:self.subrsstart])
        addsubrs(subrs)
        w.write_bytes(self._data2[self.subrsend:self.charstringsstart])
        addcharstrings(glyphs)
        w.write_bytes(self._data2[self.charstringsend:])
    else:
        w.write_bytes(self._data2[:self.charstringsstart])
        addcharstrings(glyphs)
        w.write_bytes(self._data2[self.charstringsend:self.subrsstart])
        addsubrs(subrs)
        w.write_bytes(self._data2[self.subrsend:])
    return w.file.getvalue()
def getdata2eexec(self):
    """Return the eexec-encoded binary part of the font."""
    if not self._data2eexec:
        # note that self._data2 is out-of-date here too, hence we need to call getdata2
        return self._eexecencode(self.getdata2())
    return self._data2eexec
# Raw literals are used since "\s" and "\d" are invalid escape sequences
# in normal string and bytes literals.
# a line break together with the whitespace around it
newlinepattern = re.compile(r"\s*[\r\n]\s*")
# when UniqueID is commented out (as in modern latin), prepare to remove the comment character as well
uniqueidstrpattern = re.compile(r"%?/UniqueID\s+\d+\s+def\s+")
uniqueidbytespattern = re.compile(rb"%?/UniqueID\s+\d+\s+def\s+")
def getstrippedfont(self, glyphs, charcodes):
    """create a T1file instance containing only certain glyphs

    glyphs is a set of the glyph names. It might be modified *in place*!
    charcodes are character codes; the glyphs they select according to the
    encoding of the font are included as well. The .notdef glyph and the
    glyphs used in composite characters are always added.
    """
    if not self.encoding:
        self._encoding()
    for charcode in charcodes:
        glyphs.add(self.encoding[charcode])
    # .notdef is always part of the stripped font. It is added before the
    # dependencies are gathered, since the subrs it calls are needed too.
    glyphs.add(".notdef")

    # self.glyphs is used below and getdata2 needs the decoded data even
    # when no glyph at all is selected
    if not self._data2:
        self._data2decode()

    # collect information about used glyphs and subrs
    seacglyphs = set()
    subrs = set()
    for glyph in glyphs:
        if glyph == ".notdef" and glyph not in self.glyphs:
            # a font without .notdef: nothing to gather for it
            continue
        self.gatherglyphcalls(glyph, seacglyphs, subrs, T1context(self))
    # while we have gathered all subrs for the seacglyphs already, we
    # might have missed the glyphs themselves (when they are not used stand-alone)
    glyphs.update(seacglyphs)

    # strip data1
    if self.encoding is adobestandardencoding:
        data1 = self.data1
    else:
        encodingstrings = []
        for char, glyph in enumerate(self.encoding):
            if glyph in glyphs:
                encodingstrings.append("dup %i /%s put\n" % (char, glyph))
        data1 = self.data1[:self.encodingstart] + "\n" + "".join(encodingstrings) + self.data1[self.encodingend:]
    data1 = self.newlinepattern.subn("\n", data1)[0]
    data1 = self.uniqueidstrpattern.subn("", data1)[0]

    # strip data2
    data2 = self.uniqueidbytespattern.subn(b"", self.getdata2(subrs, glyphs))[0]

    # strip data3
    data3 = self.newlinepattern.subn("\n", self.data3)[0]

    # create and return the new font instance
    return T1file(data1.rstrip() + "\n", self._eexecencode(data2), data3.rstrip() + "\n")
1043 # The following two methods, writePDFfontinfo and getglyphinfo,
1044 # extract information which should better be taken from the afm file.
def writePDFfontinfo(self, file):
    """Write guessed PDF font descriptor entries to file.

    The values are estimated from the outlines of a few glyphs (y, W, H, h,
    period and colon). When that fails, fixed stub values are written
    instead and a warning is logged."""
    try:
        glyphinfo_y = self.getglyphinfo("y")
        glyphinfo_W = self.getglyphinfo("W")
        glyphinfo_H = self.getglyphinfo("H")
        glyphinfo_h = self.getglyphinfo("h")
        glyphinfo_period = self.getglyphinfo("period")
        glyphinfo_colon = self.getglyphinfo("colon")
    except Exception:
        # Only ordinary errors trigger the fallback; KeyboardInterrupt and
        # SystemExit are not swallowed.
        logger.warning("Auto-guessing of font information for font '%s' failed. We're writing stub data instead." % self.name)
        file.write("/Flags 4\n")
        file.write("/FontBBox [0 -100 1000 1000]\n")
        file.write("/ItalicAngle 0\n")
        file.write("/Ascent 1000\n")
        file.write("/Descent -100\n")
        file.write("/CapHeight 700\n")
        file.write("/StemV 100\n")
    else:
        if not self.encoding:
            self._encoding()
        # As a simple heuristics we assume non-symbolic fonts if and only
        # if the Adobe standard encoding is used. All other font flags are
        # not specified here.
        if self.encoding is adobestandardencoding:
            file.write("/Flags 32\n")
        else:
            file.write("/Flags 4\n")
        # each glyphinfo is (wx, wy, llx, lly, urx, ury)
        file.write("/FontBBox [0 %f %f %f]\n" % (glyphinfo_y[3], glyphinfo_W[0], glyphinfo_H[5]))
        file.write("/ItalicAngle %f\n" % math.degrees(math.atan2(glyphinfo_period[4]-glyphinfo_colon[4], glyphinfo_colon[5]-glyphinfo_period[5])))
        file.write("/Ascent %f\n" % glyphinfo_H[5])
        file.write("/Descent %f\n" % glyphinfo_y[3])
        file.write("/CapHeight %f\n" % glyphinfo_h[5])
        file.write("/StemV %f\n" % (glyphinfo_period[4]-glyphinfo_period[2]))
def getglyphinfo(self, glyph, flex=True):
    """return (wx, wy, llx, lly, urx, ury) for the named glyph

    The glyph outline is built into a fresh path via updateglyphpath with
    an identity trafo; wx and wy are read from the T1context afterwards and
    the remaining four values are the bounding box of the path in pt. A
    warning is logged on every call since an afm file should be used for
    this instead.
    """
    logger.warning("We are about to extract font information for the Type 1 font '%s' from its pfb file. This is bad practice (and it's slow). You should use an afm file instead." % self.name)
    t1context = T1context(self, flex=flex)
    outline = path()
    self.updateglyphpath(glyph, outline, trafo.trafo(), t1context)
    box = outline.bbox()
    return (t1context.wx, t1context.wy,
            box.llx_pt, box.lly_pt, box.urx_pt, box.ury_pt)
def outputPFA(self, file, remove_UniqueID_lookup=False):
    """output the T1file in PFA format

    file needs to provide write (for str) and write_bytes (for bytes).
    The eexec section is written hex-encoded in lines of 64 characters.

    When remove_UniqueID_lookup is set, the PostScript code which looks up
    an already defined font of the same name and UniqueID is removed from
    the first section together with the matching "{restore} if" in the
    third section. Both are only removed when both are found.
    """
    data1 = self.data1
    data3 = self.data3
    if remove_UniqueID_lookup:
        # The font name is matched literally, hence it needs to be escaped:
        # names may contain regular expression metacharacters like "+".
        # Raw strings keep \s and \d from being read as string escapes.
        m1 = re.search(r"""FontDirectory\s*/%(name)s\s+known{/%(name)s\s+findfont\s+dup\s*/UniqueID\s+known\s*{\s*dup\s*
                           /UniqueID\s+get\s+\d+\s+eq\s+exch\s*/FontType\s+get\s+1\s+eq\s+and\s*}\s*{\s*pop\s+false\s*}\s*ifelse\s*
                           {save\s+true\s*}\s*{\s*false\s*}\s*ifelse\s*}\s*{\s*false\s*}\s*ifelse""" % {"name": re.escape(self.name)},
                       data1, re.VERBOSE)
        m3 = re.search(r"\s*{restore}\s*if", data3)
        if m1 and m3:
            data1 = data1[:m1.start()] + data1[m1.end():]
            data3 = data3[:m3.start()] + data3[m3.end():]
    file.write(data1)
    data2eexechex = binascii.b2a_hex(self.getdata2eexec())
    linelength = 64
    for start in range(0, len(data2eexechex), linelength):
        file.write_bytes(data2eexechex[start:start+linelength])
        file.write("\n")
    file.write(data3)
def outputPFB(self, file):
    """output the T1file in PFB format

    The output consists of three segments (first section, eexec section,
    third section) followed by an end marker. Each segment starts with the
    byte 128, a segment type byte and the length of the data as four
    bytes, least significant byte first.

    All data is written as bytes using file.write_bytes; for objects
    without a write_bytes method (like a file opened in binary mode)
    file.write is used instead. The text sections are encoded the same way
    they are decoded in from_PFB_bytes (ascii with surrogateescape).
    """
    def segmentheader(segmenttype, data):
        return b"\200" + segmenttype + len(data).to_bytes(4, "little")

    data2eexec = self.getdata2eexec()
    data1 = self.data1.encode("ascii", errors="surrogateescape")
    data3 = self.data3.encode("ascii", errors="surrogateescape")
    write_bytes = getattr(file, "write_bytes", None) or file.write
    for segmenttype, data in ((b"\1", data1), (b"\2", data2eexec), (b"\1", data3)):
        write_bytes(segmentheader(segmenttype, data))
        write_bytes(data)
    write_bytes(b"\200\3")
def outputPS(self, file, writer):
    """write the font as PostScript code to file

    This is the PFA output with the UniqueID lookup code removed; the
    writer argument is not used.
    """
    strip_lookup = True
    self.outputPFA(file, remove_UniqueID_lookup=strip_lookup)
def outputPDF(self, file, writer):
    """write the font as a PDF stream (dictionary and data) to file

    The stream data is the first section, the eexec section and the third
    section concatenated. The third section is left out when it consists
    of nothing but 512 zeros and "cleartomark" (ignoring newlines,
    carriage returns, tabs and spaces). The data is compressed by zlib if
    writer.compress is set and zlib is available.
    """
    data2eexec = self.getdata2eexec()
    data3 = self.data3
    # we might be allowed to skip the third part ...
    condensed = data3.translate(str.maketrans("", "", "\n\r\t "))
    if condensed == "0"*512 + "cleartomark":
        data3 = ""

    data = b"".join((self.data1.encode("ascii", errors="surrogateescape"),
                     data2eexec,
                     data3.encode("ascii", errors="surrogateescape")))
    compressed = writer.compress and haszlib
    if compressed:
        data = zlib.compress(data)

    # /Length is the size of the (possibly compressed) stream data, while
    # /Length1 to /Length3 are the sizes of the three uncompressed sections
    lengths = (len(data), len(self.data1), len(data2eexec), len(data3))
    file.write("<<\n"
               "/Length %d\n"
               "/Length1 %d\n"
               "/Length2 %d\n"
               "/Length3 %d\n" % lengths)
    if compressed:
        file.write("/Filter /FlateDecode\n")
    file.write(">>\n"
               "stream\n")
    file.write_bytes(data)
    file.write("\n"
               "endstream\n")
1159 # factory functions
class FontFormatError(Exception):
    """raised by the factory functions when font data lacks the expected PFA/PFB markers"""
def from_PFA_bytes(bytes):
    """create a T1file instance from a string of bytes corresponding to a PFA file

    The data is split at the "eexec" keyword and at the first run of 40
    zeros following it. The part in between is the hex encoded eexec
    section; spaces, tabs, carriage returns and newlines in it are ignored.

    Raises FontFormatError if one of the two markers is not found.
    """
    # The input is bytes (see from_PFA_filename, which reads in binary mode),
    # hence all search and replace arguments need to be bytes as well.
    try:
        # skip the keyword (5 bytes) and the one byte following it
        m1 = bytes.index(b"eexec") + 6
        # the zeros of the third section can only start behind the eexec data
        m2 = bytes.index(b"0"*40, m1)
    except ValueError:
        raise FontFormatError

    data1 = bytes[:m1].decode("ascii", errors="surrogateescape")
    data2eexec = binascii.a2b_hex(bytes[m1:m2].translate(None, b" \t\r\n"))
    data3 = bytes[m2:].decode("ascii", errors="surrogateescape")
    return T1file(data1, data2eexec, data3)
def from_PFA_filename(filename):
    """create a T1file instance from PFA font file of given name

    The file is read in binary mode and handed over to from_PFA_bytes.
    """
    with open(filename, "rb") as fontfile:
        return from_PFA_bytes(fontfile.read())
def from_PFB_bytes(bytes):
    """create a T1file instance from a string of bytes corresponding to a PFB file

    The data is expected to consist of one segment of type 1 (first
    section), one or more segments of type 2 (eexec section, concatenated),
    another segment of type 1 (third section) and the end marker of type 3
    with nothing behind it. Each segment starts with the byte 128, the
    type byte and four bytes of data length, least significant byte first.

    Raises FontFormatError if a marker is not the expected one or data
    follows the end marker, and ValueError if a length field is incomplete.
    """
    position = 0

    def take(count):
        # return the next count bytes (less at the end of the data)
        nonlocal position
        chunk = bytes[position:position+count]
        position += count
        return chunk

    def segmentlength():
        raw = take(4)
        if len(raw) != 4:
            raise ValueError("invalid string length")
        return int.from_bytes(raw, "little")

    if take(2) != b"\200\1":
        raise FontFormatError
    data1 = take(segmentlength()).decode("ascii", errors="surrogateescape")

    mark = take(2)
    if mark != b"\200\2":
        raise FontFormatError
    # the eexec section may be spread over several consecutive segments
    eexecparts = []
    while mark == b"\200\2":
        eexecparts.append(take(segmentlength()))
        mark = take(2)
    data2eexec = b"".join(eexecparts)

    if mark != b"\200\1":
        raise FontFormatError
    data3 = take(segmentlength()).decode("ascii", errors="surrogateescape")

    if take(2) != b"\200\3":
        raise FontFormatError
    # nothing is allowed behind the end marker
    if take(1):
        raise FontFormatError

    return T1file(data1, data2eexec, data3)
def from_PFB_filename(filename):
    """create a T1file instance from PFB font file of given name

    The file is read in binary mode and handed over to from_PFB_bytes.
    """
    with open(filename, "rb") as fontfile:
        return from_PFB_bytes(fontfile.read())
def from_PF_bytes(bytes):
    """create a T1file instance from a string of bytes of a font file

    The data is parsed by from_PFB_bytes only.
    """
    # NOTE: falling back to from_PFA_bytes when from_PFB_bytes raises a
    # FontFormatError is currently disabled.
    t1file = from_PFB_bytes(bytes)
    return t1file
def from_PF_filename(filename):
    """create a T1file instance from font file of given name

    The file is read in binary mode and handed over to from_PF_bytes,
    which decides how the data is parsed.
    """
    with open(filename, "rb") as fontfile:
        return from_PF_bytes(fontfile.read())