1 ;;; Copyright (c) 2006 Zachary Beane, All Rights Reserved
3 ;;; Redistribution and use in source and binary forms, with or without
4 ;;; modification, are permitted provided that the following conditions
7 ;;; * Redistributions of source code must retain the above copyright
8 ;;; notice, this list of conditions and the following disclaimer.
10 ;;; * Redistributions in binary form must reproduce the above
11 ;;; copyright notice, this list of conditions and the following
12 ;;; disclaimer in the documentation and/or other materials
13 ;;; provided with the distribution.
15 ;;; THIS SOFTWARE IS PROVIDED BY THE AUTHOR 'AS IS' AND ANY EXPRESSED
16 ;;; OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
17 ;;; WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 ;;; ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY
19 ;;; DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 ;;; DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE
21 ;;; GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
22 ;;; INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY,
23 ;;; WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
24 ;;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
25 ;;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 ;;; Loading data from the "cmap" table.
29 ;;; https://docs.microsoft.com/en-us/typography/opentype/spec/cmap
30 ;;; http://developer.apple.com/fonts/TTRefMan/RM06/Chap6cmap.html
32 ;;; $Id: cmap.lisp,v 1.15 2006/03/23 22:23:32 xach Exp $
34 (in-package #:zpb-ttf
)
36 (deftype cmap-value-table
()
37 `(array (unsigned-byte 16) (*)))
39 ;;; FIXME: "unicode-cmap" is actually a format 4 character map that
40 ;;; happens to currently be loaded from a Unicode-compatible
41 ;;; subtable. However, other character maps (like Microsoft's Symbol
42 ;;; encoding) also use format 4 and could be loaded with these
43 ;;; "unicode" objects and functions.
45 (defclass unicode-cmap
()
46 ((segment-count :initarg
:segment-count
:reader segment-count
)
47 (end-codes :initarg
:end-codes
:reader end-codes
)
48 (start-codes :initarg
:start-codes
:reader start-codes
)
49 (id-deltas :initarg
:id-deltas
:reader id-deltas
)
50 (id-range-offsets :initarg
:id-range-offsets
:reader id-range-offsets
)
51 (glyph-indexes :initarg
:glyph-indexes
:accessor glyph-indexes
)))
53 (defclass format-12-cmap
()
54 ((group-count :initarg
:group-count
:reader group-count
)
55 (start-codes :initarg
:start-codes
:reader start-codes
)
56 (end-codes :initarg
:end-codes
:reader end-codes
)
57 (glyph-starts :initarg
:glyph-starts
:accessor glyph-starts
)))
59 (defun load-unicode-cmap-format12 (stream)
60 "Load a Unicode character map of type 12 from STREAM starting at the
61 current offset. Assumes format is already read and checked."
62 (let* ((reserved (read-uint16 stream
))
63 (subtable-length (read-uint32 stream
))
64 (language-code (read-uint32 stream
))
65 (group-count (read-uint32 stream
))
66 (start-codes (make-array group-count
67 :element-type
'(unsigned-byte 32)
69 (end-codes (make-array group-count
70 :element-type
'(unsigned-byte 32)
72 (glyph-starts (make-array group-count
73 :element-type
'(unsigned-byte 32)
75 (declare (ignore reserved language-code subtable-length
))
76 (loop for i below group-count
77 do
(setf (aref start-codes i
) (read-uint32 stream
)
78 (aref end-codes i
) (read-uint32 stream
)
79 (aref glyph-starts i
) (read-uint32 stream
)))
80 (make-instance 'format-12-cmap
81 :group-count group-count
82 :start-codes start-codes
84 :glyph-starts glyph-starts
)))
86 (defun load-unicode-cmap (stream)
87 "Load a Unicode character map of type 4 or 12 from STREAM starting at
89 (let ((format (read-uint16 stream
)))
91 (return-from load-unicode-cmap
(load-unicode-cmap-format12 stream
)))
93 (error 'unsupported-format
94 :location
"\"cmap\" subtable"
96 :expected-values
(list 4))))
97 (let ((table-start (- (file-position stream
) 2))
98 (subtable-length (read-uint16 stream
))
99 (language-code (read-uint16 stream
))
100 (segment-count (/ (read-uint16 stream
) 2))
101 (search-range (read-uint16 stream
))
102 (entry-selector (read-uint16 stream
))
103 (range-shift (read-uint16 stream
)))
104 (declare (ignore language-code search-range entry-selector range-shift
))
105 (flet ((make-and-load-array (&optional
(size segment-count
))
106 (loop with array
= (make-array size
107 :element-type
'(unsigned-byte 16)
110 do
(setf (aref array i
) (read-uint16 stream
))
111 finally
(return array
)))
114 (1- (- (logandc2 #xFFFF i
)))
116 (let ((end-codes (make-and-load-array))
117 (pad (read-uint16 stream
))
118 (start-codes (make-and-load-array))
119 (id-deltas (make-and-load-array))
120 (id-range-offsets (make-and-load-array))
121 (glyph-index-array-size (/ (- subtable-length
122 (- (file-position stream
)
125 (declare (ignore pad
))
126 (make-instance 'unicode-cmap
127 :segment-count segment-count
129 :start-codes start-codes
130 ;; these are really signed, so sign them
131 :id-deltas
(map 'vector
#'make-signed id-deltas
)
132 :id-range-offsets id-range-offsets
133 :glyph-indexes
(make-and-load-array glyph-index-array-size
))))))
136 (defun %decode-format-4-cmap-code-point-index
(code-point cmap index
)
137 "Return the index of the Unicode CODE-POINT in a format 4 CMAP, if
138 present, otherwise NIL. Assumes INDEX points to the element of the
139 CMAP arrays (END-CODES etc) corresponding to code-point."
140 (with-slots (end-codes start-codes
141 id-deltas id-range-offsets
144 (declare (type cmap-value-table
145 end-codes start-codes
148 (let ((start-code (aref start-codes index
))
149 (end-code (aref end-codes index
))
150 (id-range-offset (aref id-range-offsets index
))
151 (id-delta (aref id-deltas index
)))
153 ((< code-point start-code
)
155 ;; ignore empty final segment
156 ((and (= 65535 start-code end-code
))
158 ((zerop id-range-offset
)
159 (logand #xFFFF
(+ code-point id-delta
)))
161 (let* ((glyph-index-offset (- (+ index
162 (ash id-range-offset -
1)
163 (- code-point start-code
))
164 (segment-count cmap
)))
165 (glyph-index (aref (glyph-indexes cmap
)
166 glyph-index-offset
)))
168 (+ glyph-index id-delta
))))))))
170 (defun %decode-format-12-cmap-code-point-index
(code-point cmap index
)
171 "Return the index of the Unicode CODE-POINT in a format 12 CMAP, if
172 present, otherwise NIL. Assumes INDEX points to the element of the
173 CMAP arrays (END-CODES etc) corresponding to code-point."
174 (with-slots (end-codes start-codes glyph-starts
)
176 (declare (type (simple-array (unsigned-byte 32))
177 end-codes start-codes glyph-starts
))
178 (let ((start-code (aref start-codes index
))
179 (start-glyph-id (aref glyph-starts index
)))
180 (if (< code-point start-code
)
182 (+ start-glyph-id
(- code-point start-code
))))))
184 (defgeneric code-point-font-index-from-cmap
(code-point cmap
)
185 (:documentation
"Return the index of the Unicode CODE-POINT in
186 CMAP, if present, otherwise NIL.")
187 (:method
(code-point (cmap unicode-cmap
))
188 (with-slots (end-codes)
190 (declare (type cmap-value-table end-codes
))
191 (dotimes (i (segment-count cmap
) 1)
192 (when (<= code-point
(aref end-codes i
))
193 (return (%decode-format-4-cmap-code-point-index code-point cmap i
))))))
194 (:method
(code-point (cmap format-12-cmap
))
195 (with-slots (end-codes)
197 (declare (type (simple-array (unsigned-byte 32)) end-codes
))
198 (dotimes (i (group-count cmap
) 1)
199 (when (<= code-point
(aref end-codes i
))
201 (%decode-format-12-cmap-code-point-index code-point cmap i
)))))))
203 (defmethod invert-character-map (font-loader)
204 "Return a vector mapping font indexes to code points."
205 (with-slots (start-codes end-codes
)
206 (character-map font-loader
)
207 (let ((points (make-array (glyph-count font-loader
) :initial-element -
1))
208 (cmap (character-map font-loader
)))
209 (dotimes (i (length end-codes
) points
)
210 (loop for j from
(aref start-codes i
) to
(aref end-codes i
)
214 (%decode-format-4-cmap-code-point-index j cmap i
))
216 (%decode-format-12-cmap-code-point-index j cmap i
))
218 (code-point-font-index-from-cmap j cmap
)))
219 when
(minusp (svref points font-index
))
220 do
(setf (svref points font-index
) j
))))))
223 (defgeneric code-point-font-index
(code-point font-loader
)
224 (:documentation
"Return the index of the Unicode CODE-POINT in
225 FONT-LOADER, if present, otherwise NIL.")
226 (:method
(code-point font-loader
)
227 (code-point-font-index-from-cmap code-point
(character-map font-loader
))))
229 (defgeneric font-index-code-point
(glyph-index font-loader
)
230 (:documentation
"Return the code-point for a given glyph index.")
231 (:method
(glyph-index font-loader
)
232 (let ((point (aref (inverse-character-map font-loader
) glyph-index
)))
237 (defun %load-cmap-info
(font-loader platform specific
)
238 (seek-to-table "cmap" font-loader
)
239 (with-slots (input-stream)
241 (let ((start-pos (file-position input-stream
))
242 (version-number (read-uint16 input-stream
))
243 (subtable-count (read-uint16 input-stream
))
245 (declare (ignore version-number
))
246 (loop repeat subtable-count
247 for platform-id
= (read-uint16 input-stream
)
248 for platform-specific-id
= (read-uint16 input-stream
)
249 for offset
= (+ start-pos
(read-uint32 input-stream
))
250 when
(and (= platform-id platform
)
251 (or (eql platform-specific-id specific
)
252 (and (consp specific
)
253 (member platform-specific-id specific
))))
255 (file-position input-stream offset
)
256 (setf (character-map font-loader
) (load-unicode-cmap input-stream
))
257 (setf (inverse-character-map font-loader
)
258 (invert-character-map font-loader
)
263 (defun %unknown-cmap-error
(font-loader)
264 (seek-to-table "cmap" font-loader
)
265 (with-slots (input-stream)
267 (let ((start-pos (file-position input-stream
))
268 (version-number (read-uint16 input-stream
))
269 (subtable-count (read-uint16 input-stream
))
271 (declare (ignore version-number
))
272 (loop repeat subtable-count
273 for platform-id
= (read-uint16 input-stream
)
274 for platform-specific-id
= (read-uint16 input-stream
)
275 for offset
= (+ start-pos
(read-uint32 input-stream
))
276 for pos
= (file-position input-stream
)
277 do
(file-position input-stream offset
)
278 (push (list (platform-id-name platform-id
)
279 (encoding-id-name platform-id platform-specific-id
)
280 :type
(read-uint16 input-stream
))
282 (file-position input-stream pos
))
283 (error "Could not find supported character map in font file~% available cmap tables = ~s"
286 (defmethod load-cmap-info ((font-loader font-loader
))
287 (or (%load-cmap-info font-loader
+unicode-platform-id
+
288 +unicode-2.0-full-encoding-id
+) ;; full unicode
289 (%load-cmap-info font-loader
+microsoft-platform-id
+
290 +microsoft-unicode-ucs4-encoding-id
+) ;; full unicode
291 (%load-cmap-info font-loader
+microsoft-platform-id
+
292 +microsoft-unicode-bmp-encoding-id
+) ;; bmp
293 (%load-cmap-info font-loader
+unicode-platform-id
+
294 +unicode-2.0-encoding-id
+) ;; bmp
295 (%load-cmap-info font-loader
+unicode-platform-id
+
296 '(0 1 2 3 4)) ;; all except variation and last-resort
297 (%load-cmap-info font-loader
+microsoft-platform-id
+
298 +microsoft-symbol-encoding-id
+) ;; ms symbol
299 (%unknown-cmap-error font-loader
)))
301 (defun available-character-maps (loader)
302 (seek-to-table "cmap" loader
)
303 (let ((stream (input-stream loader
)))
304 (let ((start-pos (file-position stream
))
305 (version-number (read-uint16 stream
))
306 (subtable-count (read-uint16 stream
)))
307 (declare (ignore start-pos
))
308 (assert (zerop version-number
))
309 (dotimes (i subtable-count
)
310 (let ((platform-id (read-uint16 stream
))
311 (encoding-id (read-uint16 stream
))
312 (offset (read-uint32 stream
)))
313 (declare (ignore offset
))
314 (format t
"~D (~A) - ~D (~A)~%"
315 platform-id
(platform-id-name platform-id
)
316 encoding-id
(encoding-id-name platform-id encoding-id
)))))))