1 // Copyright (c) 2009 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
15 // cmap - Character To Glyph Index Mapping Table
16 // http://www.microsoft.com/typography/otspec/cmap.htm
18 #define TABLE_NAME "cmap"
// Header record for one cmap subtable. ots_cmap_parse reads the platform,
// encoding and offset members from the table directory, then fills in the
// format, length and language members from the subtable itself.
22 struct CMAPSubtableHeader
{
// One segment of a format 4 subtable, as filled in by ParseFormat4.
31 struct Subtable314Range
{
// Raw id_range_offset value read for this segment.
35 uint16_t id_range_offset
;
// Position within the subtable at which id_range_offset was read
// (ParseFormat4 stores subtable.offset() just before reading the value).
36 uint32_t id_range_offset_offset
;
39 // The maximum number of groups in format 12, 13 or 14 subtables.
40 // Note: 0xFFFF is the maximum number of glyphs in a single font file.
41 const unsigned kMaxCMAPGroups
= 0xFFFF;
43 // Glyph array size for the Mac Roman (format 0) table.
44 const size_t kFormat0ArraySize
= 256;
46 // The upper limit of the Unicode code point.
47 const uint32_t kUnicodeUpperLimit
= 0x10FFFF;
49 // The maximum number of UVS records: 3 + 16 + 240 = 259 (see below).
50 const uint32_t kMaxCMAPSelectorRecords
= 259;
51 // The ranges of UVSes are:
52 // 0x180B-0x180D (3 code points)
53 // 0xFE00-0xFE0F (16 code points)
54 // 0xE0100-0xE01EF (240 code points)
55 const uint32_t kMongolianVSStart
= 0x180B;
56 const uint32_t kMongolianVSEnd
= 0x180D;
57 const uint32_t kVSStart
= 0xFE00;
58 const uint32_t kVSEnd
= 0xFE0F;
59 const uint32_t kIVSStart
= 0xE0100;
60 const uint32_t kIVSEnd
= 0xE01EF;
// Upper bound that Parse0514 applies to unicode_value + additional_count of a
// default UVS range.
61 const uint32_t kUVSUpperLimit
= 0xFFFFFF;
63 // Parses Format 4 tables
// Validates the format 4 subtable in |data| (|length| bytes). When all checks
// pass, the data pointer and length are stored (not copied) on font->cmap in
// the slot selected by |platform|/|encoding|: 3-0, 3-1 or 0-3. Any other
// combination is a failure. Every glyph index reachable from the segments
// must be below |num_glyphs|. As a side effect, font->os2->first_char_index
// may be lowered and font->os2->last_char_index raised to match the segments.
// Failures are reported through OTS_FAILURE_MSG.
64 bool ParseFormat4(ots::Font
*font
, int platform
, int encoding
,
65 const uint8_t *data
, size_t length
, uint16_t num_glyphs
) {
66 ots::Buffer
subtable(data
, length
);
68 // 0.3.4, 3.0.4 or 3.1.4 subtables are complex and, rather than expanding the
69 // whole thing and recompacting it, we validate it and include it verbatim
73 return OTS_FAILURE_MSG("Required OS/2 table missing");
76 if (!subtable
.Skip(4)) {
77 return OTS_FAILURE_MSG("Can't read 4 bytes at start of cmap format 4 subtable");
79 uint16_t language
= 0;
80 if (!subtable
.ReadU16(&language
)) {
81 return OTS_FAILURE_MSG("Can't read language");
84 // Platform ID 3 (windows) subtables should have language '0'.
85 return OTS_FAILURE_MSG("Languages should be 0 (%d)", language
);
88 uint16_t segcountx2
, search_range
, entry_selector
, range_shift
;
89 segcountx2
= search_range
= entry_selector
= range_shift
= 0;
90 if (!subtable
.ReadU16(&segcountx2
) ||
91 !subtable
.ReadU16(&search_range
) ||
92 !subtable
.ReadU16(&entry_selector
) ||
93 !subtable
.ReadU16(&range_shift
)) {
94 return OTS_FAILURE_MSG("Failed to read subcmap structure");
// Both values are defined as twice something, so they must be even.
97 if (segcountx2
& 1 || search_range
& 1) {
98 return OTS_FAILURE_MSG("Bad subcmap structure");
100 const uint16_t segcount
= segcountx2
>> 1;
101 // There must be at least one segment according to the spec.
103 return OTS_FAILURE_MSG("Segcount < 1 (%d)", segcount
);
106 // log2segcount is the maximal x s.t. 2^x <= segcount
107 unsigned log2segcount
= 0;
108 while (1u << (log2segcount
+ 1) <= segcount
) {
// Note: `2 * 1u << log2segcount` groups as (2 * 1u) << log2segcount.
112 const uint16_t expected_search_range
= 2 * 1u << log2segcount
;
113 if (expected_search_range
!= search_range
) {
114 return OTS_FAILURE_MSG("expected search range != search range (%d != %d)", expected_search_range
, search_range
);
117 if (entry_selector
!= log2segcount
) {
118 return OTS_FAILURE_MSG("entry selector != log2(segement count) (%d != %d)", entry_selector
, log2segcount
);
121 const uint16_t expected_range_shift
= segcountx2
- search_range
;
122 if (range_shift
!= expected_range_shift
) {
123 return OTS_FAILURE_MSG("unexpected range shift (%d != %d)", range_shift
, expected_range_shift
);
126 std::vector
<Subtable314Range
> ranges(segcount
);
// The per-segment fields are stored as parallel arrays: all end codes first.
128 for (unsigned i
= 0; i
< segcount
; ++i
) {
129 if (!subtable
.ReadU16(&ranges
[i
].end_range
)) {
130 return OTS_FAILURE_MSG("Failed to read segment %d", i
);
135 if (!subtable
.ReadU16(&padding
)) {
136 return OTS_FAILURE_MSG("Failed to read cmap subtable segment padding");
139 return OTS_FAILURE_MSG("Non zero cmap subtable segment padding (%d)", padding
);
// Then all start codes.
142 for (unsigned i
= 0; i
< segcount
; ++i
) {
143 if (!subtable
.ReadU16(&ranges
[i
].start_range
)) {
144 return OTS_FAILURE_MSG("Failed to read segment start range %d", i
);
// Then all (signed) id deltas.
147 for (unsigned i
= 0; i
< segcount
; ++i
) {
148 if (!subtable
.ReadS16(&ranges
[i
].id_delta
)) {
149 return OTS_FAILURE_MSG("Failed to read segment delta %d", i
);
// Then all id range offsets; the position each one was read from is kept
// because the glyph lookup below is relative to that position.
152 for (unsigned i
= 0; i
< segcount
; ++i
) {
153 ranges
[i
].id_range_offset_offset
= subtable
.offset();
154 if (!subtable
.ReadU16(&ranges
[i
].id_range_offset
)) {
155 return OTS_FAILURE_MSG("Failed to read segment range offset %d", i
);
158 if (ranges
[i
].id_range_offset
& 1) {
159 // Some font generators seem to put 65535 on id_range_offset
160 // for 0xFFFF-0xFFFF range.
161 // (e.g., many fonts in http://www.princexml.com/fonts/)
162 if (i
== segcount
- 1u) {
163 OTS_WARNING("bad id_range_offset");
164 ranges
[i
].id_range_offset
= 0;
165 // The id_range_offset value in the transcoded font will not change
166 // since this table is not actually "transcoded" yet.
168 return OTS_FAILURE_MSG("Bad segment offset (%d)", ranges
[i
].id_range_offset
);
173 // ranges must be ascending order, based on the end_code. Ranges may not
175 for (unsigned i
= 1; i
< segcount
; ++i
) {
176 if ((i
== segcount
- 1u) &&
177 (ranges
[i
- 1].start_range
== 0xffff) &&
178 (ranges
[i
- 1].end_range
== 0xffff) &&
179 (ranges
[i
].start_range
== 0xffff) &&
180 (ranges
[i
].end_range
== 0xffff)) {
181 // Some fonts (e.g., Germania.ttf) have multiple 0xffff terminators.
182 // We'll accept them as an exception.
183 OTS_WARNING("multiple 0xffff terminators found");
187 // Note: some Linux fonts (e.g., LucidaSansOblique.ttf, bsmi00lp.ttf) have
189 if (ranges
[i
].end_range
<= ranges
[i
- 1].end_range
) {
190 return OTS_FAILURE_MSG("Out of order end range (%d <= %d)", ranges
[i
].end_range
, ranges
[i
-1].end_range
);
192 if (ranges
[i
].start_range
<= ranges
[i
- 1].end_range
) {
193 return OTS_FAILURE_MSG("out of order start range (%d <= %d)", ranges
[i
].start_range
, ranges
[i
-1].end_range
);
196 // On many fonts, the value of {first, last}_char_index are incorrect.
198 if (font
->os2
->first_char_index
!= 0xFFFF &&
199 ranges
[i
].start_range
!= 0xFFFF &&
200 font
->os2
->first_char_index
> ranges
[i
].start_range
) {
201 font
->os2
->first_char_index
= ranges
[i
].start_range
;
203 if (font
->os2
->last_char_index
!= 0xFFFF &&
204 ranges
[i
].end_range
!= 0xFFFF &&
205 font
->os2
->last_char_index
< ranges
[i
].end_range
) {
206 font
->os2
->last_char_index
= ranges
[i
].end_range
;
210 // The last range must end at 0xffff
211 if (ranges
[segcount
- 1].start_range
!= 0xffff || ranges
[segcount
- 1].end_range
!= 0xffff) {
212 return OTS_FAILURE_MSG("Final segment start and end must be 0xFFFF (0x%04X-0x%04X)",
213 ranges
[segcount
- 1].start_range
, ranges
[segcount
- 1].end_range
);
216 // A format 4 CMAP subtable is complex. To be safe we simulate a lookup of
217 // each code-point defined in the table and make sure that they are all valid
218 // glyphs and that we don't access anything out-of-bounds.
219 for (unsigned i
= 0; i
< segcount
; ++i
) {
220 for (unsigned cp
= ranges
[i
].start_range
; cp
<= ranges
[i
].end_range
; ++cp
) {
221 const uint16_t code_point
= static_cast<uint16_t>(cp
);
222 if (ranges
[i
].id_range_offset
== 0) {
223 // this is explicitly allowed to overflow in the spec
224 const uint16_t glyph
= code_point
+ ranges
[i
].id_delta
;
225 if (glyph
>= num_glyphs
) {
226 return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", glyph
, num_glyphs
- 1);
229 const uint16_t range_delta
= code_point
- ranges
[i
].start_range
;
230 // this might seem odd, but it's true. The offset is relative to the
231 // location of the offset value itself.
232 const uint32_t glyph_id_offset
= ranges
[i
].id_range_offset_offset
+
233 ranges
[i
].id_range_offset
+
235 // We need to be able to access a 16-bit value from this offset
236 if (glyph_id_offset
+ 1 >= length
) {
237 return OTS_FAILURE_MSG("bad glyph id offset (%d > %ld)", glyph_id_offset
, length
);
// The glyph id is read straight from |data| and converted from network
// (big-endian) byte order with ntohs.
240 std::memcpy(&glyph
, data
+ glyph_id_offset
, 2);
241 glyph
= ntohs(glyph
);
242 if (glyph
>= num_glyphs
) {
243 return OTS_FAILURE_MSG("Range glyph reference too high (%d > %d)", glyph
, num_glyphs
- 1);
249 // We accept the table.
250 // TODO(yusukes): transcode the subtable.
251 if (platform
== 3 && encoding
== 0) {
252 font
->cmap
->subtable_3_0_4_data
= data
;
253 font
->cmap
->subtable_3_0_4_length
= length
;
254 } else if (platform
== 3 && encoding
== 1) {
255 font
->cmap
->subtable_3_1_4_data
= data
;
256 font
->cmap
->subtable_3_1_4_length
= length
;
257 } else if (platform
== 0 && encoding
== 3) {
258 font
->cmap
->subtable_0_3_4_data
= data
;
259 font
->cmap
->subtable_0_3_4_length
= length
;
261 return OTS_FAILURE_MSG("Unknown cmap subtable type (platform=%d, encoding=%d)", platform
, encoding
);
// Parses a format 12 subtable from |data| (|length| bytes) into
// font->cmap->subtable_3_10_12. The group count must be between 1 and
// kMaxCMAPGroups. Each group must have start/end code points no larger than
// kUnicodeUpperLimit, a start glyph id no larger than 0xFFFF, no surrogate
// code points, end >= start, and a glyph span that does not exceed
// |num_glyphs|. Groups must be strictly ascending by start code and must not
// overlap. Failures are reported through OTS_FAILURE_MSG.
267 bool Parse31012(ots::Font
*font
,
268 const uint8_t *data
, size_t length
, uint16_t num_glyphs
) {
269 ots::Buffer
subtable(data
, length
);
271 // Format 12 tables are simple. We parse these and fully serialise them
274 if (!subtable
.Skip(8)) {
275 return OTS_FAILURE_MSG("failed to skip the first 8 bytes of format 12 subtable");
277 uint32_t language
= 0;
278 if (!subtable
.ReadU32(&language
)) {
279 return OTS_FAILURE_MSG("can't read format 12 subtable language");
282 return OTS_FAILURE_MSG("format 12 subtable language should be zero (%d)", language
);
285 uint32_t num_groups
= 0;
286 if (!subtable
.ReadU32(&num_groups
)) {
287 return OTS_FAILURE_MSG("can't read number of format 12 subtable groups");
289 if (num_groups
== 0 || num_groups
> kMaxCMAPGroups
) {
290 return OTS_FAILURE_MSG("Bad format 12 subtable group count %d", num_groups
);
// The groups are parsed directly into the vector stored on the font.
293 std::vector
<ots::OpenTypeCMAPSubtableRange
> &groups
294 = font
->cmap
->subtable_3_10_12
;
295 groups
.resize(num_groups
);
297 for (unsigned i
= 0; i
< num_groups
; ++i
) {
298 if (!subtable
.ReadU32(&groups
[i
].start_range
) ||
299 !subtable
.ReadU32(&groups
[i
].end_range
) ||
300 !subtable
.ReadU32(&groups
[i
].start_glyph_id
)) {
301 return OTS_FAILURE_MSG("can't read format 12 subtable group");
304 if (groups
[i
].start_range
> kUnicodeUpperLimit
||
305 groups
[i
].end_range
> kUnicodeUpperLimit
||
306 groups
[i
].start_glyph_id
> 0xFFFF) {
307 return OTS_FAILURE_MSG("bad format 12 subtable group (startCharCode=0x%4X, endCharCode=0x%4X, startGlyphID=%d)",
308 groups
[i
].start_range
, groups
[i
].end_range
, groups
[i
].start_glyph_id
);
311 // [0xD800, 0xDFFF] are surrogate code points.
312 if (groups
[i
].start_range
>= 0xD800 &&
313 groups
[i
].start_range
<= 0xDFFF) {
314 return OTS_FAILURE_MSG("format 12 subtable out of range group startCharCode (0x%4X)", groups
[i
].start_range
);
316 if (groups
[i
].end_range
>= 0xD800 &&
317 groups
[i
].end_range
<= 0xDFFF) {
318 return OTS_FAILURE_MSG("format 12 subtable out of range group endCharCode (0x%4X)", groups
[i
].end_range
);
// A group that starts below the surrogate block and ends above it would
// span the surrogates, so it is rejected as well.
320 if (groups
[i
].start_range
< 0xD800 &&
321 groups
[i
].end_range
> 0xDFFF) {
322 return OTS_FAILURE_MSG("bad format 12 subtable group startCharCode (0x%4X) or endCharCode (0x%4X)",
323 groups
[i
].start_range
, groups
[i
].end_range
);
326 // We assert that the glyph value is within range. Because of the range
327 // limits, above, we don't need to worry about overflow.
328 if (groups
[i
].end_range
< groups
[i
].start_range
) {
329 return OTS_FAILURE_MSG("format 12 subtable group endCharCode before startCharCode (0x%4X < 0x%4X)",
330 groups
[i
].end_range
, groups
[i
].start_range
);
332 if ((groups
[i
].end_range
- groups
[i
].start_range
) +
333 groups
[i
].start_glyph_id
> num_glyphs
) {
334 return OTS_FAILURE_MSG("bad format 12 subtable group startGlyphID (%d)", groups
[i
].start_glyph_id
);
338 // the groups must be sorted by start code and may not overlap
339 for (unsigned i
= 1; i
< num_groups
; ++i
) {
340 if (groups
[i
].start_range
<= groups
[i
- 1].start_range
) {
341 return OTS_FAILURE_MSG("out of order format 12 subtable group (startCharCode=0x%4X <= startCharCode=0x%4X of previous group)",
342 groups
[i
].start_range
, groups
[i
-1].start_range
);
344 if (groups
[i
].start_range
<= groups
[i
- 1].end_range
) {
345 return OTS_FAILURE_MSG("overlapping format 12 subtable groups (startCharCode=0x%4X <= endCharCode=0x%4X of previous group)",
346 groups
[i
].start_range
, groups
[i
-1].end_range
);
// Parses a format 13 subtable from |data| (|length| bytes) into
// font->cmap->subtable_3_10_13. The group count must be between 1 and
// kMaxCMAPGroups. Each group must have start/end code points no larger than
// kUnicodeUpperLimit and a glyph id that is both no larger than 0xFFFF and
// below |num_glyphs|. Groups must be strictly ascending by start code and
// must not overlap. Failures are reported through OTS_FAILURE_MSG.
353 bool Parse31013(ots::Font
*font
,
354 const uint8_t *data
, size_t length
, uint16_t num_glyphs
) {
355 ots::Buffer
subtable(data
, length
);
357 // Format 13 tables are simple. We parse these and fully serialise them
360 if (!subtable
.Skip(8)) {
361 return OTS_FAILURE_MSG("Bad cmap subtable length");
363 uint32_t language
= 0;
364 if (!subtable
.ReadU32(&language
)) {
365 return OTS_FAILURE_MSG("Can't read cmap subtable language");
368 return OTS_FAILURE_MSG("Cmap subtable language should be zero but is %d", language
);
371 uint32_t num_groups
= 0;
372 if (!subtable
.ReadU32(&num_groups
)) {
373 return OTS_FAILURE_MSG("Can't read number of groups in a cmap subtable");
376 // We limit the number of groups in the same way as in 3.10.12 tables. See
377 // the comment there in
378 if (num_groups
== 0 || num_groups
> kMaxCMAPGroups
) {
379 return OTS_FAILURE_MSG("Bad format 13 subtable group count %d", num_groups
);
// The groups are parsed directly into the vector stored on the font.
382 std::vector
<ots::OpenTypeCMAPSubtableRange
> &groups
383 = font
->cmap
->subtable_3_10_13
;
384 groups
.resize(num_groups
);
386 for (unsigned i
= 0; i
< num_groups
; ++i
) {
387 if (!subtable
.ReadU32(&groups
[i
].start_range
) ||
388 !subtable
.ReadU32(&groups
[i
].end_range
) ||
389 !subtable
.ReadU32(&groups
[i
].start_glyph_id
)) {
390 return OTS_FAILURE_MSG("Can't read subrange structure in a cmap subtable");
393 // We conservatively limit all of the values to protect some parsers from
395 if (groups
[i
].start_range
> kUnicodeUpperLimit
||
396 groups
[i
].end_range
> kUnicodeUpperLimit
||
397 groups
[i
].start_glyph_id
> 0xFFFF) {
398 return OTS_FAILURE_MSG("Bad subrange with start_range=%d, end_range=%d, start_glyph_id=%d", groups
[i
].start_range
, groups
[i
].end_range
, groups
[i
].start_glyph_id
);
// Unlike format 12, only the glyph id itself is range-checked here; the
// width of the code point range does not enter the check.
401 if (groups
[i
].start_glyph_id
>= num_glyphs
) {
402 return OTS_FAILURE_MSG("Subrange starting glyph id too high (%d > %d)", groups
[i
].start_glyph_id
, num_glyphs
);
406 // the groups must be sorted by start code and may not overlap
407 for (unsigned i
= 1; i
< num_groups
; ++i
) {
408 if (groups
[i
].start_range
<= groups
[i
- 1].start_range
) {
409 return OTS_FAILURE_MSG("Overlapping subrange starts (%d >= %d)", groups
[i
]. start_range
, groups
[i
-1].start_range
);
411 if (groups
[i
].start_range
<= groups
[i
- 1].end_range
) {
412 return OTS_FAILURE_MSG("Overlapping subranges (%d <= %d)", groups
[i
].start_range
, groups
[i
-1].end_range
);
// Parses a format 14 (Unicode Variation Sequences) subtable from |data|
// (|length| bytes) into font->cmap->subtable_0_5_14.
//
// The record count must be between 1 and kMaxCMAPSelectorRecords. Each record
// holds a 24-bit variation selector plus a default and a non-default offset.
// The selector must fall in one of the three variation selector ranges and
// must be greater than the previous record's selector. At least one of the
// two offsets must be non-zero, and a non-zero offset must be below |length|.
//
// For each record, the default UVS ranges and the non-default UVS mappings
// are then read from those offsets and validated. Parsing must finish exactly
// at |length|; that position is stored in font->cmap->subtable_0_5_14_length.
// Failures are reported through OTS_FAILURE_MSG.
419 bool Parse0514(ots::Font
*font
,
420 const uint8_t *data
, size_t length
, uint16_t num_glyphs
) {
421 // Unicode Variation Selector table
422 ots::Buffer
subtable(data
, length
);
424 // Format 14 tables are simple. We parse these and fully serialise them
427 // Skip format (USHORT) and length (ULONG)
428 if (!subtable
.Skip(6)) {
429 return OTS_FAILURE_MSG("Can't read start of cmap subtable");
432 uint32_t num_records
= 0;
433 if (!subtable
.ReadU32(&num_records
)) {
434 return OTS_FAILURE_MSG("Can't read number of records in cmap subtable");
436 if (num_records
== 0 || num_records
> kMaxCMAPSelectorRecords
) {
437 return OTS_FAILURE_MSG("Bad format 14 subtable records count %d", num_records
);
// The records are parsed directly into the vector stored on the font.
440 std::vector
<ots::OpenTypeCMAPSubtableVSRecord
>& records
441 = font
->cmap
->subtable_0_5_14
;
442 records
.resize(num_records
);
// First pass: read and validate the fixed-size record headers.
444 for (unsigned i
= 0; i
< num_records
; ++i
) {
445 if (!subtable
.ReadU24(&records
[i
].var_selector
) ||
446 !subtable
.ReadU32(&records
[i
].default_offset
) ||
447 !subtable
.ReadU32(&records
[i
].non_default_offset
)) {
448 return OTS_FAILURE_MSG("Can't read record structure of record %d in cmap subtale", i
);
450 // Checks the value of variation selector
451 if (!((records
[i
].var_selector
>= kMongolianVSStart
&&
452 records
[i
].var_selector
<= kMongolianVSEnd
) ||
453 (records
[i
].var_selector
>= kVSStart
&&
454 records
[i
].var_selector
<= kVSEnd
) ||
455 (records
[i
].var_selector
>= kIVSStart
&&
456 records
[i
].var_selector
<= kIVSEnd
))) {
457 return OTS_FAILURE_MSG("Bad record variation selector (%04X) in record %i", records
[i
].var_selector
, i
);
460 records
[i
-1].var_selector
>= records
[i
].var_selector
) {
461 return OTS_FAILURE_MSG("Out of order variation selector (%04X >= %04X) in record %d", records
[i
-1].var_selector
, records
[i
].var_selector
, i
);
// A record that has neither a default nor a non-default table is rejected.
465 if (!records
[i
].default_offset
&& !records
[i
].non_default_offset
) {
466 return OTS_FAILURE_MSG("No default aoffset in variation selector record %d", i
);
468 if (records
[i
].default_offset
&&
469 records
[i
].default_offset
>= length
) {
470 return OTS_FAILURE_MSG("Default offset too high (%d >= %ld) in record %d", records
[i
].default_offset
, length
, i
);
472 if (records
[i
].non_default_offset
&&
473 records
[i
].non_default_offset
>= length
) {
474 return OTS_FAILURE_MSG("Non default offset too high (%d >= %ld) in record %d", records
[i
].non_default_offset
, length
, i
);
// Second pass: follow each record's offsets and parse the tables they
// point to.
478 for (unsigned i
= 0; i
< num_records
; ++i
) {
479 // Checks default UVS table
480 if (records
[i
].default_offset
) {
481 subtable
.set_offset(records
[i
].default_offset
);
482 uint32_t num_ranges
= 0;
483 if (!subtable
.ReadU32(&num_ranges
)) {
484 return OTS_FAILURE_MSG("Can't read number of ranges in record %d", i
);
486 if (num_ranges
== 0 || num_ranges
> kMaxCMAPGroups
) {
487 return OTS_FAILURE_MSG("Bad number of ranges (%d) in record %d", num_ranges
, i
);
490 uint32_t last_unicode_value
= 0;
491 std::vector
<ots::OpenTypeCMAPSubtableVSRange
>& ranges
493 ranges
.resize(num_ranges
);
495 for (unsigned j
= 0; j
< num_ranges
; ++j
) {
496 if (!subtable
.ReadU24(&ranges
[j
].unicode_value
) ||
497 !subtable
.ReadU8(&ranges
[j
].additional_count
)) {
498 return OTS_FAILURE_MSG("Can't read range info in variation selector record %d", i
);
// check_value is the last code point covered by this range; the next
// range must start strictly after it.
500 const uint32_t check_value
=
501 ranges
[j
].unicode_value
+ ranges
[j
].additional_count
;
502 if (ranges
[j
].unicode_value
== 0 ||
503 ranges
[j
].unicode_value
> kUnicodeUpperLimit
||
504 check_value
> kUVSUpperLimit
||
505 (last_unicode_value
&&
506 ranges
[j
].unicode_value
<= last_unicode_value
)) {
507 return OTS_FAILURE_MSG("Bad Unicode value *%04X) in variation selector range %d record %d", ranges
[j
].unicode_value
, j
, i
);
509 last_unicode_value
= check_value
;
513 // Checks non default UVS table
514 if (records
[i
].non_default_offset
) {
515 subtable
.set_offset(records
[i
].non_default_offset
);
516 uint32_t num_mappings
= 0;
517 if (!subtable
.ReadU32(&num_mappings
)) {
518 return OTS_FAILURE_MSG("Can't read number of mappings in variation selector record %d", i
);
520 if (num_mappings
== 0) {
521 return OTS_FAILURE_MSG("Bad number of mappings (%d) in variation selector record %d", num_mappings
, i
);
524 uint32_t last_unicode_value
= 0;
525 std::vector
<ots::OpenTypeCMAPSubtableVSMapping
>& mappings
526 = records
[i
].mappings
;
527 mappings
.resize(num_mappings
);
529 for (unsigned j
= 0; j
< num_mappings
; ++j
) {
530 if (!subtable
.ReadU24(&mappings
[j
].unicode_value
) ||
531 !subtable
.ReadU16(&mappings
[j
].glyph_id
)) {
532 return OTS_FAILURE_MSG("Can't read mapping %d in variation selector record %d", j
, i
);
// Glyph id 0 and code point 0 are rejected, and the code points must be
// strictly ascending.
534 if (mappings
[j
].glyph_id
== 0 ||
535 mappings
[j
].unicode_value
== 0 ||
536 mappings
[j
].unicode_value
> kUnicodeUpperLimit
||
537 (last_unicode_value
&&
538 mappings
[j
].unicode_value
<= last_unicode_value
)) {
539 return OTS_FAILURE_MSG("Bad mapping (%04X -> %d) in mapping %d of variation selector %d", mappings
[j
].unicode_value
, mappings
[j
].glyph_id
, j
, i
);
541 last_unicode_value
= mappings
[j
].unicode_value
;
// Parsing must finish exactly at the end of the subtable.
546 if (subtable
.offset() != length
) {
547 return OTS_FAILURE_MSG("Bad subtable offset (%ld != %ld)", subtable
.offset(), length
);
549 font
->cmap
->subtable_0_5_14_length
= subtable
.offset();
// Parses the 1-0-0 (Mac Roman, format 0) subtable from |data| (|length|
// bytes). It skips the first 4 bytes, reads a 16-bit language field, then
// reads kFormat0ArraySize one-byte glyph ids and appends them to
// font->cmap->subtable_1_0_0. The language id only produces OTS_WARNING;
// read failures are reported through OTS_FAILURE_MSG.
553 bool Parse100(ots::Font
*font
, const uint8_t *data
, size_t length
) {
555 ots::Buffer
subtable(data
, length
);
557 if (!subtable
.Skip(4)) {
558 return OTS_FAILURE_MSG("Bad cmap subtable");
560 uint16_t language
= 0;
561 if (!subtable
.ReadU16(&language
)) {
562 return OTS_FAILURE_MSG("Can't read language in cmap subtable");
565 // simsun.ttf has non-zero language id.
566 OTS_WARNING("language id should be zero: %u", language
);
// Reserve once so the push_back loop below does not reallocate.
569 font
->cmap
->subtable_1_0_0
.reserve(kFormat0ArraySize
);
570 for (size_t i
= 0; i
< kFormat0ArraySize
; ++i
) {
571 uint8_t glyph_id
= 0;
572 if (!subtable
.ReadU8(&glyph_id
)) {
573 return OTS_FAILURE_MSG("Can't read glyph id at array[%ld] in cmap subtable", i
);
575 font
->cmap
->subtable_1_0_0
.push_back(glyph_id
);
// Parses the cmap table in |data| (|length| bytes) into a newly allocated
// font->cmap. Steps, in order:
//   1. read the version and the subtable count;
//   2. read the per-subtable platform/encoding/offset records;
//   3. validate the offsets;
//   4. read each subtable's format, length and language;
//   5. warn when the records are not sorted;
//   6. validate subtable lengths and reject excessive overlap;
//   7. hand the supported platform/encoding/format combinations to
//      ParseFormat4, Parse31012, Parse31013, Parse0514 and Parse100.
// The glyph count passed to those parsers comes from font->maxp.
585 bool ots_cmap_parse(Font
*font
, const uint8_t *data
, size_t length
) {
586 Buffer
table(data
, length
);
// NOTE(review): raw allocation; where it is released is not visible in this
// function -- confirm ownership.
587 font
->cmap
= new OpenTypeCMAP
;
589 uint16_t version
= 0;
590 uint16_t num_tables
= 0;
591 if (!table
.ReadU16(&version
) ||
592 !table
.ReadU16(&num_tables
)) {
593 return OTS_FAILURE_MSG("Can't read structure of cmap");
597 return OTS_FAILURE_MSG("Non zero cmap version (%d)", version
);
600 return OTS_FAILURE_MSG("No subtables in cmap!");
603 std::vector
<CMAPSubtableHeader
> subtable_headers
;
605 // read the subtable headers
606 subtable_headers
.reserve(num_tables
);
607 for (unsigned i
= 0; i
< num_tables
; ++i
) {
608 CMAPSubtableHeader subt
;
610 if (!table
.ReadU16(&subt
.platform
) ||
611 !table
.ReadU16(&subt
.encoding
) ||
612 !table
.ReadU32(&subt
.offset
)) {
613 return OTS_FAILURE_MSG("Can't read subtable information cmap subtable %d", i
);
616 subtable_headers
.push_back(subt
);
// Subtable data must start at or after the end of the header records.
619 const size_t data_offset
= table
.offset();
621 // make sure that all the offsets are valid.
622 for (unsigned i
= 0; i
< num_tables
; ++i
) {
623 if (subtable_headers
[i
].offset
> 1024 * 1024 * 1024) {
624 return OTS_FAILURE_MSG("Bad subtable offset in cmap subtable %d", i
);
626 if (subtable_headers
[i
].offset
< data_offset
||
627 subtable_headers
[i
].offset
>= length
) {
628 return OTS_FAILURE_MSG("Bad subtable offset (%d) in cmap subtable %d", subtable_headers
[i
].offset
, i
);
632 // the format of the table is the first couple of bytes in the table. The
633 // length of the table is stored in a format-specific way.
634 for (unsigned i
= 0; i
< num_tables
; ++i
) {
635 table
.set_offset(subtable_headers
[i
].offset
);
636 if (!table
.ReadU16(&subtable_headers
[i
].format
)) {
637 return OTS_FAILURE_MSG("Can't read cmap subtable header format %d", i
);
642 switch (subtable_headers
[i
].format
) {
645 if (!table
.ReadU16(&len
)) {
646 return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i
);
648 if (!table
.ReadU16(&lang
)) {
649 return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i
);
651 subtable_headers
[i
].length
= len
;
652 subtable_headers
[i
].language
= lang
;
656 if (!table
.Skip(2)) {
657 return OTS_FAILURE_MSG("Bad cmap subtable %d structure", i
);
659 if (!table
.ReadU32(&subtable_headers
[i
].length
)) {
660 return OTS_FAILURE_MSG("Can read cmap subtable %d length", i
);
662 if (!table
.ReadU32(&subtable_headers
[i
].language
)) {
663 return OTS_FAILURE_MSG("Can't read cmap subtable %d language", i
);
667 if (!table
.ReadU32(&subtable_headers
[i
].length
)) {
668 return OTS_FAILURE_MSG("Can't read cmap subtable %d length", i
);
670 subtable_headers
[i
].language
= 0;
673 subtable_headers
[i
].length
= 0;
674 subtable_headers
[i
].language
= 0;
679 // check if the table is sorted first by platform ID, then by encoding ID.
680 uint32_t last_id
= 0;
681 for (unsigned i
= 0; i
< num_tables
; ++i
) {
// The sort key packs platform, encoding and language into one integer so a
// single comparison covers all three. Out-of-order records only produce a
// warning.
683 = (subtable_headers
[i
].platform
<< 24)
684 + (subtable_headers
[i
].encoding
<< 16)
685 + subtable_headers
[i
].language
;
686 if ((i
!= 0) && (last_id
>= current_id
)) {
687 OTS_WARNING("subtable %d with platform ID %d, encoding ID %d, language ID %d "
688 "following subtable with platform ID %d, encoding ID %d, language ID %d",
690 (uint8_t)(current_id
>> 24), (uint8_t)(current_id
>> 16), (uint8_t)(current_id
),
691 (uint8_t)(last_id
>> 24), (uint8_t)(last_id
>> 16), (uint8_t)(last_id
));
693 last_id
= current_id
;
696 // Now, verify that all the lengths are sane
697 for (unsigned i
= 0; i
< num_tables
; ++i
) {
698 if (!subtable_headers
[i
].length
) continue;
699 if (subtable_headers
[i
].length
> 1024 * 1024 * 1024) {
700 return OTS_FAILURE_MSG("Bad cmap subtable %d length", i
);
702 // We know that both the offset and length are < 1GB, so the following
703 // addition doesn't overflow
704 const uint32_t end_byte
705 = subtable_headers
[i
].offset
+ subtable_headers
[i
].length
;
706 if (end_byte
> length
) {
707 return OTS_FAILURE_MSG("Over long cmap subtable %d @ %d for %d", i
, subtable_headers
[i
].offset
, subtable_headers
[i
].length
);
711 // check that the cmap subtables are not overlapping.
712 std::set
<std::pair
<uint32_t, uint32_t> > uniq_checker
;
713 std::vector
<std::pair
<uint32_t, uint8_t> > overlap_checker
;
714 for (unsigned i
= 0; i
< num_tables
; ++i
) {
715 const uint32_t end_byte
716 = subtable_headers
[i
].offset
+ subtable_headers
[i
].length
;
718 if (!uniq_checker
.insert(std::make_pair(subtable_headers
[i
].offset
,
720 // Sometimes Unicode table and MS table share exactly the same data.
// Each subtable contributes a start event (1) and an end event (0). After
// sorting, an end at a given byte sorts before a start at the same byte.
724 overlap_checker
.push_back(
725 std::make_pair(subtable_headers
[i
].offset
,
726 static_cast<uint8_t>(1) /* start */));
727 overlap_checker
.push_back(
728 std::make_pair(end_byte
, static_cast<uint8_t>(0) /* end */));
730 std::sort(overlap_checker
.begin(), overlap_checker
.end());
// Sweep the sorted events; more than one subtable open at once is a failure.
731 int overlap_count
= 0;
732 for (unsigned i
= 0; i
< overlap_checker
.size(); ++i
) {
733 overlap_count
+= (overlap_checker
[i
].second
? 1 : -1);
734 if (overlap_count
> 1) {
735 return OTS_FAILURE_MSG("Excessive overlap count %d", overlap_count
);
739 // we grab the number of glyphs in the file from the maxp table to make sure
740 // that the character map isn't referencing anything beyond this range.
742 return OTS_FAILURE_MSG("No maxp table in font! Needed by cmap.");
744 const uint16_t num_glyphs
= font
->maxp
->num_glyphs
;
746 // We only support a subset of the possible character map tables. Microsoft
747 // 'strongly recommends' that everyone supports the Unicode BMP table with
748 // the UCS-4 table for non-BMP glyphs. We'll pass the following subtables:
749 // Platform ID Encoding ID Format
750 // 0 0 4 (Unicode Default)
751 // 0 1 4 (Unicode 1.1)
752 // 0 3 4 (Unicode BMP)
753 // 0 3 12 (Unicode UCS-4)
754 // 0 5 14 (Unicode Variation Sequences)
757 // 3 1 4 (MS Unicode BMP)
758 // 3 10 12 (MS Unicode UCS-4)
759 // 3 10 13 (MS UCS-4 Fallback mapping)
762 // * 0-0-4 and 0-1-4 tables are (usually) written as a 3-1-4 table. If 3-1-4 table
763 // also exists, the 0-0-4 or 0-1-4 tables are ignored.
764 // * Unlike 0-0-4 table, 0-3-4 table is written as a 0-3-4 table.
765 // Some fonts which include a 0-5-14 table seem to require a 0-3-4
766 // table. The 0-3-4 table will be written even if 3-1-4 table also exists.
767 // * 0-3-12 table is written as a 3-10-12 table. If 3-10-12 table also
768 // exists, the 0-3-12 table is ignored.
771 for (unsigned i
= 0; i
< num_tables
; ++i
) {
772 if (subtable_headers
[i
].platform
== 0) {
775 if ((subtable_headers
[i
].encoding
== 0 || subtable_headers
[i
].encoding
== 1) &&
776 (subtable_headers
[i
].format
== 4)) {
777 // parse and output the 0-0-4 and 0-1-4 tables as 3-1-4 table. Sometimes the 0-0-4
778 // table actually points to MS symbol data and thus should be parsed as
779 // 3-0-4 table (e.g., marqueem.ttf and quixotic.ttf). This error will be
780 // recovered in ots_cmap_serialise().
781 if (!ParseFormat4(font
, 3, 1, data
+ subtable_headers
[i
].offset
,
782 subtable_headers
[i
].length
, num_glyphs
)) {
783 return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i
);
785 } else if ((subtable_headers
[i
].encoding
== 3) &&
786 (subtable_headers
[i
].format
== 4)) {
787 // parse and output the 0-3-4 table as 0-3-4 table.
788 if (!ParseFormat4(font
, 0, 3, data
+ subtable_headers
[i
].offset
,
789 subtable_headers
[i
].length
, num_glyphs
)) {
790 return OTS_FAILURE_MSG("Failed to parse format 4 cmap subtable %d", i
);
792 } else if ((subtable_headers
[i
].encoding
== 3) &&
793 (subtable_headers
[i
].format
== 12)) {
794 // parse and output the 0-3-12 table as 3-10-12 table.
795 if (!Parse31012(font
, data
+ subtable_headers
[i
].offset
,
796 subtable_headers
[i
].length
, num_glyphs
)) {
797 return OTS_FAILURE_MSG("Failed to parse format 12 cmap subtable %d", i
);
799 } else if ((subtable_headers
[i
].encoding
== 5) &&
800 (subtable_headers
[i
].format
== 14)) {
801 if (!Parse0514(font
, data
+ subtable_headers
[i
].offset
,
802 subtable_headers
[i
].length
, num_glyphs
)) {
803 return OTS_FAILURE_MSG("Failed to parse format 14 cmap subtable %d", i
);
806 } else if (subtable_headers
[i
].platform
== 1) {
809 if ((subtable_headers
[i
].encoding
== 0) &&
810 (subtable_headers
[i
].format
== 0)) {
811 // parse and output the 1-0-0 table.
812 if (!Parse100(font
, data
+ subtable_headers
[i
].offset
,
813 subtable_headers
[i
].length
)) {
814 return OTS_FAILURE();
817 } else if (subtable_headers
[i
].platform
== 3) {
820 switch (subtable_headers
[i
].encoding
) {
823 if (subtable_headers
[i
].format
== 4) {
824 // parse 3-0-4 or 3-1-4 table.
825 if (!ParseFormat4(font
, subtable_headers
[i
].platform
,
826 subtable_headers
[i
].encoding
,
827 data
+ subtable_headers
[i
].offset
,
828 subtable_headers
[i
].length
, num_glyphs
)) {
829 return OTS_FAILURE();
// The existing vector is cleared before parsing, so a platform 3 subtable
// replaces any groups an earlier subtable stored in the same vector.
834 if (subtable_headers
[i
].format
== 12) {
835 font
->cmap
->subtable_3_10_12
.clear();
836 if (!Parse31012(font
, data
+ subtable_headers
[i
].offset
,
837 subtable_headers
[i
].length
, num_glyphs
)) {
838 return OTS_FAILURE();
840 } else if (subtable_headers
[i
].format
== 13) {
841 font
->cmap
->subtable_3_10_13
.clear();
842 if (!Parse31013(font
, data
+ subtable_headers
[i
].offset
,
843 subtable_headers
[i
].length
, num_glyphs
)) {
844 return OTS_FAILURE();
// Returns true when font->cmap is non-NULL, i.e. a cmap object has been
// attached to |font|.
855 bool ots_cmap_should_serialise(Font
*font
) {
856 return font
->cmap
!= NULL
;
859 bool ots_cmap_serialise(OTSStream
*out
, Font
*font
) {
860 const bool have_034
= font
->cmap
->subtable_0_3_4_data
!= NULL
;
861 const bool have_0514
= font
->cmap
->subtable_0_5_14
.size() != 0;
862 const bool have_100
= font
->cmap
->subtable_1_0_0
.size() != 0;
863 const bool have_304
= font
->cmap
->subtable_3_0_4_data
!= NULL
;
864 // MS Symbol and MS Unicode tables should not co-exist.
865 // See the comment above in 0-0-4 parser.
866 const bool have_314
= (!have_304
) && font
->cmap
->subtable_3_1_4_data
;
867 const bool have_31012
= font
->cmap
->subtable_3_10_12
.size() != 0;
868 const bool have_31013
= font
->cmap
->subtable_3_10_13
.size() != 0;
869 const uint16_t num_subtables
= static_cast<uint16_t>(have_034
) +
870 static_cast<uint16_t>(have_0514
) +
871 static_cast<uint16_t>(have_100
) +
872 static_cast<uint16_t>(have_304
) +
873 static_cast<uint16_t>(have_314
) +
874 static_cast<uint16_t>(have_31012
) +
875 static_cast<uint16_t>(have_31013
);
876 const off_t table_start
= out
->Tell();
878 // Some fonts don't have 3-0-4 MS Symbol nor 3-1-4 Unicode BMP tables
879 // (e.g., old fonts for Mac). We don't support them.
880 if (!have_304
&& !have_314
&& !have_034
&& !have_31012
&& !have_31013
) {
881 return OTS_FAILURE_MSG("no supported subtables were found");
884 if (!out
->WriteU16(0) ||
885 !out
->WriteU16(num_subtables
)) {
886 return OTS_FAILURE();
889 const off_t record_offset
= out
->Tell();
890 if (!out
->Pad(num_subtables
* 8)) {
891 return OTS_FAILURE();
894 const off_t offset_034
= out
->Tell();
896 if (!out
->Write(font
->cmap
->subtable_0_3_4_data
,
897 font
->cmap
->subtable_0_3_4_length
)) {
898 return OTS_FAILURE();
902 const off_t offset_0514
= out
->Tell();
904 const std::vector
<ots::OpenTypeCMAPSubtableVSRecord
> &records
905 = font
->cmap
->subtable_0_5_14
;
906 const unsigned num_records
= records
.size();
907 if (!out
->WriteU16(14) ||
908 !out
->WriteU32(font
->cmap
->subtable_0_5_14_length
) ||
909 !out
->WriteU32(num_records
)) {
910 return OTS_FAILURE();
912 for (unsigned i
= 0; i
< num_records
; ++i
) {
913 if (!out
->WriteU24(records
[i
].var_selector
) ||
914 !out
->WriteU32(records
[i
].default_offset
) ||
915 !out
->WriteU32(records
[i
].non_default_offset
)) {
916 return OTS_FAILURE();
919 for (unsigned i
= 0; i
< num_records
; ++i
) {
920 if (records
[i
].default_offset
) {
921 const std::vector
<ots::OpenTypeCMAPSubtableVSRange
> &ranges
923 const unsigned num_ranges
= ranges
.size();
924 if (!out
->Seek(records
[i
].default_offset
+ offset_0514
) ||
925 !out
->WriteU32(num_ranges
)) {
926 return OTS_FAILURE();
928 for (unsigned j
= 0; j
< num_ranges
; ++j
) {
929 if (!out
->WriteU24(ranges
[j
].unicode_value
) ||
930 !out
->WriteU8(ranges
[j
].additional_count
)) {
931 return OTS_FAILURE();
935 if (records
[i
].non_default_offset
) {
936 const std::vector
<ots::OpenTypeCMAPSubtableVSMapping
> &mappings
937 = records
[i
].mappings
;
938 const unsigned num_mappings
= mappings
.size();
939 if (!out
->Seek(records
[i
].non_default_offset
+ offset_0514
) ||
940 !out
->WriteU32(num_mappings
)) {
941 return OTS_FAILURE();
943 for (unsigned j
= 0; j
< num_mappings
; ++j
) {
944 if (!out
->WriteU24(mappings
[j
].unicode_value
) ||
945 !out
->WriteU16(mappings
[j
].glyph_id
)) {
946 return OTS_FAILURE();
953 const off_t offset_100
= out
->Tell();
955 if (!out
->WriteU16(0) || // format
956 !out
->WriteU16(6 + kFormat0ArraySize
) || // length
957 !out
->WriteU16(0)) { // language
958 return OTS_FAILURE();
960 if (!out
->Write(&(font
->cmap
->subtable_1_0_0
[0]), kFormat0ArraySize
)) {
961 return OTS_FAILURE();
965 const off_t offset_304
= out
->Tell();
967 if (!out
->Write(font
->cmap
->subtable_3_0_4_data
,
968 font
->cmap
->subtable_3_0_4_length
)) {
969 return OTS_FAILURE();
973 const off_t offset_314
= out
->Tell();
975 if (!out
->Write(font
->cmap
->subtable_3_1_4_data
,
976 font
->cmap
->subtable_3_1_4_length
)) {
977 return OTS_FAILURE();
981 const off_t offset_31012
= out
->Tell();
983 std::vector
<OpenTypeCMAPSubtableRange
> &groups
984 = font
->cmap
->subtable_3_10_12
;
985 const unsigned num_groups
= groups
.size();
986 if (!out
->WriteU16(12) ||
988 !out
->WriteU32(num_groups
* 12 + 16) ||
990 !out
->WriteU32(num_groups
)) {
991 return OTS_FAILURE();
994 for (unsigned i
= 0; i
< num_groups
; ++i
) {
995 if (!out
->WriteU32(groups
[i
].start_range
) ||
996 !out
->WriteU32(groups
[i
].end_range
) ||
997 !out
->WriteU32(groups
[i
].start_glyph_id
)) {
998 return OTS_FAILURE();
1003 const off_t offset_31013
= out
->Tell();
1005 std::vector
<OpenTypeCMAPSubtableRange
> &groups
1006 = font
->cmap
->subtable_3_10_13
;
1007 const unsigned num_groups
= groups
.size();
1008 if (!out
->WriteU16(13) ||
1009 !out
->WriteU16(0) ||
1010 !out
->WriteU32(num_groups
* 12 + 16) ||
1011 !out
->WriteU32(0) ||
1012 !out
->WriteU32(num_groups
)) {
1013 return OTS_FAILURE();
1016 for (unsigned i
= 0; i
< num_groups
; ++i
) {
1017 if (!out
->WriteU32(groups
[i
].start_range
) ||
1018 !out
->WriteU32(groups
[i
].end_range
) ||
1019 !out
->WriteU32(groups
[i
].start_glyph_id
)) {
1020 return OTS_FAILURE();
1025 const off_t table_end
= out
->Tell();
1027 // Now seek back and write the table of offsets
1028 if (!out
->Seek(record_offset
)) {
1029 return OTS_FAILURE();
1033 if (!out
->WriteU16(0) ||
1034 !out
->WriteU16(3) ||
1035 !out
->WriteU32(offset_034
- table_start
)) {
1036 return OTS_FAILURE();
1041 if (!out
->WriteU16(0) ||
1042 !out
->WriteU16(5) ||
1043 !out
->WriteU32(offset_0514
- table_start
)) {
1044 return OTS_FAILURE();
1049 if (!out
->WriteU16(1) ||
1050 !out
->WriteU16(0) ||
1051 !out
->WriteU32(offset_100
- table_start
)) {
1052 return OTS_FAILURE();
1057 if (!out
->WriteU16(3) ||
1058 !out
->WriteU16(0) ||
1059 !out
->WriteU32(offset_304
- table_start
)) {
1060 return OTS_FAILURE();
1065 if (!out
->WriteU16(3) ||
1066 !out
->WriteU16(1) ||
1067 !out
->WriteU32(offset_314
- table_start
)) {
1068 return OTS_FAILURE();
1073 if (!out
->WriteU16(3) ||
1074 !out
->WriteU16(10) ||
1075 !out
->WriteU32(offset_31012
- table_start
)) {
1076 return OTS_FAILURE();
1081 if (!out
->WriteU16(3) ||
1082 !out
->WriteU16(10) ||
1083 !out
->WriteU32(offset_31013
- table_start
)) {
1084 return OTS_FAILURE();
1088 if (!out
->Seek(table_end
)) {
1089 return OTS_FAILURE();
1095 void ots_cmap_reuse(Font
*font
, Font
*other
) {
1096 font
->cmap
= other
->cmap
;
1097 font
->cmap_reused
= true;
1100 void ots_cmap_free(Font
*font
) {