Sync usage with man page.
[netbsd-mini2440.git] / gnu / dist / groff / src / utils / hpftodit / hpftodit.cpp
blob1eb793edbff113e995eda0adbfc647c75f22d0f7
1 /* $NetBSD: hpftodit.cpp,v 1.1.1.3 2006/02/06 18:14:55 wiz Exp $ */
3 // -*- C++ -*-
4 /* Copyright (C) 1994, 2000, 2001, 2003, 2004 Free Software Foundation, Inc.
5 Written by James Clark (jjc@jclark.com)
7 This file is part of groff.
9 groff is free software; you can redistribute it and/or modify it under
10 the terms of the GNU General Public License as published by the Free
11 Software Foundation; either version 2, or (at your option) any later
12 version.
14 groff is distributed in the hope that it will be useful, but WITHOUT ANY
15 WARRANTY; without even the implied warranty of MERCHANTABILITY or
16 FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
17 for more details.
19 You should have received a copy of the GNU General Public License along
20 with groff; see the file COPYING. If not, write to the Free Software
21 Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA. */
24 TODO
25 devise new names for useful characters
26 option to specify symbol sets to look in
27 put filename in error messages (or fix lib)
30 #include "lib.h"
32 #include <stdio.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <ctype.h>
36 #include <math.h>
37 #include <errno.h>
38 #include "assert.h"
39 #include "posix.h"
40 #include "errarg.h"
41 #include "error.h"
42 #include "cset.h"
43 #include "nonposix.h"
44 #include "unicode.h"
// Symbols defined in other translation units.

// NOTE(review): presumably maps an HP MSL code (passed as a string) to a
// Unicode code string -- confirm against its definition.
extern const char *hp_msl_to_unicode_code(const char *);

// Version text printed by the -v / --version option; has C linkage.
extern "C" const char *Version_string;
// Number of elements in the array V (V must be a real array, not a pointer).
#define SIZEOF(v) (sizeof(v) / sizeof((v)[0]))
// True if the C strings A and B have identical contents.
#define equal(a, b) (strcmp((a), (b)) == 0)
// Non-null if the name C contains an underscore.
// only valid if is_uname(c) has returned true
#define is_decomposed(c) (strchr((c), '_'))

// Values used for the int flags of this program.
#define NO 0
#define YES 1

// Values of the TFM type tag (see check_type()).
#define MSL 0
#define SYMSET 1
#define UNICODE 2

// Name that marks a glyph as having no usable name.
#define UNNAMED "---"
// Factor applied by scale(); main() recomputes it for TrueType TFMs.
static double multiplier = 3.0;	// make Agfa-based unitwidth an integer

// Multiply N by `multiplier', add one half and truncate to int.
inline
int scale(int n)
{
  return static_cast<int>(0.5 + n * multiplier);
}
// tags in TFM file
//
// The values are the tag numbers as they appear in the file.  Number 416
// has no enumerator.  min_tag and max_tag bound the range and are used to
// size and index the `tags' array.

enum tag_type {
  min_tag                      = 400,

  // general font description
  type_tag                     = 400,
  copyright_tag                = 401,
  comment_tag                  = 402,
  charcode_tag                 = 403,	// MSL for Intellifont, Unicode for TrueType
  symbol_set_tag               = 404,
  unique_identifier_tag        = 405,
  inches_per_point_tag         = 406,
  nominal_point_size_tag       = 407,
  design_units_per_em_tag      = 408,
  posture_tag                  = 409,
  type_structure_tag           = 410,
  stroke_weight_tag            = 411,
  spacing_tag                  = 412,
  slant_tag                    = 413,
  appearance_width_tag         = 414,
  serif_style_tag              = 415,
  font_name_tag                = 417,
  typeface_source_tag          = 418,

  // font-wide metrics
  average_width_tag            = 419,
  max_width_tag                = 420,
  word_spacing_tag             = 421,
  recommended_line_spacing_tag = 422,
  cap_height_tag               = 423,
  x_height_tag                 = 424,
  max_ascent_tag               = 425,
  max_descent_tag              = 426,
  lower_ascent_tag             = 427,
  lower_descent_tag            = 428,
  underscore_depth_tag         = 429,
  underscore_thickness_tag     = 430,
  uppercase_accent_height_tag  = 431,
  lowercase_accent_height_tag  = 432,

  // per-glyph arrays
  width_tag                    = 433,
  vertical_escapement_tag      = 434,
  left_extent_tag              = 435,
  right_extent_tag             = 436,
  ascent_tag                   = 437,
  descent_tag                  = 438,

  // kerning and classification
  pair_kern_tag                = 439,
  sector_kern_tag              = 440,
  track_kern_tag               = 441,
  typeface_tag                 = 442,
  panose_tag                   = 443,

  max_tag                      = 443
};
// Two descriptive tag labels.
const char *tag_name[] = {
  "Symbol Set",
  // MSL for Intellifont, Unicode for TrueType
  "Font Type"
};
// types in TFM file
//
// Codes stored in the `type' field of a tag entry; dump_tags() switches
// on them.
enum {
  BYTE_TYPE         = 1,
  ASCII_TYPE        = 2,	// NUL-terminated string
  USHORT_TYPE       = 3,
  LONG_TYPE         = 4,	// unused
  RATIONAL_TYPE     = 5,	// numerator followed by denominator; this
				// program reads each as a 32-bit value
  SIGNED_BYTE_TYPE  = 16,	// unused
  SIGNED_SHORT_TYPE = 17,
  SIGNED_LONG_TYPE  = 18	// unused
};
// Short names for the integer types used when decoding the TFM file.
typedef unsigned char  byte;
typedef short          int16;
typedef unsigned short uint16;
typedef unsigned int   uint32;
143 class File {
144 public:
145 File(const char *);
146 void skip(int n);
147 byte get_byte();
148 uint16 get_uint16();
149 uint32 get_uint32();
150 uint32 get_uint32(char *orig);
151 void seek(uint32 n);
152 private:
153 unsigned char *buf_;
154 const unsigned char *ptr_;
155 const unsigned char *end_;
158 struct entry {
159 char present;
160 uint16 type;
161 uint32 count;
162 uint32 value;
163 char orig_value[4];
164 entry() : present(0) { }
167 struct char_info {
168 uint16 charcode;
169 uint16 width;
170 int16 ascent;
171 int16 descent;
172 int16 left_extent;
173 uint16 right_extent;
174 uint16 symbol_set;
175 unsigned char code;
178 const uint16 NO_GLYPH = 0xffff;
179 const uint16 NO_SYMBOL_SET = 0;
181 struct name_list {
182 char *name;
183 name_list *next;
184 name_list(const char *s, name_list *p) : name(strsave(s)), next(p) { }
185 ~name_list() { a_delete name; }
188 struct symbol_set {
189 uint16 select;
190 uint16 index[256];
// Selector value for symbol set number N with terminating letter C,
// e.g. SYMBOL_SET(19, 'U') for "19U".
#define SYMBOL_SET(n, c) (((n) * 32) + ((c) - 64))
195 uint16 text_symbol_sets[] = {
196 SYMBOL_SET(19, 'U'), // Windows Latin 1 ("ANSI", code page 1252)
197 SYMBOL_SET(9, 'E'), // Windows Latin 2, Code Page 1250
198 SYMBOL_SET(5, 'T'), // Code Page 1254
199 SYMBOL_SET(7, 'J'), // Desktop
200 SYMBOL_SET(6, 'J'), // Microsoft Publishing
201 SYMBOL_SET(0, 'N'), // Latin 1 (subset of 19U,
202 // so we should never get here)
203 SYMBOL_SET(2, 'N'), // Latin 2 (subset of 9E,
204 // so we should never get here)
205 SYMBOL_SET(8, 'U'), // HP Roman 8
206 SYMBOL_SET(10, 'J'), // PS Standard
207 SYMBOL_SET(9, 'U'), // Windows 3.0 "ANSI"
208 SYMBOL_SET(1, 'U'), // U.S. Legal
210 SYMBOL_SET(12, 'J'), // MC Text
211 SYMBOL_SET(10, 'U'), // PC Code Page 437
212 SYMBOL_SET(11, 'U'), // PC Code Page 437N
213 SYMBOL_SET(17, 'U'), // PC Code Page 852
214 SYMBOL_SET(12, 'U'), // PC Code Page 850
215 SYMBOL_SET(9, 'T'), // PC Code Page 437T
219 uint16 special_symbol_sets[] = {
220 SYMBOL_SET(8, 'M'), // Math 8
221 SYMBOL_SET(5, 'M'), // PS Math
222 SYMBOL_SET(15, 'U'), // Pi font
223 SYMBOL_SET(13, 'J'), // Ventura International
224 SYMBOL_SET(19, 'M'), // Symbol font
225 SYMBOL_SET(579, 'L'), // Wingdings
229 entry tags[max_tag + 1 - min_tag];
231 char_info *char_table;
232 uint32 nchars = 0;
234 unsigned int charcode_name_table_size = 0;
235 name_list **charcode_name_table = NULL;
237 symbol_set *symbol_set_table;
238 unsigned int n_symbol_sets;
240 static int debug_flag = NO;
241 static int special_flag = NO; // not a special font
242 static int italic_flag = NO; // don't add italic correction
243 static int italic_sep;
244 static int all_flag = NO; // don't include glyphs not in mapfile
245 static int quiet_flag = NO; // don't suppress warnings about symbols not found
247 static char *hp_msl_to_ucode_name(int);
248 static char *unicode_to_ucode_name(int);
249 static int is_uname(char *);
250 static char *show_symset(unsigned int);
251 static void usage(FILE *);
252 static void usage();
253 static const char *xbasename(const char *);
254 static void read_tags(File &);
255 static int check_type();
256 static void check_units(File &, const int, double *, double *);
257 static int read_map(const char *, const int);
258 static void require_tag(tag_type);
259 static void dump_ascii(File &, tag_type);
260 static void dump_tags(File &);
261 static void dump_symbol_sets(File &);
262 static void dump_symbols(int);
263 static void output_font_name(File &);
264 static void output_spacewidth();
265 static void output_pclweight();
266 static void output_pclproportional();
267 static void read_and_output_pcltypeface(File &);
268 static void output_pclstyle();
269 static void output_slant();
270 static void output_ligatures();
271 static void read_symbol_sets(File &);
272 static void read_and_output_kernpairs(File &);
273 static void output_charset(const int);
274 static void read_char_table(File &);
276 inline
277 entry &tag_info(tag_type t)
279 return tags[t - min_tag];
283 main(int argc, char **argv)
285 program_name = argv[0];
287 int opt;
288 int res = 1200; // PCL unit of measure for cursor moves
289 int scalesize = 4; // LaserJet 4 only allows 1/4 point increments
290 int unitwidth = 6350;
291 double ppi; // points per inch
292 double upem; // design units per em
294 static const struct option long_options[] = {
295 { "help", no_argument, 0, CHAR_MAX + 1 },
296 { "version", no_argument, 0, 'v' },
297 { NULL, 0, 0, 0 }
299 while ((opt = getopt_long(argc, argv, "adsqvi:", long_options, NULL)) != EOF) {
300 switch (opt) {
301 case 'a':
302 all_flag = YES;
303 break;
304 case 'd':
305 debug_flag = YES;
306 break;
307 case 's':
308 special_flag = YES;
309 break;
310 case 'i':
311 italic_flag = YES;
312 italic_sep = atoi(optarg); // design units
313 break;
314 case 'q':
315 quiet_flag = YES; // suppress warnings about symbols not found
316 break;
317 case 'v':
318 printf("GNU hpftodit (groff) version %s\n", Version_string);
319 exit(0);
320 break;
321 case CHAR_MAX + 1: // --help
322 usage(stdout);
323 exit(0);
324 break;
325 case '?':
326 usage();
327 break;
328 default:
329 assert(0);
333 if (debug_flag && argc - optind < 1)
334 usage();
335 else if (!debug_flag && argc - optind != 3)
336 usage();
337 File f(argv[optind]);
338 read_tags(f);
339 int tfm_type = check_type();
340 if (debug_flag)
341 dump_tags(f);
342 if (!debug_flag && !read_map(argv[optind + 1], tfm_type))
343 exit(1);
344 else if (debug_flag && argc - optind > 1)
345 read_map(argv[optind + 1], tfm_type);
346 current_filename = NULL;
347 current_lineno = -1; // no line numbers
348 if (!debug_flag && !equal(argv[optind + 2], "-"))
349 if (freopen(argv[optind + 2], "w", stdout) == NULL)
350 fatal("cannot open `%1': %2", argv[optind + 2], strerror(errno));
351 current_filename = argv[optind];
353 check_units(f, tfm_type, &ppi, &upem);
354 if (tfm_type == UNICODE) // don't calculate for Intellifont TFMs
355 multiplier = double(res) / upem / ppi * unitwidth / scalesize;
356 if (italic_flag)
357 // convert from thousandths of an em to design units
358 italic_sep = int(italic_sep * upem / 1000 + 0.5);
360 read_char_table(f);
361 if (nchars == 0)
362 fatal("no characters");
364 if (!debug_flag) {
365 output_font_name(f);
366 printf("name %s\n", xbasename(argv[optind + 2]));
367 if (special_flag)
368 printf("special\n");
369 output_spacewidth();
370 output_slant();
371 read_and_output_pcltypeface(f);
372 output_pclproportional();
373 output_pclweight();
374 output_pclstyle();
376 read_symbol_sets(f);
377 if (debug_flag)
378 dump_symbols(tfm_type);
379 else {
380 output_ligatures();
381 read_and_output_kernpairs(f);
382 output_charset(tfm_type);
384 return 0;
387 static void
388 usage(FILE *stream)
390 fprintf(stream,
391 "usage: %s [-s] [-a] [-q] [-i n] tfm_file map_file output_font\n"
392 " %s -d tfm_file [map_file]\n",
393 program_name, program_name);
396 static void
397 usage()
399 usage(stderr);
400 exit(1);
403 File::File(const char *s)
405 // We need to read the file in binary mode because hpftodit relies
406 // on byte counts.
407 int fd = open(s, O_RDONLY | O_BINARY);
408 if (fd < 0)
409 fatal("cannot open `%1': %2", s, strerror(errno));
410 current_filename = s;
411 struct stat sb;
412 if (fstat(fd, &sb) < 0)
413 fatal("cannot stat: %1", strerror(errno));
414 if (!S_ISREG(sb.st_mode))
415 fatal("not a regular file");
416 buf_ = new unsigned char[sb.st_size];
417 long nread = read(fd, buf_, sb.st_size);
418 if (nread < 0)
419 fatal("read error: %1", strerror(errno));
420 if (nread != sb.st_size)
421 fatal("read unexpected number of bytes");
422 ptr_ = buf_;
423 end_ = buf_ + sb.st_size;
426 void
427 File::skip(int n)
429 if (end_ - ptr_ < n)
430 fatal("unexpected end of file");
431 ptr_ += n;
434 void
435 File::seek(uint32 n)
437 if (uint32(end_ - buf_) < n)
438 fatal("unexpected end of file");
439 ptr_ = buf_ + n;
442 byte
443 File::get_byte()
445 if (ptr_ >= end_)
446 fatal("unexpected end of file");
447 return *ptr_++;
450 uint16
451 File::get_uint16()
453 if (end_ - ptr_ < 2)
454 fatal("unexpected end of file");
455 uint16 n = *ptr_++;
456 return n + (*ptr_++ << 8);
459 uint32
460 File::get_uint32()
462 if (end_ - ptr_ < 4)
463 fatal("unexpected end of file");
464 uint32 n = *ptr_++;
465 for (int i = 0; i < 3; i++)
466 n += *ptr_++ << (i + 1)*8;
467 return n;
470 uint32
471 File::get_uint32(char *orig)
473 if (end_ - ptr_ < 4)
474 fatal("unexpected end of file");
475 unsigned char v = *ptr_++;
476 uint32 n = v;
477 orig[0] = v;
478 for (int i = 1; i < 4; i++) {
479 v = *ptr_++;
480 orig[i] = v;
481 n += v << i*8;
483 return n;
486 static void
487 read_tags(File &f)
489 if (f.get_byte() != 'I' || f.get_byte() != 'I')
490 fatal("not an Intel format TFM file");
491 f.skip(6);
492 uint16 ntags = f.get_uint16();
493 entry dummy;
494 for (uint16 i = 0; i < ntags; i++) {
495 uint16 tag = f.get_uint16();
496 entry *p;
497 if (min_tag <= tag && tag <= max_tag)
498 p = tags + (tag - min_tag);
499 else
500 p = &dummy;
501 p->present = 1;
502 p->type = f.get_uint16();
503 p->count = f.get_uint32();
504 p->value = f.get_uint32(p->orig_value);
508 static int
509 check_type()
511 require_tag(type_tag);
512 int tfm_type = tag_info(type_tag).value;
513 switch (tfm_type) {
514 case MSL:
515 case UNICODE:
516 break;
517 case SYMSET:
518 fatal("cannot handle Symbol Set TFM files");
519 break;
520 default:
521 fatal("unknown type tag %1", tfm_type);
523 return tfm_type;
526 static void
527 check_units(File &f, const int tfm_type, double *ppi, double *upem)
529 require_tag(design_units_per_em_tag);
530 f.seek(tag_info(design_units_per_em_tag).value);
531 uint32 num = f.get_uint32();
532 uint32 den = f.get_uint32();
533 if (tfm_type == MSL && (num != 8782 || den != 1))
534 fatal("design units per em != 8782/1");
535 *upem = double(num) / den;
536 require_tag(inches_per_point_tag);
537 f.seek(tag_info(inches_per_point_tag).value);
538 num = f.get_uint32();
539 den = f.get_uint32();
540 if (tfm_type == MSL && (num != 100 || den != 7231))
541 fatal("inches per point not 100/7231");
542 *ppi = double(den) / num;
545 static void
546 require_tag(tag_type t)
548 if (!tag_info(t).present)
549 fatal("tag %1 missing", int(t));
552 // put a human-readable font name in the file
553 static void
554 output_font_name(File &f)
556 char *p;
558 if (!tag_info(font_name_tag).present)
559 return;
560 int count = tag_info(font_name_tag).count;
561 char *font_name = new char[count];
563 if (count > 4) { // value is a file offset to the string
564 f.seek(tag_info(font_name_tag).value);
565 int n = count;
566 p = font_name;
567 while (--n)
568 *p++ = f.get_byte();
570 else // orig_value contains the string
571 sprintf(font_name, "%.*s",
572 count, tag_info(font_name_tag).orig_value);
574 // remove any trailing space
575 p = font_name + count - 1;
576 while (csspace(*--p))
578 *(p + 1) = '\0';
579 printf("# %s\n", font_name);
580 delete font_name;
583 static void
584 output_spacewidth()
586 require_tag(word_spacing_tag);
587 printf("spacewidth %d\n", scale(tag_info(word_spacing_tag).value));
590 static void
591 read_symbol_sets(File &f)
593 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
594 uint16 *symbol_set_selectors;
595 n_symbol_sets = symbol_set_dir_length/14;
596 symbol_set_table = new symbol_set[n_symbol_sets];
597 unsigned int i;
599 for (i = 0; i < nchars; i++)
600 char_table[i].symbol_set = NO_SYMBOL_SET;
602 for (i = 0; i < n_symbol_sets; i++) {
603 f.seek(tag_info(symbol_set_tag).value + i*14);
604 (void)f.get_uint32(); // offset to symbol set name
605 uint32 off1 = f.get_uint32(); // offset to selection string
606 uint32 off2 = f.get_uint32(); // offset to symbol set index array
608 f.seek(off1);
609 uint16 kind = 0; // HP-GL "Kind 1" symbol set value
610 unsigned int j;
611 for (j = 0; j < off2 - off1; j++) {
612 unsigned char c = f.get_byte();
613 if ('0' <= c && c <= '9') // value
614 kind = kind*10 + (c - '0');
615 else if ('A' <= c && c <= 'Z') // terminator
616 kind = kind*32 + (c - 64);
618 symbol_set_table[i].select = kind;
619 for (j = 0; j < 256; j++)
620 symbol_set_table[i].index[j] = f.get_uint16();
623 symbol_set_selectors = (special_flag ? special_symbol_sets
624 : text_symbol_sets);
625 for (i = 0; symbol_set_selectors[i] != 0; i++) {
626 unsigned int j;
627 for (j = 0; j < n_symbol_sets; j++)
628 if (symbol_set_table[j].select == symbol_set_selectors[i])
629 break;
630 if (j < n_symbol_sets) {
631 for (int k = 0; k < 256; k++) {
632 uint16 idx = symbol_set_table[j].index[k];
633 if (idx != NO_GLYPH
634 && char_table[idx].symbol_set == NO_SYMBOL_SET) {
635 char_table[idx].symbol_set = symbol_set_table[j].select;
636 char_table[idx].code = k;
642 if (all_flag)
643 return;
645 symbol_set_selectors = (special_flag ? text_symbol_sets
646 : special_symbol_sets);
647 for (i = 0; symbol_set_selectors[i] != 0; i++) {
648 unsigned int j;
649 for (j = 0; j < n_symbol_sets; j++)
650 if (symbol_set_table[j].select == symbol_set_selectors[i])
651 break;
652 if (j < n_symbol_sets) {
653 for (int k = 0; k < 256; k++) {
654 uint16 idx = symbol_set_table[j].index[k];
655 if (idx != NO_GLYPH
656 && char_table[idx].symbol_set == NO_SYMBOL_SET) {
657 char_table[idx].symbol_set = symbol_set_table[j].select;
658 char_table[idx].code = k;
663 return;
666 static void
667 read_char_table(File &f)
669 require_tag(charcode_tag);
670 nchars = tag_info(charcode_tag).count;
671 char_table = new char_info[nchars];
673 f.seek(tag_info(charcode_tag).value);
674 uint32 i;
675 for (i = 0; i < nchars; i++)
676 char_table[i].charcode = f.get_uint16();
678 require_tag(width_tag);
679 f.seek(tag_info(width_tag).value);
680 for (i = 0; i < nchars; i++)
681 char_table[i].width = f.get_uint16();
683 require_tag(ascent_tag);
684 f.seek(tag_info(ascent_tag).value);
685 for (i = 0; i < nchars; i++) {
686 char_table[i].ascent = f.get_uint16();
687 if (char_table[i].ascent < 0)
688 char_table[i].ascent = 0;
691 require_tag(descent_tag);
692 f.seek(tag_info(descent_tag).value);
693 for (i = 0; i < nchars; i++) {
694 char_table[i].descent = f.get_uint16();
695 if (char_table[i].descent > 0)
696 char_table[i].descent = 0;
699 require_tag(left_extent_tag);
700 f.seek(tag_info(left_extent_tag).value);
701 for (i = 0; i < nchars; i++)
702 char_table[i].left_extent = int16(f.get_uint16());
704 require_tag(right_extent_tag);
705 f.seek(tag_info(right_extent_tag).value);
706 for (i = 0; i < nchars; i++)
707 char_table[i].right_extent = f.get_uint16();
710 static void
711 output_pclweight()
713 require_tag(stroke_weight_tag);
714 int stroke_weight = tag_info(stroke_weight_tag).value;
715 int pcl_stroke_weight;
716 if (stroke_weight < 128)
717 pcl_stroke_weight = -3;
718 else if (stroke_weight == 128)
719 pcl_stroke_weight = 0;
720 else if (stroke_weight <= 145)
721 pcl_stroke_weight = 1;
722 else if (stroke_weight <= 179)
723 pcl_stroke_weight = 3;
724 else
725 pcl_stroke_weight = 4;
726 printf("pclweight %d\n", pcl_stroke_weight);
729 static void
730 output_pclproportional()
732 require_tag(spacing_tag);
733 printf("pclproportional %d\n", tag_info(spacing_tag).value == 0);
736 static void
737 read_and_output_pcltypeface(File &f)
739 printf("pcltypeface ");
740 require_tag(typeface_tag);
741 if (tag_info(typeface_tag).count > 4) {
742 f.seek(tag_info(typeface_tag).value);
743 for (uint32 i = 0; i < tag_info(typeface_tag).count; i++) {
744 unsigned char c = f.get_byte();
745 if (c == '\0')
746 break;
747 putchar(c);
750 else
751 printf("%.4s", tag_info(typeface_tag).orig_value);
752 printf("\n");
755 static void
756 output_pclstyle()
758 unsigned pcl_style = 0;
759 // older tfms don't have the posture tag
760 if (tag_info(posture_tag).present) {
761 if (tag_info(posture_tag).value)
762 pcl_style |= 1;
764 else {
765 require_tag(slant_tag);
766 if (tag_info(slant_tag).value != 0)
767 pcl_style |= 1;
769 require_tag(appearance_width_tag);
770 if (tag_info(appearance_width_tag).value < 100) // guess
771 pcl_style |= 4;
772 printf("pclstyle %d\n", pcl_style);
775 static void
776 output_slant()
778 require_tag(slant_tag);
779 int slant = int16(tag_info(slant_tag).value);
780 if (slant != 0)
781 printf("slant %f\n", slant/100.0);
784 static void
785 output_ligatures()
787 // don't use ligatures for fixed space font
788 require_tag(spacing_tag);
789 if (tag_info(spacing_tag).value != 0)
790 return;
791 static const char *ligature_names[] = {
792 "fi", "fl", "ff", "ffi", "ffl"
795 static const char *ligature_chars[] = {
796 "fi", "fl", "ff", "Fi", "Fl"
799 unsigned ligature_mask = 0;
800 unsigned int i;
801 for (i = 0; i < nchars; i++) {
802 uint16 charcode = char_table[i].charcode;
803 if (charcode < charcode_name_table_size
804 && char_table[i].symbol_set != NO_SYMBOL_SET) {
805 for (name_list *p = charcode_name_table[charcode]; p; p = p->next)
806 for (unsigned int j = 0; j < SIZEOF(ligature_chars); j++)
807 if (strcmp(p->name, ligature_chars[j]) == 0) {
808 ligature_mask |= 1 << j;
809 break;
813 if (ligature_mask) {
814 printf("ligatures");
815 for (i = 0; i < SIZEOF(ligature_names); i++)
816 if (ligature_mask & (1 << i))
817 printf(" %s", ligature_names[i]);
818 printf(" 0\n");
822 static void
823 read_and_output_kernpairs(File &f)
825 if (tag_info(pair_kern_tag).present) {
826 printf("kernpairs\n");
827 f.seek(tag_info(pair_kern_tag).value);
828 uint16 n_pairs = f.get_uint16();
829 for (int i = 0; i < n_pairs; i++) {
830 uint16 i1 = f.get_uint16();
831 uint16 i2 = f.get_uint16();
832 int16 val = int16(f.get_uint16());
833 if (char_table[i1].symbol_set != NO_SYMBOL_SET
834 && char_table[i2].symbol_set != NO_SYMBOL_SET
835 && char_table[i1].charcode < charcode_name_table_size
836 && char_table[i2].charcode < charcode_name_table_size) {
837 for (name_list *p = charcode_name_table[char_table[i1].charcode];
839 p = p->next)
840 for (name_list *q = charcode_name_table[char_table[i2].charcode];
842 q = q->next)
843 if (!equal(p->name, UNNAMED) && !equal(q->name, UNNAMED))
844 printf("%s %s %d\n", p->name, q->name, scale(val));
850 static void
851 output_charset(const int tfm_type)
853 require_tag(slant_tag);
854 double slant_angle = int16(tag_info(slant_tag).value)*PI/18000.0;
855 double slant = sin(slant_angle)/cos(slant_angle);
857 if (italic_flag)
858 require_tag(x_height_tag);
859 require_tag(lower_ascent_tag);
860 require_tag(lower_descent_tag);
862 printf("charset\n");
863 unsigned int i;
864 for (i = 0; i < nchars; i++) {
865 uint16 charcode = char_table[i].charcode;
867 // the glyph is bound to one of the searched symbol sets
868 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
869 // the character was in the map file
870 if (charcode < charcode_name_table_size && charcode_name_table[charcode])
871 printf("%s", charcode_name_table[charcode]->name);
872 else if (!all_flag)
873 continue;
874 else if (tfm_type == MSL)
875 printf(hp_msl_to_ucode_name(charcode));
876 else
877 printf(unicode_to_ucode_name(charcode));
879 printf("\t%d,%d",
880 scale(char_table[i].width), scale(char_table[i].ascent));
882 int depth = scale(-char_table[i].descent);
883 if (depth < 0)
884 depth = 0;
885 int italic_correction = 0;
886 int left_italic_correction = 0;
887 int subscript_correction = 0;
889 if (italic_flag) {
890 italic_correction = scale(char_table[i].right_extent
891 - char_table[i].width
892 + italic_sep);
893 if (italic_correction < 0)
894 italic_correction = 0;
895 subscript_correction = int((tag_info(x_height_tag).value
896 * slant * .8) + .5);
897 if (subscript_correction > italic_correction)
898 subscript_correction = italic_correction;
899 left_italic_correction = scale(italic_sep
900 - char_table[i].left_extent);
903 if (subscript_correction != 0)
904 printf(",%d,%d,%d,%d",
905 depth, italic_correction, left_italic_correction,
906 subscript_correction);
907 else if (left_italic_correction != 0)
908 printf(",%d,%d,%d", depth, italic_correction, left_italic_correction);
909 else if (italic_correction != 0)
910 printf(",%d,%d", depth, italic_correction);
911 else if (depth != 0)
912 printf(",%d", depth);
913 // This is fairly arbitrary. Fortunately it doesn't much matter.
914 unsigned type = 0;
915 if (char_table[i].ascent > int16(tag_info(lower_ascent_tag).value)*9/10)
916 type |= 2;
917 if (char_table[i].descent < int16(tag_info(lower_descent_tag).value)*9/10)
918 type |= 1;
919 printf("\t%d\t%d", type,
920 char_table[i].symbol_set*256 + char_table[i].code);
922 if (tfm_type == UNICODE) {
923 if (charcode >= 0xE000 && charcode <= 0xF8FF)
924 printf("\t-- HP PUA U+%04X", charcode);
925 else
926 printf("\t-- U+%04X", charcode);
928 else
929 printf("\t-- MSL %4d", charcode);
930 printf(" (%3s %3d)\n",
931 show_symset(char_table[i].symbol_set), char_table[i].code);
933 if (charcode < charcode_name_table_size
934 && charcode_name_table[charcode])
935 for (name_list *p = charcode_name_table[charcode]->next;
936 p; p = p->next)
937 printf("%s\t\"\n", p->name);
939 // warnings about characters in mapfile not found in TFM
940 else if (charcode < charcode_name_table_size
941 && charcode_name_table[charcode]) {
942 char *name = charcode_name_table[charcode]->name;
943 // don't warn about Unicode or unnamed glyphs
944 // that aren't in the the TFM file
945 if (tfm_type == UNICODE && !quiet_flag && !equal(name, UNNAMED)
946 && !is_uname(name)) {
947 fprintf(stderr, "%s: warning: symbol U+%04X (%s",
948 program_name, charcode, name);
949 for (name_list *p = charcode_name_table[charcode]->next;
950 p; p = p->next)
951 fprintf(stderr, ", %s", p->name);
952 fprintf(stderr, ") not in any searched symbol set\n");
954 else if (!quiet_flag && !equal(name, UNNAMED) && !is_uname(name)) {
955 fprintf(stderr, "%s: warning: symbol MSL %d (%s",
956 program_name, charcode, name);
957 for (name_list *p = charcode_name_table[charcode]->next;
958 p; p = p->next)
959 fprintf(stderr, ", %s", p->name);
960 fprintf(stderr, ") not in any searched symbol set\n");
966 #define em_fract(a) (upem >= 0 ? double(a)/upem : 0)
968 static void
969 dump_tags(File &f)
971 double upem = -1.0;
973 printf("TFM tags\n"
974 "\n"
975 "tag# type count value\n"
976 "---------------------\n");
978 for (int i = min_tag; i <= max_tag; i++) {
979 enum tag_type t = tag_type(i);
980 if (tag_info(t).present) {
981 printf("%4d %4d %5d", i, tag_info(t).type, tag_info(t).count);
982 switch (tag_info(t).type) {
983 case BYTE_TYPE:
984 case USHORT_TYPE:
985 printf(" %5u", tag_info(t).value);
986 switch (i) {
987 case type_tag:
988 printf(" Font Type ");
989 switch (tag_info(t).value) {
990 case MSL:
991 case SYMSET:
992 printf("(Intellifont)");
993 break;
994 case UNICODE:
995 printf("(TrueType)");
997 break;
998 case charcode_tag:
999 printf(" Number of Symbols (%u)", tag_info(t).count);
1000 break;
1001 case symbol_set_tag:
1002 printf(" Symbol Sets (%u): ",
1003 tag_info(symbol_set_tag).count / 14);
1004 dump_symbol_sets(f);
1005 break;
1006 case type_structure_tag:
1007 printf(" Type Structure (%u)", tag_info(t).value);
1008 break;
1009 case stroke_weight_tag:
1010 printf(" Stroke Weight (%u)", tag_info(t).value);
1011 break;
1012 case spacing_tag:
1013 printf(" Spacing ");
1014 switch (tag_info(t).value) {
1015 case 0:
1016 printf("(Proportional)");
1017 break;
1018 case 1:
1019 printf("(Fixed Pitch: %u DU: %.2f em)", tag_info(t).value,
1020 em_fract(tag_info(t).value));
1021 break;
1023 break;
1024 case appearance_width_tag:
1025 printf(" Appearance Width (%u)", tag_info(t).value);
1026 break;
1027 case serif_style_tag:
1028 printf(" Serif Style (%u)", tag_info(t).value);
1029 break;
1030 case posture_tag:
1031 printf(" Posture (%s)", tag_info(t).value == 0
1032 ? "Upright"
1033 : tag_info(t).value == 1
1034 ? "Italic"
1035 : "Alternate Italic");
1036 break;
1037 case max_width_tag:
1038 printf(" Maximum Width (%u DU: %.2f em)", tag_info(t).value,
1039 em_fract(tag_info(t).value));
1040 break;
1041 case word_spacing_tag:
1042 printf(" Interword Spacing (%u DU: %.2f em)", tag_info(t).value,
1043 em_fract(tag_info(t).value));
1044 break;
1045 case recommended_line_spacing_tag:
1046 printf(" Recommended Line Spacing (%u DU: %.2f em)", tag_info(t).value,
1047 em_fract(tag_info(t).value));
1048 break;
1049 case x_height_tag:
1050 printf(" x-Height (%u DU: %.2f em)", tag_info(t).value,
1051 em_fract(tag_info(t).value));
1052 break;
1053 case cap_height_tag:
1054 printf(" Cap Height (%u DU: %.2f em)", tag_info(t).value,
1055 em_fract(tag_info(t).value));
1056 break;
1057 case max_ascent_tag:
1058 printf(" Maximum Ascent (%u DU: %.2f em)", tag_info(t).value,
1059 em_fract(tag_info(t).value));
1060 break;
1061 case lower_ascent_tag:
1062 printf(" Lowercase Ascent (%u DU: %.2f em)", tag_info(t).value,
1063 em_fract(tag_info(t).value));
1064 break;
1065 case underscore_thickness_tag:
1066 printf(" Underscore Thickness (%u DU: %.2f em)", tag_info(t).value,
1067 em_fract(tag_info(t).value));
1068 break;
1069 case uppercase_accent_height_tag:
1070 printf(" Uppercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1071 em_fract(tag_info(t).value));
1072 break;
1073 case lowercase_accent_height_tag:
1074 printf(" Lowercase Accent Height (%u DU: %.2f em)", tag_info(t).value,
1075 em_fract(tag_info(t).value));
1076 break;
1077 case width_tag:
1078 printf(" Horizontal Escapement array");
1079 break;
1080 case vertical_escapement_tag:
1081 printf(" Vertical Escapement array");
1082 break;
1083 case right_extent_tag:
1084 printf(" Right Extent array");
1085 break;
1086 case ascent_tag:
1087 printf(" Character Ascent array");
1088 break;
1089 case pair_kern_tag:
1090 f.seek(tag_info(t).value);
1091 printf(" Kern Pairs (%u)", f.get_uint16());
1092 break;
1093 case panose_tag:
1094 printf(" PANOSE Classification array");
1095 break;
1097 break;
1098 case SIGNED_SHORT_TYPE:
1099 printf(" %5d", int16(tag_info(t).value));
1100 switch (i) {
1101 case slant_tag:
1102 printf(" Slant (%.2f degrees)", double(tag_info(t).value) / 100);
1103 break;
1104 case max_descent_tag:
1105 printf(" Maximum Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1106 em_fract(int16(tag_info(t).value)));
1107 break;
1108 case lower_descent_tag:
1109 printf(" Lowercase Descent (%d DU: %.2f em)", int16(tag_info(t).value),
1110 em_fract(int16(tag_info(t).value)));
1111 break;
1112 case underscore_depth_tag:
1113 printf(" Underscore Depth (%d DU: %.2f em)", int16(tag_info(t).value),
1114 em_fract(int16(tag_info(t).value)));
1115 break;
1116 case left_extent_tag:
1117 printf(" Left Extent array");
1118 break;
1119 // The type of this tag has changed from SHORT to SIGNED SHORT
1120 // in TFM version 1.3.0.
1121 case ascent_tag:
1122 printf(" Character Ascent array");
1123 break;
1124 case descent_tag:
1125 printf(" Character Descent array");
1126 break;
1128 break;
1129 case RATIONAL_TYPE:
1130 printf(" %5u", tag_info(t).value);
1131 switch (i) {
1132 case inches_per_point_tag:
1133 printf(" Inches per Point");
1134 break;
1135 case nominal_point_size_tag:
1136 printf(" Nominal Point Size");
1137 break;
1138 case design_units_per_em_tag:
1139 printf(" Design Units per Em");
1140 break;
1141 case average_width_tag:
1142 printf(" Average Width");
1143 break;
1145 if (tag_info(t).count == 1) {
1146 f.seek(tag_info(t).value);
1147 uint32 num = f.get_uint32();
1148 uint32 den = f.get_uint32();
1149 if (i == design_units_per_em_tag)
1150 upem = double(num) / den;
1151 printf(" (%u/%u = %g)", num, den, double(num)/den);
1153 break;
1154 case ASCII_TYPE:
1155 printf(" %5u ", tag_info(t).value);
1156 switch (i) {
1157 case comment_tag:
1158 printf("Comment ");
1159 break;
1160 case copyright_tag:
1161 printf("Copyright ");
1162 break;
1163 case unique_identifier_tag:
1164 printf("Unique ID ");
1165 break;
1166 case font_name_tag:
1167 printf("Typeface Name ");
1168 break;
1169 case typeface_source_tag:
1170 printf("Typeface Source ");
1171 break;
1172 case typeface_tag:
1173 printf("PCL Typeface ");
1174 break;
1176 dump_ascii(f, t);
1178 putchar('\n');
1181 putchar('\n');
1183 #undef em_fract
1185 static void
1186 dump_ascii(File &f, tag_type t)
1188 putchar('"');
1189 if (tag_info(t).count > 4) {
1190 int count = tag_info(t).count;
1191 f.seek(tag_info(t).value);
1192 while (--count)
1193 printf("%c", f.get_byte());
1195 else
1196 printf("%.4s", tag_info(t).orig_value);
1197 putchar('"');
1200 static void
1201 dump_symbol_sets(File &f)
1203 uint32 symbol_set_dir_length = tag_info(symbol_set_tag).count;
1204 uint32 num_symbol_sets = symbol_set_dir_length / 14;
1206 for (uint32 i = 0; i < num_symbol_sets; i++) {
1207 f.seek(tag_info(symbol_set_tag).value + i * 14);
1208 (void)f.get_uint32(); // offset to symbol set name
1209 uint32 off1 = f.get_uint32(); // offset to selection string
1210 uint32 off2 = f.get_uint32(); // offset to symbol set index array
1211 f.seek(off1);
1212 for (uint32 j = 0; j < off2 - off1; j++) {
1213 unsigned char c = f.get_byte();
1214 if ('0' <= c && c <= '9')
1215 putchar(c);
1216 else if ('A' <= c && c <= 'Z')
1217 printf(i < num_symbol_sets - 1 ? "%c," : "%c", c);
1222 static void
1223 dump_symbols(int tfm_type)
1225 printf("Symbols:\n"
1226 "\n"
1227 " glyph id# symbol set name(s)\n"
1228 "----------------------------------\n");
1229 for (uint32 i = 0; i < nchars; i++) {
1230 uint16 charcode = char_table[i].charcode;
1231 if (charcode < charcode_name_table_size
1232 && charcode_name_table[charcode]) {
1233 if (char_table[i].symbol_set != NO_SYMBOL_SET) {
1234 printf(tfm_type == UNICODE ? "%4d (U+%04X) (%3s %3d) %s"
1235 : "%4d (MSL %4d) (%3s %3d) %s",
1236 i, charcode,
1237 show_symset(char_table[i].symbol_set),
1238 char_table[i].code,
1239 charcode_name_table[charcode]->name);
1240 for (name_list *p = charcode_name_table[charcode]->next;
1241 p; p = p->next)
1242 printf(", %s", p->name);
1243 putchar('\n');
1246 else {
1247 printf(tfm_type == UNICODE ? "%4d (U+%04X) "
1248 : "%4d (MSL %4d) ",
1249 i, charcode);
1250 if (char_table[i].symbol_set != NO_SYMBOL_SET)
1251 printf("(%3s %3d)",
1252 show_symset(char_table[i].symbol_set), char_table[i].code);
1253 putchar('\n');
1256 putchar('\n');
// Format a numeric symbol set identifier as a string made of the number
// symset / 32 followed by the character (symset & 31) + 64, e.g. 277
// yields "8U".
//
// The result lives in a static buffer that is overwritten by the next
// call, so the caller must use or copy it before calling again.
static char *
show_symset(unsigned int symset)
{
  // Large enough for the 9 digits a 32-bit value can produce, the letter
  // and the terminating NUL; snprintf() guards wider `unsigned int's.
  static char symset_str[16];

  snprintf(symset_str, sizeof(symset_str), "%u%c",
           symset / 32, (int)((symset & 31) + 64));
  return symset_str;
}
1268 static char *
1269 hp_msl_to_ucode_name(int msl)
1271 char codestr[8];
1273 sprintf(codestr, "%d", msl);
1274 const char *ustr = hp_msl_to_unicode_code(codestr);
1275 if (ustr == NULL)
1276 ustr = UNNAMED;
1277 else {
1278 char *nonum;
1279 int ucode = int(strtol(ustr, &nonum, 16));
1280 // don't allow PUA code points as Unicode names
1281 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1282 ustr = UNNAMED;
1284 if (!equal(ustr, UNNAMED)) {
1285 const char *uname_decomposed = decompose_unicode(ustr);
1286 if (uname_decomposed)
1287 // 1st char is the number of components
1288 ustr = uname_decomposed + 1;
1290 char *value = new char[strlen(ustr) + 1];
1291 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1292 return value;
1295 static char *
1296 unicode_to_ucode_name(int ucode)
1298 const char *ustr;
1299 char codestr[8];
1301 // don't allow PUA code points as Unicode names
1302 if (ucode >= 0xE000 && ucode <= 0xF8FF)
1303 ustr = UNNAMED;
1304 else {
1305 sprintf(codestr, "%04X", ucode);
1306 ustr = codestr;
1308 if (!equal(ustr, UNNAMED)) {
1309 const char *uname_decomposed = decompose_unicode(ustr);
1310 if (uname_decomposed)
1311 // 1st char is the number of components
1312 ustr = uname_decomposed + 1;
1314 char *value = new char[strlen(ustr) + 1];
1315 sprintf(value, equal(ustr, UNNAMED) ? ustr : "u%s", ustr);
1316 return value;
1319 static int
1320 is_uname(char *name)
1322 size_t i;
1323 size_t len = strlen(name);
1324 if (len % 5)
1325 return 0;
1327 if (name[0] != 'u')
1328 return 0;
1329 for (i = 1; i < 4; i++)
1330 if (!csxdigit(name[i]))
1331 return 0;
1332 for (i = 5; i < len; i++)
1333 if (i % 5 ? !csxdigit(name[i]) : name[i] != '_')
1334 return 0;
1336 return 1;
1339 static int
1340 read_map(const char *file, const int tfm_type)
1342 errno = 0;
1343 FILE *fp = fopen(file, "r");
1344 if (!fp) {
1345 error("can't open `%1': %2", file, strerror(errno));
1346 return 0;
1348 current_filename = file;
1349 char buf[512];
1350 current_lineno = 0;
1351 char *nonum;
1352 while (fgets(buf, int(sizeof(buf)), fp)) {
1353 current_lineno++;
1354 char *ptr = buf;
1355 while (csspace(*ptr))
1356 ptr++;
1357 if (*ptr == '\0' || *ptr == '#')
1358 continue;
1359 ptr = strtok(ptr, " \n\t");
1360 if (!ptr)
1361 continue;
1363 int msl_code = int(strtol(ptr, &nonum, 10));
1364 if (*nonum != '\0') {
1365 if (csxdigit(*nonum))
1366 error("bad MSL map: got hex code (%1)", ptr);
1367 else if (ptr == nonum)
1368 error("bad MSL map: bad MSL code (%1)", ptr);
1369 else
1370 error("bad MSL map");
1371 fclose(fp);
1372 return 0;
1375 ptr = strtok(NULL, " \n\t");
1376 if (!ptr)
1377 continue;
1378 int unicode = int(strtol(ptr, &nonum, 16));
1379 if (*nonum != '\0') {
1380 if (ptr == nonum)
1381 error("bad Unicode value (%1)", ptr);
1382 else
1383 error("bad Unicode map");
1384 fclose(fp);
1385 return 0;
1387 if (strlen(ptr) != 4) {
1388 error("bad Unicode value (%1)", ptr);
1389 return 0;
1392 int n = tfm_type == MSL ? msl_code : unicode;
1393 if (tfm_type == UNICODE && n > 0xFFFF) {
1394 // greatest value supported by TFM files
1395 error("bad Unicode value (%1): greatest value is 0xFFFF", ptr);
1396 fclose(fp);
1397 return 0;
1399 else if (n < 0) {
1400 error("negative code value (%1)", ptr);
1401 fclose(fp);
1402 return 0;
1405 ptr = strtok(NULL, " \n\t");
1406 if (!ptr) { // groff name
1407 error("missing name(s)");
1408 fclose(fp);
1409 return 0;
1411 // leave decomposed Unicode values alone
1412 else if (is_uname(ptr) && !is_decomposed(ptr))
1413 ptr = unicode_to_ucode_name(strtol(ptr + 1, &nonum, 16));
1415 if (size_t(n) >= charcode_name_table_size) {
1416 size_t old_size = charcode_name_table_size;
1417 name_list **old_table = charcode_name_table;
1418 charcode_name_table_size = n + 256;
1419 charcode_name_table = new name_list *[charcode_name_table_size];
1420 if (old_table) {
1421 memcpy(charcode_name_table, old_table, old_size*sizeof(name_list *));
1422 a_delete old_table;
1424 for (size_t i = old_size; i < charcode_name_table_size; i++)
1425 charcode_name_table[i] = NULL;
1428 // a '#' that isn't the first groff name begins a comment
1429 for (int names = 1; ptr; ptr = strtok(NULL, " \n\t")) {
1430 if (names++ > 1 && *ptr == '#')
1431 break;
1432 charcode_name_table[n] = new name_list(ptr, charcode_name_table[n]);
1435 fclose(fp);
1436 return 1;
1439 static const char *
1440 xbasename(const char *s)
1442 // DIR_SEPS[] are possible directory separator characters, see
1443 // nonposix.h. We want the rightmost separator of all possible
1444 // ones. Example: d:/foo\\bar.
1445 const char *b = strrchr(s, DIR_SEPS[0]), *b1;
1446 const char *sep = &DIR_SEPS[1];
1448 while (*sep)
1450 b1 = strrchr(s, *sep);
1451 if (b1 && (!b || b1 > b))
1452 b = b1;
1453 sep++;
1455 return b ? b + 1 : s;