Merge branch 'topic/sync-to-go-2'
[s-roff.git] / src / ute-afmtodit / afmtodit.pl.in
blob1c0dd85bd1df564951973d6bdd1f220dc3e50df1
1 #!/usr/bin/env perl -w
2 # Copyright (c) 2014 - 2017 Steffen (Daode) Nurpmeso <steffen@sdaoden.eu>.
4 # Copyright (C) 1989 - 2008
5 # Free Software Foundation, Inc.
6 # Written by James Clark (jjc@jclark.com)
8 # This is free software; you can redistribute it and/or modify it under
9 # the terms of the GNU General Public License as published by the Free
10 # Software Foundation; either version 2, or (at your option) any later
11 # version.
13 # This is distributed in the hope that it will be useful, but WITHOUT ANY
14 # WARRANTY; without even the implied warranty of MERCHANTABILITY or
15 # FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License
16 # for more details.
18 # You should have received a copy of the GNU General Public License along
19 # with groff; see the file COPYING. If not, write to the Free Software
20 # Foundation, 51 Franklin St - Fifth Floor, Boston, MA 02110-1301, USA.
22 use strict;
24 @afmtodit.tables@
26 my $prog = $0;
27 $prog =~ s@.*/@@;
29 my $groff_sys_fontdir = "@FONTDIR@";
31 use Getopt::Std;
32 getopts('a:cd:e:f:i:kmno:svx');
34 our ($opt_a, $opt_c, $opt_d, $opt_e, $opt_f, $opt_i, $opt_k,
35 $opt_m, $opt_n, $opt_o, $opt_s, $opt_v, $opt_x);
37 if ($opt_v) {
38 print "@L_AFMTODIT@ (@T_ROFF@) v@VERSION@\n";
39 exit 0;
42 if ($#ARGV != 2) {
43 die "Synopsis: $prog [-ckmnsvx] [-a angle] [-d DESC] [-e encoding]\n" .
44 " [-f name] [-i n] [-o outfile] afmfile mapfile font\n";
47 my $afm = $ARGV[0];
48 my $map = $ARGV[1];
49 my $font = $ARGV[2];
50 my $outfile = $opt_o || $font;
51 my $desc = $opt_d || "DESC";
52 my $sys_map = $groff_sys_fontdir . "/dev-ps/generate/" . $map;
53 my $sys_desc = $groff_sys_fontdir . "/dev-ps/" . $desc;
55 # read the afm file
57 my $psname;
58 my ($notice, $version, $fullname, $familyname, @comments);
59 my $italic_angle = 0;
60 my (@kern1, @kern2, @kernx);
61 my (%italic_correction, %left_italic_correction);
62 my %subscript_correction;
63 # my %ligs
64 my %ligatures;
65 my (@encoding, %in_encoding);
66 my (%width, %height, %depth);
67 my (%left_side_bearing, %right_side_bearing);
69 open(AFM, $afm) || die "$prog: can't open \`$ARGV[0]': $!\n";
71 while (<AFM>) {
72 chomp;
73 s/\x0D$//;
74 my @field = split(' ');
75 next if $#field < 0;
76 if ($field[0] eq "FontName") {
77 $psname = $field[1];
78 if($opt_f) {
79 $psname = $opt_f;
82 elsif($field[0] eq "Notice") {
83 $notice = $_;
85 elsif($field[0] eq "Version") {
86 $version = $_;
88 elsif($field[0] eq "FullName") {
89 $fullname = $_;
91 elsif($field[0] eq "FamilyName") {
92 $familyname = $_;
94 elsif($field[0] eq "Comment") {
95 push(@comments, $_);
97 elsif($field[0] eq "ItalicAngle") {
98 $italic_angle = -$field[1];
100 elsif ($field[0] eq "KPX") {
101 if ($#field == 3) {
102 push(@kern1, $field[1]);
103 push(@kern2, $field[2]);
104 push(@kernx, $field[3]);
107 elsif ($field[0] eq "italicCorrection") {
108 $italic_correction{$field[1]} = $field[2];
110 elsif ($field[0] eq "leftItalicCorrection") {
111 $left_italic_correction{$field[1]} = $field[2];
113 elsif ($field[0] eq "subscriptCorrection") {
114 $subscript_correction{$field[1]} = $field[2];
116 elsif ($field[0] eq "StartCharMetrics") {
117 while (<AFM>) {
118 @field = split(' ');
119 next if $#field < 0;
120 last if ($field[0] eq "EndCharMetrics");
121 if ($field[0] eq "C") {
122 my $w;
123 my $wx = 0;
124 my $n = "";
125 # %ligs = ();
126 my $lly = 0;
127 my $ury = 0;
128 my $llx = 0;
129 my $urx = 0;
130 my $c = $field[1];
131 my $i = 2;
132 while ($i <= $#field) {
133 if ($field[$i] eq "WX") {
134 $w = $field[$i + 1];
135 $i += 2;
137 elsif ($field[$i] eq "N") {
138 $n = $field[$i + 1];
139 $i += 2;
141 elsif ($field[$i] eq "B") {
142 $llx = $field[$i + 1];
143 $lly = $field[$i + 2];
144 $urx = $field[$i + 3];
145 $ury = $field[$i + 4];
146 $i += 5;
148 # elsif ($field[$i] eq "L") {
149 # $ligs{$field[$i + 2]} = $field[$i + 1];
150 # $i += 3;
152 else {
153 while ($i <= $#field && $field[$i] ne ";") {
154 $i++;
156 $i++;
159 if (!$opt_e && $c != -1) {
160 $encoding[$c] = $n;
161 $in_encoding{$n} = 1;
163 $width{$n} = $w;
164 $height{$n} = $ury;
165 $depth{$n} = -$lly;
166 $left_side_bearing{$n} = -$llx;
167 $right_side_bearing{$n} = $urx - $w;
168 # while ((my $lig, my $glyph2) = each %ligs) {
169 # $ligatures{$lig} = $n . " " . $glyph2;
175 close(AFM);
177 # read the DESC file
179 my ($sizescale, $resolution, $unitwidth);
180 $sizescale = 1;
182 open(DESC, $desc) || open(DESC, $sys_desc) ||
183 die "$prog: can't open \`$desc' or \`$sys_desc': $!\n";
184 while (<DESC>) {
185 next if /^#/;
186 chop;
187 my @field = split(' ');
188 next if $#field < 0;
189 last if $field[0] eq "charset";
190 if ($field[0] eq "res") {
191 $resolution = $field[1];
193 elsif ($field[0] eq "unitwidth") {
194 $unitwidth = $field[1];
196 elsif ($field[0] eq "sizescale") {
197 $sizescale = $field[1];
200 close(DESC);
202 if ($opt_e) {
203 # read the encoding file
205 my $sys_opt_e = $groff_sys_fontdir . "/dev-ps/" . $opt_e;
206 open(ENCODING, $opt_e) || open(ENCODING, $sys_opt_e) ||
207 die "$prog: can't open \`$opt_e' or \`$sys_opt_e': $!\n";
208 while (<ENCODING>) {
209 next if /^#/;
210 chop;
211 my @field = split(' ');
212 next if $#field < 0;
213 if ($#field == 1) {
214 if ($field[1] >= 0 && defined $width{$field[0]}) {
215 $encoding[$field[1]] = $field[0];
216 $in_encoding{$field[0]} = 1;
220 close(ENCODING);
223 # read the map file
225 my (%nmap, %map);
227 open(MAP, $map) || open(MAP, $sys_map) ||
228 die "$prog: can't open \`$map' or \`$sys_map': $!\n";
229 while (<MAP>) {
230 next if /^#/;
231 chop;
232 my @field = split(' ');
233 next if $#field < 0;
234 if ($#field == 1) {
235 if ($field[1] eq "space") {
236 # The PostScript character "space" is automatically mapped
237 # to the groff character "space"; this is for grops.
238 warn "you are not allowed to map to " .
239 "the roff character \`space'";
241 elsif ($field[0] eq "space") {
242 warn "you are not allowed to map " .
243 "the PostScript character \`space'";
245 else {
246 $nmap{$field[0]} += 0;
247 $map{$field[0], $nmap{$field[0]}} = $field[1];
248 $nmap{$field[0]} += 1;
250 # There is more than one way to make a PS glyph name;
251 # let us try Unicode names with both `uni' and `u' prefixes.
252 my $utmp = $AGL_to_unicode{$field[0]};
253 if (defined $utmp && $utmp =~ /^[0-9A-F]{4}$/) {
254 foreach my $unicodepsname ("uni" . $utmp, "u" . $utmp) {
255 $nmap{$unicodepsname} += 0;
256 $map{$unicodepsname, $nmap{$unicodepsname}} = $field[1];
257 $nmap{$unicodepsname} += 1;
263 close(MAP);
265 $italic_angle = $opt_a if $opt_a;
268 if (!$opt_x) {
269 my %mapped;
270 my $i = ($#encoding > 256) ? ($#encoding + 1) : 256;
271 while (my $ch = each %width) {
272 # add unencoded characters
273 if (!$in_encoding{$ch}) {
274 $encoding[$i] = $ch;
275 $i++;
277 if ($nmap{$ch}) {
278 for (my $j = 0; $j < $nmap{$ch}; $j++) {
279 if (defined $mapped{$map{$ch, $j}}) {
280 warn "both $mapped{$map{$ch, $j}} and $ch " .
281 "map to $map{$ch, $j}";
283 else {
284 $mapped{$map{$ch, $j}} = $ch;
288 else {
289 my $u = ""; # the resulting groff glyph name
290 my $ucomp = ""; # Unicode string before decomposition
291 my $utmp = ""; # temporary value
292 my $component = "";
293 my $nv = 0;
295 # Step 1:
296 # Drop all characters from the glyph name starting with the
297 # first occurrence of a period (U+002E FULL STOP), if any.
298 # ?? We avoid mapping of glyphs with periods, since they are
299 # likely to be variant glyphs, leading to a `many ps glyphs --
300 # one groff glyph' conflict.
302 # If multiple glyphs in the font represent the same character
303 # in the Unicode standard, as do `A' and `A.swash', for example,
304 # they can be differentiated by using the same base name with
305 # different suffixes. This suffix (the part of glyph name that
306 # follows the first period) does not participate in the
307 # computation of a character sequence. It can be used by font
308 # designers to indicate some characteristics of the glyph. The
309 # suffix may contain periods or any other permitted characters.
310 # Small cap A, for example, could be named `uni0041.sc' or
311 # `A.sc'.
313 next if $ch =~ /\./;
315 # Step 2:
316 # Split the remaining string into a sequence of components,
317 # using the underscore character (U+005F LOW LINE) as the
318 # delimiter.
320 while ($ch =~ /([^_]+)/g) {
321 $component = $1;
323 # Step 3:
324 # Map each component to a character string according to the
325 # procedure below:
327 # * If the component is in the Adobe Glyph List, then map
328 # it to the corresponding character in that list.
330 $utmp = $AGL_to_unicode{$component};
331 if ($utmp) {
332 $utmp = "U+" . $utmp;
335 # * Otherwise, if the component is of the form `uni'
336 # (U+0075 U+006E U+0069) followed by a sequence of
337 # uppercase hexadecimal digits (0 .. 9, A .. F, i.e.,
338 # U+0030 .. U+0039, U+0041 .. U+0046), the length of
339 # that sequence is a multiple of four, and each group of
340 # four digits represents a number in the set {0x0000 ..
341 # 0xD7FF, 0xE000 .. 0xFFFF}, then interpret each such
342 # number as a Unicode scalar value and map the component
343 # to the string made of those scalar values.
345 elsif ($component =~ /^uni([0-9A-F]{4})+$/) {
346 while ($component =~ /([0-9A-F]{4})/g) {
347 $nv = hex("0x" . $1);
348 if ($nv <= 0xD7FF || $nv >= 0xE000) {
349 $utmp .= "U+" . $1;
351 else {
352 $utmp = "";
353 last;
358 # * Otherwise, if the component is of the form `u' (U+0075)
359 # followed by a sequence of four to six uppercase
360 # hexadecimal digits {0 .. 9, A .. F} (U+0030 .. U+0039,
361 # U+0041 .. U+0046), and those digits represent a number
362 # in {0x0000 .. 0xD7FF, 0xE000 .. 0x10FFFF}, then
363 # interpret this number as a Unicode scalar value and map
364 # the component to the string made of this scalar value.
366 elsif ($component =~ /^u([0-9A-F]{4,6})$/) {
367 $nv = hex("0x" . $1);
368 if ($nv <= 0xD7FF || ($nv >= 0xE000 && $nv <= 0x10FFFF)) {
369 $utmp = "U+" . $1;
373 # Finally, concatenate those strings; the result is the
374 # character string to which the glyph name is mapped.
376 $ucomp .= $utmp if $utmp;
379 # Unicode decomposition
380 while ($ucomp =~ /([0-9A-F]{4,6})/g) {
381 $component = $1;
382 $utmp = $unicode_decomposed{$component};
383 $u .= "_" . ($utmp ? $utmp : $component);
385 $u =~ s/^_/u/;
386 if ($u) {
387 if (defined $mapped{$u}) {
388 warn "both $mapped{$u} and $ch map to $u";
390 else {
391 $mapped{$u} = $ch;
393 $nmap{$ch} += 1;
394 $map{$ch, "0"} = $u;
400 # Check explicitly for groff's standard ligatures -- many afm files don't
401 # have proper `L' entries.
403 my %default_ligatures = (
404 "fi", "f i",
405 "fl", "f l",
406 "ff", "f f",
407 "ffi", "ff i",
408 "ffl", "ff l",
411 while (my ($lig, $components) = each %default_ligatures) {
412 if (defined $width{$lig} && !defined $ligatures{$lig}) {
413 $ligatures{$lig} = $components;
417 # print it all out
419 open(FONT, ">$outfile") || die "$prog: can't open \`$outfile' for output: $!\n";
420 select(FONT);
422 print("# This file has been generated with " .
423 "@L_AFMTODIT (@T_ROFF@) v@VERSION@\n");
424 print("#\n");
425 print("# $fullname\n") if defined $fullname;
426 print("# $version\n") if defined $version;
427 print("# $familyname\n") if defined $familyname;
429 if ($opt_c) {
430 print("#\n");
431 if (defined $notice || @comments) {
432 print("# The original AFM file contains the following comments:\n");
433 print("#\n");
434 print("# $notice\n") if defined $notice;
435 foreach my $comment (@comments) {
436 print("# $comment\n");
439 else {
440 print("# The original AFM file contains no comments.\n");
444 print("\n");
446 print("name $font\n");
447 print("internalname $psname\n") if $psname;
448 print("special\n") if $opt_s;
449 printf("slant %g\n", $italic_angle) if $italic_angle != 0;
450 printf("spacewidth %d\n", conv($width{"space"})) if defined $width{"space"};
452 if ($opt_e) {
453 my $e = $opt_e;
454 $e =~ s@.*/@@;
455 print("encoding $e\n");
458 if (!$opt_n && %ligatures) {
459 print("ligatures");
460 while (my $lig = each %ligatures) {
461 print(" $lig");
463 print(" 0\n");
466 if (!$opt_k && $#kern1 >= 0) {
467 print("\n");
468 print("kernpairs\n");
470 for (my $i = 0; $i <= $#kern1; $i++) {
471 my $c1 = $kern1[$i];
472 my $c2 = $kern2[$i];
473 if (defined $nmap{$c1} && $nmap{$c1} != 0
474 && defined $nmap{$c2} && $nmap{$c2} != 0) {
475 for (my $j = 0; $j < $nmap{$c1}; $j++) {
476 for (my $k = 0; $k < $nmap{$c2}; $k++) {
477 if ($kernx[$i] != 0) {
478 printf("%s %s %d\n",
479 $map{$c1, $j},
480 $map{$c2, $k},
481 conv($kernx[$i]));
489 my ($asc_boundary, $desc_boundary, $xheight, $slant);
491 # characters not shorter than asc_boundary are considered to have ascenders
493 $asc_boundary = 0;
494 $asc_boundary = $height{"t"} if defined $height{"t"};
495 $asc_boundary -= 1;
497 # likewise for descenders
499 $desc_boundary = 0;
500 $desc_boundary = $depth{"g"} if defined $depth{"g"};
501 $desc_boundary = $depth{"j"} if defined $depth{"g"} && $depth{"j"} < $desc_boundary;
502 $desc_boundary = $depth{"p"} if defined $depth{"p"} && $depth{"p"} < $desc_boundary;
503 $desc_boundary = $depth{"q"} if defined $depth{"q"} && $depth{"q"} < $desc_boundary;
504 $desc_boundary = $depth{"y"} if defined $depth{"y"} && $depth{"y"} < $desc_boundary;
505 $desc_boundary -= 1;
507 if (defined $height{"x"}) {
508 $xheight = $height{"x"};
510 elsif (defined $height{"alpha"}) {
511 $xheight = $height{"alpha"};
513 else {
514 $xheight = 450;
517 $italic_angle = $italic_angle*3.14159265358979323846/180.0;
518 $slant = sin($italic_angle)/cos($italic_angle);
519 $slant = 0 if $slant < 0;
521 print("\n");
522 print("charset\n");
523 for (my $i = 0; $i <= $#encoding; $i++) {
524 my $ch = $encoding[$i];
525 if (defined $ch && $ch ne "" && $ch ne "space") {
526 $map{$ch, "0"} = "---" if !defined $nmap{$ch} || $nmap{$ch} == 0;
527 my $type = 0;
528 my $h = $height{$ch};
529 $h = 0 if $h < 0;
530 my $d = $depth{$ch};
531 $d = 0 if $d < 0;
532 $type = 1 if $d >= $desc_boundary;
533 $type += 2 if $h >= $asc_boundary;
534 printf("%s\t%d", $map{$ch, "0"}, conv($width{$ch}));
535 my $italic_correction = 0;
536 my $left_math_fit = 0;
537 my $subscript_correction = 0;
538 if (defined $opt_i) {
539 $italic_correction = $right_side_bearing{$ch} + $opt_i;
540 $italic_correction = 0 if $italic_correction < 0;
541 $subscript_correction = $slant * $xheight * .8;
542 $subscript_correction = $italic_correction if
543 $subscript_correction > $italic_correction;
544 $left_math_fit = $left_side_bearing{$ch} + $opt_i;
545 if (defined $opt_m) {
546 $left_math_fit = 0 if $left_math_fit < 0;
549 if (defined $italic_correction{$ch}) {
550 $italic_correction = $italic_correction{$ch};
552 if (defined $left_italic_correction{$ch}) {
553 $left_math_fit = $left_italic_correction{$ch};
555 if (defined $subscript_correction{$ch}) {
556 $subscript_correction = $subscript_correction{$ch};
558 if ($subscript_correction != 0) {
559 printf(",%d,%d", conv($h), conv($d));
560 printf(",%d,%d,%d", conv($italic_correction),
561 conv($left_math_fit),
562 conv($subscript_correction));
564 elsif ($left_math_fit != 0) {
565 printf(",%d,%d", conv($h), conv($d));
566 printf(",%d,%d", conv($italic_correction),
567 conv($left_math_fit));
569 elsif ($italic_correction != 0) {
570 printf(",%d,%d", conv($h), conv($d));
571 printf(",%d", conv($italic_correction));
573 elsif ($d != 0) {
574 printf(",%d,%d", conv($h), conv($d));
576 else {
577 # always put the height in to stop groff guessing
578 printf(",%d", conv($h));
580 printf("\t%d", $type);
581 printf("\t%d\t%s\n", $i, $ch);
582 if (defined $nmap{$ch}) {
583 for (my $j = 1; $j < $nmap{$ch}; $j++) {
584 printf("%s\t\"\n", $map{$ch, $j});
588 if (defined $ch && $ch eq "space" && defined $width{"space"}) {
589 printf("space\t%d\t0\t%d\tspace\n", conv($width{"space"}), $i);
593 sub conv {
594 $_[0]*$unitwidth*$resolution/(72*1000*$sizescale) + ($_[0] < 0 ? -.5 : .5);
597 # s-it2-mode