BR 2781900: handle common labels while optimizing
[nasm/avx512.git] / doc / genps.pl
blob6447564c01c9d71b64836cf78e8abf40481c2feb
1 #!/usr/bin/perl
3 # Format the documentation as PostScript
6 use Env;
7 use lib $srcdir;
9 require 'psfonts.ph'; # The fonts we want to use
10 require 'pswidth.ph'; # PostScript string width
12 use Fcntl;
15 # PostScript configurables; these values are also available to the
16 # PostScript code itself
18 %psconf = (
19 pagewidth => 595, # Page width in PostScript points
20 pageheight => 792, # Page height in PostScript points
21 lmarg => 100, # Left margin in PostScript points
22 rmarg => 50, # Right margin in PostScript points
23 topmarg => 100, # Top margin in PostScript points
24 botmarg => 100, # Bottom margin in PostScript points
25 plmarg => 50, # Page number position relative to left margin
26 prmarg => 0, # Page number position relative to right margin
27 pymarg => 50, # Page number position relative to bot margin
28 startcopyright => 75, # How much above the bottom margin is the
29 # copyright notice stuff
30 bulladj => 12, # How much to indent a bullet paragraph
31 tocind => 12, # TOC indentation per level
32 tocpnz => 24, # Width of TOC page number only zone
33 tocdots => 8, # Spacing between TOC dots
34 idxspace => 24, # Minimum space between index title and pg#
35 idxindent => 24, # How much to indent a subindex entry
36 idxgutter => 24, # Space between index columns
37 idxcolumns => 2, # Number of index columns
40 %psbool = (
41 colorlinks => 0, # Set links in blue rather than black
44 # Known paper sizes
45 %papersizes = (
46 'a5' => [421, 595], # ISO half paper size
47 'b5' => [501, 709], # ISO small paper size
48 'a4' => [595, 842], # ISO standard paper size
49 'letter' => [612, 792], # US common paper size
50 'pa4' => [595, 792], # Compromise ("portable a4")
51 'b4' => [709,1002], # ISO intermediate paper size
52 'legal' => [612,1008], # US intermediate paper size
53 'a3' => [842,1190], # ISO double paper size
54 '11x17' => [792,1224], # US double paper size
58 # Parse the command line
60 undef $input;
61 while ( $arg = shift(@ARGV) ) {
62 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
63 $parm = $2;
64 $true = ($1 eq '') ? 1 : 0;
65 if ( $true && defined($papersizes{$parm}) ) {
66 $psconf{pagewidth} = $papersizes{$parm}->[0];
67 $psconf{pageheight} = $papersizes{$parm}->[1];
68 } elsif ( defined($psbool{$parm}) ) {
69 $psbool{$parm} = $true;
70 } elsif ( $true && defined($psconf{$parm}) ) {
71 $psconf{$parm} = shift(@ARGV);
72 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
73 $metadata{$parm} = shift(@ARGV);
74 } else {
75 die "$0: Unknown option: $arg\n";
77 } else {
78 $input = $arg;
83 # Document formatting parameters
85 $paraskip = 6; # Space between paragraphs
86 $chapstart = 30; # Space before a chapter heading
87 $chapskip = 24; # Space after a chapter heading
88 $tocskip = 6; # Space between TOC entries
90 # Configure post-paragraph skips for each kind of paragraph
91 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
92 'head' => $paraskip, 'subh' => $paraskip,
93 'norm' => $paraskip, 'bull' => $paraskip,
94 'code' => $paraskip, 'toc0' => $tocskip,
95 'toc1' => $tocskip, 'toc2' => $tocskip);
97 # Custom encoding vector. This is basically the same as
98 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
99 # but with the "naked" accents at \200-\237 moved to the \000-\037
100 # range (ASCII control characters), and a few extra characters thrown
101 # in. It is basically a modified Windows 1252 codepage, minus, for
102 # now, the euro sign (\200 is reserved for euro.)
104 @NASMEncoding =
106 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
107 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
108 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
109 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
110 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
111 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
112 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
113 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
114 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
115 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
116 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
117 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
118 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
119 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
120 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
121 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
122 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
123 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
124 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
125 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
126 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
127 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
128 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
129 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
130 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
131 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
132 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
133 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
134 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
135 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
136 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
137 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
138 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
139 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
140 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
141 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
142 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
143 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
144 'thorn', 'ydieresis'
147 # Name-to-byte lookup hash
148 %charcode = ();
149 for ( $i = 0 ; $i < 256 ; $i++ ) {
150 $charcode{$NASMEncoding[$i]} = chr($i);
154 # First, format the stuff coming from the front end into
155 # a cleaner representation
157 if ( defined($input) ) {
158 sysopen(PARAS, $input, O_RDONLY) or
159 die "$0: cannot open $input: $!\n";
160 } else {
161 open(PARAS, "<&STDIN") or die "$0: $!\n";
163 while ( defined($line = <PARAS>) ) {
164 chomp $line;
165 $data = <PARAS>;
166 chomp $data;
167 if ( $line =~ /^meta :(.*)$/ ) {
168 $metakey = $1;
169 $metadata{$metakey} = $data;
170 } elsif ( $line =~ /^indx :(.*)$/ ) {
171 $ixentry = $1;
172 push(@ixentries, $ixentry);
173 $ixterms{$ixentry} = [split(/\037/, $data)];
174 # Look for commas. This is easier done on the string
175 # representation, so do it now.
176 if ( $data =~ /^(.*)\,\037sp\037/ ) {
177 $ixprefix = $1;
178 $ixprefix =~ s/\037n $//; # Discard possible font change at end
179 $ixhasprefix{$ixentry} = $ixprefix;
180 if ( !$ixprefixes{$ixprefix} ) {
181 $ixcommafirst{$ixentry}++;
183 $ixprefixes{$ixprefix}++;
184 } else {
185 # A complete term can also be used as a prefix
186 $ixprefixes{$data}++;
188 } else {
189 push(@ptypes, $line);
190 push(@paras, [split(/\037/, $data)]);
193 close(PARAS);
196 # Convert an integer to a chosen base
198 sub int2base($$) {
199 my($i,$b) = @_;
200 my($s) = '';
201 my($n) = '';
202 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
203 return '0' if ($i == 0);
204 if ( $i < 0 ) { $n = '-'; $i = -$i; }
205 while ( $i ) {
206 $s = substr($z,$i%$b,1) . $s;
207 $i = int($i/$b);
209 return $n.$s;
213 # Convert a string to a rendering array
215 sub string2array($)
217 my($s) = @_;
218 my(@a) = ();
220 $s =~ s/\B\-\-\B/$charcode{'emdash'}/g;
221 $s =~ s/\B\-\B/ $charcode{'endash'} /g;
223 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
224 push(@a, [0,$1]);
225 $s = $2;
228 return @a;
232 # Take a crossreference name and generate the PostScript name for it.
234 # This hack produces a somewhat smaller PDF...
235 #%ps_xref_list = ();
236 #$ps_xref_next = 0;
237 #sub ps_xref($) {
238 # my($s) = @_;
239 # my $q = $ps_xref_list{$s};
240 # return $q if ( defined($ps_xref_list{$s}) );
241 # $q = 'X'.int2base($ps_xref_next++, 52);
242 # $ps_xref_list{$s} = $q;
243 # return $q;
246 # Somewhat bigger PDF, but one which obeys # URLs
247 sub ps_xref($) {
248 return @_[0];
252 # Flow lines according to a particular font set and width
254 # A "font set" is represented as an array containing
255 # arrays of pairs: [<size>, <metricref>]
257 # Each line is represented as:
258 # [ [type,first|last,aux,fontset,page,ypos,optional col],
259 # [rendering array] ]
261 # A space character may be "squeezed" by up to this much
262 # (as a fraction of the normal width of a space.)
264 $ps_space_squeeze = 0.00; # Min space width 100%
265 sub ps_flow_lines($$$@) {
266 my($wid, $fontset, $type, @data) = @_;
267 my($fonts) = $$fontset{fonts};
268 my($e);
269 my($w) = 0; # Width of current line
270 my($sw) = 0; # Width of current line due to spaces
271 my(@l) = (); # Current line
272 my(@ls) = (); # Accumulated output lines
273 my(@xd) = (); # Metadata that goes with subsequent text
274 my $hasmarker = 0; # Line has -6 marker
275 my $pastmarker = 0; # -6 marker found
277 # If there is a -6 marker anywhere in the paragraph,
278 # *each line* output needs to have a -6 marker
279 foreach $e ( @data ) {
280 $hasmarker = 1 if ( $$e[0] == -6 );
283 $w = 0;
284 foreach $e ( @data ) {
285 if ( $$e[0] < 0 ) {
286 # Type is metadata. Zero width.
287 if ( $$e[0] == -6 ) {
288 $pastmarker = 1;
290 if ( $$e[0] == -1 || $$e[0] == -6 ) {
291 # -1 (end anchor) or -6 (marker) goes with the preceeding
292 # text, otherwise with the subsequent text
293 push(@l, $e);
294 } else {
295 push(@xd, $e);
297 } else {
298 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
299 \@NASMEncoding) *
300 ($fontset->{fonts}->[$$e[0]][0]/1000);
301 my $sp = $$e[1];
302 $sp =~ tr/[^ ]//d; # Delete nonspaces
303 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
304 \@NASMEncoding) *
305 ($fontset->{fonts}->[$$e[0]][0]/1000);
307 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
308 # Begin new line
309 # Search backwards for previous space chunk
310 my $lx = scalar(@l)-1;
311 my @rm = ();
312 while ( $lx >= 0 ) {
313 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
314 # Skip metadata
315 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
316 $lx--;
318 if ( $lx >= 0 ) {
319 if ( $l[$lx]->[1] eq ' ' ) {
320 splice(@l, $lx, 1);
321 @rm = splice(@l, $lx);
322 last; # Found place to break
323 } else {
324 $lx--;
329 # Now @l contains the stuff to remain on the old line
330 # If we broke the line inside a link, then split the link
331 # into two.
332 my $lkref = undef;
333 foreach my $lc ( @l ) {
334 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
335 $lkref = $lc;
336 } elsif ( $$lc[0] == -1 ) {
337 undef $lkref;
341 if ( defined($lkref) ) {
342 push(@l, [-1,undef]); # Terminate old reference
343 unshift(@rm, $lkref); # Duplicate reference on new line
346 if ( $hasmarker ) {
347 if ( $pastmarker ) {
348 unshift(@rm,[-6,undef]); # New line starts with marker
349 } else {
350 push(@l,[-6,undef]); # Old line ends with marker
354 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
355 @l = @rm;
357 $w = $sw = 0;
358 # Compute the width of the remainder array
359 for my $le ( @l ) {
360 if ( $$le[0] >= 0 ) {
361 my $xew = ps_width($$le[1],
362 $fontset->{fonts}->[$$le[0]][1],
363 \@NASMEncoding) *
364 ($fontset->{fonts}->[$$le[0]][0]/1000);
365 my $xsp = $$le[1];
366 $xsp =~ tr/[^ ]//d; # Delete nonspaces
367 my $xsw = ps_width($xsp,
368 $fontset->{fonts}->[$$le[0]][1],
369 \@NASMEncoding) *
370 ($fontset->{fonts}->[$$le[0]][0]/1000);
371 $w += $xew; $sw += $xsw;
375 push(@l, @xd); # Accumulated metadata
376 @xd = ();
377 if ( $$e[1] ne '' ) {
378 push(@l, $e);
379 $w += $ew; $sw += $esw;
383 push(@l,@xd);
384 if ( scalar(@l) ) {
385 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
388 # Mark the first line as first and the last line as last
389 if ( scalar(@ls) ) {
390 $ls[0]->[0]->[1] |= 1; # First in para
391 $ls[-1]->[0]->[1] |= 2; # Last in para
393 return @ls;
397 # Once we have broken things into lines, having multiple chunks
398 # with the same font index is no longer meaningful. Merge
399 # adjacent chunks to keep down the size of the whole file.
401 sub ps_merge_chunks(@) {
402 my(@ci) = @_;
403 my($c, $lc);
404 my(@co, $eco);
406 undef $lc;
407 @co = ();
408 $eco = -1; # Index of the last entry in @co
409 foreach $c ( @ci ) {
410 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
411 $co[$eco]->[1] .= $$c[1];
412 } else {
413 push(@co, $c); $eco++;
414 $lc = $$c[0];
417 return @co;
421 # Convert paragraphs to rendering arrays. Each
422 # element in the array contains (font, string),
423 # where font can be one of:
424 # -1 end link
425 # -2 begin crossref
426 # -3 begin weblink
427 # -4 index item anchor
428 # -5 crossref anchor
429 # -6 left/right marker (used in the index)
430 # -7 page link (used in the index)
431 # 0 normal
432 # 1 empatic (italic)
433 # 2 code (fixed spacing)
436 sub mkparaarray($@) {
437 my($ptype, @chunks) = @_;
439 my @para = ();
440 my $in_e = 0;
441 my $chunk;
443 if ( $ptype =~ /^code/ ) {
444 foreach $chunk ( @chunks ) {
445 push(@para, [2, $chunk]);
447 } else {
448 foreach $chunk ( @chunks ) {
449 my $type = substr($chunk,0,2);
450 my $text = substr($chunk,2);
452 if ( $type eq 'sp' ) {
453 push(@para, [$in_e?1:0, ' ']);
454 } elsif ( $type eq 'da' ) {
455 push(@para, [$in_e?1:0, $charcode{'endash'}]);
456 } elsif ( $type eq 'n ' ) {
457 push(@para, [0, $text]);
458 $in_e = 0;
459 } elsif ( $type =~ '^e' ) {
460 push(@para, [1, $text]);
461 $in_e = ($type eq 'es' || $type eq 'e ');
462 } elsif ( $type eq 'c ' ) {
463 push(@para, [2, $text]);
464 $in_e = 0;
465 } elsif ( $type eq 'x ' ) {
466 push(@para, [-2, ps_xref($text)]);
467 } elsif ( $type eq 'xe' ) {
468 push(@para, [-1, undef]);
469 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
470 $text =~ /\<(.*)\>(.*)$/;
471 my $link = $1; $text = $2;
472 push(@para, [-3, $link]);
473 push(@para, [($type eq 'wc') ? 2:0, $text]);
474 push(@para, [-1, undef]);
475 $in_e = 0;
476 } elsif ( $type eq 'i ' ) {
477 push(@para, [-4, $text]);
478 } else {
479 die "Unexpected paragraph chunk: $chunk";
483 return @para;
486 $npara = scalar(@paras);
487 for ( $i = 0 ; $i < $npara ; $i++ ) {
488 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
492 # This converts a rendering array to a simple string
494 sub ps_arraytostr(@) {
495 my $s = '';
496 my $c;
497 foreach $c ( @_ ) {
498 $s .= $$c[1] if ( $$c[0] >= 0 );
500 return $s;
504 # This generates a duplicate of a paragraph
506 sub ps_dup_para(@) {
507 my(@i) = @_;
508 my(@o) = ();
509 my($c);
511 foreach $c ( @i ) {
512 my @cc = @{$c};
513 push(@o, [@cc]);
515 return @o;
519 # This generates a duplicate of a paragraph, stripping anchor
520 # tags (-4 and -5)
522 sub ps_dup_para_noanchor(@) {
523 my(@i) = @_;
524 my(@o) = ();
525 my($c);
527 foreach $c ( @i ) {
528 my @cc = @{$c};
529 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
531 return @o;
535 # Scan for header paragraphs and fix up their contents;
536 # also generate table of contents and PDF bookmarks.
538 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
539 @tocptypes = ('chap');
540 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
541 %bookref = ();
542 for ( $i = 0 ; $i < $npara ; $i++ ) {
543 my $xtype = $ptypes[$i];
544 my $ptype = substr($xtype,0,4);
545 my $str;
546 my $book;
548 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
549 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
550 die "Bad para";
552 my $secn = $1;
553 my $sech = $2;
554 my $xref = ps_xref($sech);
555 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
557 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
558 push(@bookmarks, $book);
559 $bookref{$secn} = $book;
561 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
562 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
564 unshift(@{$paras[$i]},
565 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
566 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
567 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
568 die "Bad para";
570 my $secn = $1;
571 my $sech = $2;
572 my $xref = ps_xref($sech);
573 my $pref;
574 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
576 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
577 push(@bookmarks, $book);
578 $bookref{$secn} = $book;
579 $bookref{$pref}->[1]--; # Adjust count for parent node
581 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
582 push(@tocptypes,
583 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
585 unshift(@{$paras[$i]}, [-5, $xref]);
590 # Add TOC to beginning of paragraph list
592 unshift(@paras, @tocparas); undef @tocparas;
593 unshift(@ptypes, @tocptypes); undef @tocptypes;
596 # Add copyright notice to the beginning
598 @copyright_page =
599 ([[0, $charcode{'copyright'}],
600 [0, ' '], [0, $metadata{'year'}],
601 [0, ' '], string2array($metadata{'author'}),
602 [0, ' '], string2array($metadata{'copyright_tail'})],
603 [string2array($metadata{'license'})],
604 [string2array($metadata{'auxinfo'})]);
606 unshift(@paras, @copyright_page);
607 unshift(@ptypes, ('norm') x scalar(@copyright_page));
609 $npara = scalar(@paras);
612 # No lines generated, yet.
614 @pslines = ();
617 # Line Auxilliary Information Types
619 $AuxStr = 1; # String
620 $AuxPage = 2; # Page number (from xref)
621 $AuxPageStr = 3; # Page number as a PostScript string
622 $AuxXRef = 4; # Cross reference as a name
623 $AuxNum = 5; # Number
626 # Break or convert paragraphs into lines, and push them
627 # onto the @pslines array.
629 sub ps_break_lines($$) {
630 my ($paras,$ptypes) = @_;
632 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
633 my $bullwidth = $linewidth-$psconf{bulladj};
634 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
635 -$psconf{idxspace};
637 my $npara = scalar(@{$paras});
638 my $i;
640 for ( $i = 0 ; $i < $npara ; $i++ ) {
641 my $xtype = $ptypes->[$i];
642 my $ptype = substr($xtype,0,4);
643 my @data = @{$paras->[$i]};
644 my @ls = ();
645 if ( $ptype eq 'code' ) {
646 my $p;
647 # Code paragraph; each chunk is a line
648 foreach $p ( @data ) {
649 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
651 $ls[0]->[0]->[1] |= 1; # First in para
652 $ls[-1]->[0]->[1] |= 2; # Last in para
653 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
654 # Chapters are flowed normally, but in an unusual font
655 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
656 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
657 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
658 die "Bad para";
660 my $secn = $1;
661 my $sech = $2;
662 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
663 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
664 # We need the heading number as auxillary data
665 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
666 } elsif ( $ptype eq 'norm' ) {
667 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
668 } elsif ( $ptype eq 'bull' ) {
669 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
670 } elsif ( $ptype =~ /^toc/ ) {
671 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
672 die "Bad para";
674 my $xref = $1;
675 my $refname = $2.' ';
676 my $ntoc = substr($ptype,3,1)+0;
677 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
678 \@NASMEncoding) *
679 ($BodyFont{fonts}->[0][0]/1000);
681 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
682 $psconf{tocpnz}-$refwidth,
683 \%BodyFont, $ptype, @data);
685 # Auxilliary data: for the first line, the cross reference symbol
686 # and the reference name; for all lines but the first, the
687 # reference width; and for the last line, the page number
688 # as a string.
689 my $nl = scalar(@ls);
690 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
691 for ( $j = 1 ; $j < $nl ; $j++ ) {
692 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
694 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
695 } elsif ( $ptype =~ /^idx/ ) {
696 my $lvl = substr($ptype,3,1)+0;
698 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
699 \%BodyFont, $ptype, @data);
700 } else {
701 die "Unknown para type: $ptype";
703 # Merge adjacent identical chunks
704 foreach $l ( @ls ) {
705 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
707 push(@pslines,@ls);
711 # Break the main body text into lines.
712 ps_break_lines(\@paras, \@ptypes);
715 # Break lines in to pages
718 # Where to start on page 2, the copyright page
719 $curpage = 2; # Start on page 2
720 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
721 $psconf{startcopyright};
722 undef $columnstart; # Not outputting columnar text
723 undef $curcolumn; # Current column
724 $nlines = scalar(@pslines);
727 # This formats lines inside the global @pslines array into pages,
728 # updating the page and y-coordinate entries. Start at the
729 # $startline position in @pslines and go to but not including
730 # $endline. The global variables $curpage, $curypos, $columnstart
731 # and $curcolumn are updated appropriately.
733 sub ps_break_pages($$) {
734 my($startline, $endline) = @_;
736 # Paragraph types which should never be broken
737 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
738 # Paragraph types which are heading (meaning they should not be broken
739 # immediately after)
740 my $nobreakafter = "^(chap|appn|head|subh)\$";
741 # Paragraph types which should never be broken *before*
742 my $nobreakbefore = "^idx[1-9]\$";
743 # Paragraph types which are set in columnar format
744 my $columnregexp = "^idx.\$";
746 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
748 my $i;
750 for ( $i = $startline ; $i < $endline ; $i++ ) {
751 my $linfo = $pslines[$i]->[0];
752 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
753 && ($$linfo[1] & 1) ) {
754 # First line of a new chapter heading. Start a new page.
755 undef $columnstart;
756 $curpage++ if ( $curypos > 0 || defined($columnstart) );
757 $curypos = $chapstart;
758 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
759 undef $columnstart;
760 $curpage++;
761 $curypos = 0;
764 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
765 $columnstart = $curypos;
766 $curcolumn = 0;
769 # Adjust position by the appropriate leading
770 $curypos += $$linfo[3]->{leading};
772 # Record the page and y-position
773 $$linfo[4] = $curpage;
774 $$linfo[5] = $curypos;
775 $$linfo[6] = $curcolumn if ( defined($columnstart) );
777 if ( $curypos > $upageheight ) {
778 # We need to break the page before this line.
779 my $broken = 0; # No place found yet
780 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
781 my $linfo = $pslines[$i]->[0];
782 my $pinfo = $pslines[$i-1]->[0];
784 if ( $$linfo[1] == 2 ) {
785 # This would be an orphan, don't break.
786 } elsif ( $$linfo[1] & 1 ) {
787 # Sole line or start of paragraph. Break unless
788 # the previous line was part of a heading.
789 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
790 $$linfo[0] !~ /$nobreakbefore/o );
791 } else {
792 # Middle of paragraph. Break unless we're in a
793 # no-break paragraph, or the previous line would
794 # end up being a widow.
795 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
796 $$pinfo[1] != 1 );
798 $i--;
800 die "Nowhere to break page $curpage\n" if ( !$broken );
801 # Now $i should point to line immediately before the break, i.e.
802 # the next paragraph should be the first on the new page
803 if ( defined($columnstart) &&
804 ++$curcolumn < $psconf{idxcolumns} ) {
805 # We're actually breaking text into columns, not pages
806 $curypos = $columnstart;
807 } else {
808 undef $columnstart;
809 $curpage++;
810 $curypos = 0;
812 next;
815 # Add end of paragraph skip
816 if ( $$linfo[1] & 2 ) {
817 $curypos += $skiparray{$$linfo[0]};
822 ps_break_pages(0,$nlines); # Break the main text body into pages
825 # Find the page number of all the indices
827 %ps_xref_page = (); # Crossref anchor pages
828 %ps_index_pages = (); # Index item pages
829 $nlines = scalar(@pslines);
830 for ( $i = 0 ; $i < $nlines ; $i++ ) {
831 my $linfo = $pslines[$i]->[0];
832 foreach my $c ( @{$pslines[$i]->[1]} ) {
833 if ( $$c[0] == -4 ) {
834 if ( !defined($ps_index_pages{$$c[1]}) ) {
835 $ps_index_pages{$$c[1]} = [];
836 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
837 # Pages are emitted in order; if this is a duplicated
838 # entry it will be the last one
839 next; # Duplicate
841 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
842 } elsif ( $$c[0] == -5 ) {
843 $ps_xref_page{$$c[1]} = $$linfo[4];
849 # Emit index paragraphs
851 $startofindex = scalar(@pslines);
852 @ixparas = ([[-5,'index'],[0,'Index']]);
853 @ixptypes = ('chap');
855 foreach $k ( @ixentries ) {
856 my $n,$i;
857 my $ixptype = 'idx0';
858 my $prefix = $ixhasprefix{$k};
859 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
860 my $commapos = undef;
862 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
863 # This entry has a "hanging comma"
864 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
865 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
866 $ixpara[$i+1]->[1] eq ' ' ) {
867 $commapos = $i;
868 last;
872 if ( defined($commapos) ) {
873 if ( $ixcommafirst{$k} ) {
874 # This is the first entry; generate the
875 # "hanging comma" entry
876 my @precomma = splice(@ixpara,0,$commapos);
877 if ( $ixpara[0]->[1] eq ',' ) {
878 shift(@ixpara); # Discard lone comma
879 } else {
880 # Discard attached comma
881 $ixpara[0]->[1] =~ s/\,$//;
882 push(@precomma,shift(@ixpara));
884 push(@precomma, [-6,undef]);
885 push(@ixparas, [@precomma]);
886 push(@ixptypes, $ixptype);
887 shift(@ixpara); # Remove space
888 } else {
889 splice(@ixpara,0,$commapos+2);
891 $ixptype = 'idx1';
894 push(@ixpara, [-6,undef]); # Left/right marker
895 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
896 foreach $p ( @{$ps_index_pages{$k}} ) {
897 if ( $i++ == $n ) {
898 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
899 } else {
900 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
904 push(@ixparas, [@ixpara]);
905 push(@ixptypes, $ixptype);
909 # Flow index paragraphs into lines
911 ps_break_lines(\@ixparas, \@ixptypes);
914 # Format index into pages
916 $nlines = scalar(@pslines);
917 ps_break_pages($startofindex, $nlines);
920 # Push index onto bookmark list
922 push(@bookmarks, ['index', 0, 'Index']);
924 # Get the list of fonts used
925 %ps_all_fonts = ();
926 foreach $fset ( @AllFonts ) {
927 foreach $font ( @{$fset->{fonts}} ) {
928 $ps_all_fonts{$font->[1]->{name}}++;
932 # Emit the PostScript DSC header
933 print "%!PS-Adobe-3.0\n";
934 print "%%Pages: $curpage\n";
935 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
936 print "%%Creator: (NASM psflow.pl)\n";
937 print "%%DocumentData: Clean7Bit\n";
938 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
939 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
940 print "%%Orientation: Portrait\n";
941 print "%%PageOrder: Ascend\n";
942 print "%%EndComments\n";
943 print "%%BeginProlog\n";
945 # Emit the configurables as PostScript tokens
946 foreach $c ( keys(%psconf) ) {
947 print "/$c ", $psconf{$c}, " def\n";
949 foreach $c ( keys(%psbool) ) {
950 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
953 # Emit custom encoding vector
954 $zstr = '/NASMEncoding [ ';
955 foreach $c ( @NASMEncoding ) {
956 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
957 if ( length($zstr)+length($z) > 72 ) {
958 print $zstr,"\n";
959 $zstr = ' ';
961 $zstr .= $z;
963 print $zstr, "] def\n";
965 # Font recoding routine
966 # newname fontname --
967 print "/nasmenc {\n";
968 print " findfont dup length dict begin\n";
969 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
970 print " /Encoding NASMEncoding def\n";
971 print " currentdict\n";
972 print " end\n";
973 print " definefont pop\n";
974 print "} def\n";
976 # Emit fontset definitions
977 foreach $font ( keys(%ps_all_fonts) ) {
978 print '/',$font,'-NASM /',$font," nasmenc\n";
981 foreach $fset ( @AllFonts ) {
982 my $i = 0;
983 my @zfonts = ();
984 foreach $font ( @{$fset->{fonts}} ) {
985 print '/', $fset->{name}, $i, ' ',
986 '/', $font->[1]->{name}, '-NASM findfont ',
987 $font->[0], " scalefont def\n";
988 push(@zfonts, $fset->{name}.$i);
989 $i++;
991 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
994 # This is used by the bullet-paragraph PostScript methods
995 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
997 # Emit the canned PostScript prologue
998 open(PSHEAD, "< head.ps");
999 while ( defined($line = <PSHEAD>) ) {
1000 print $line;
1002 close(PSHEAD);
1003 print "%%EndProlog\n";
1005 # Generate a PostScript string
1006 sub ps_string($) {
1007 my ($s) = @_;
1008 my ($i,$c);
1009 my ($o) = '(';
1010 my ($l) = length($s);
1011 for ( $i = 0 ; $i < $l ; $i++ ) {
1012 $c = substr($s,$i,1);
1013 if ( ord($c) < 32 || ord($c) > 126 ) {
1014 $o .= sprintf("\\%03o", ord($c));
1015 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1016 $o .= "\\".$c;
1017 } else {
1018 $o .= $c;
1021 return $o.')';
1024 # Generate PDF bookmarks
1025 print "%%BeginSetup\n";
1026 foreach $b ( @bookmarks ) {
1027 print '[/Title ', ps_string($b->[2]), "\n";
1028 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1029 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1032 # Ask the PostScript interpreter for the proper size media
1033 print "setpagesize\n";
1034 print "%%EndSetup\n";
1036 # Start a PostScript page
1037 sub ps_start_page() {
1038 $ps_page++;
1039 print "%%Page: $ps_page $ps_page\n";
1040 print "%%BeginPageSetup\n";
1041 print "save\n";
1042 print "%%EndPageSetup\n";
1043 print '/', $ps_page, " pa\n";
1046 # End a PostScript page
1047 sub ps_end_page($) {
1048 my($pn) = @_;
1049 if ( $pn ) {
1050 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1052 print "restore showpage\n";
1055 $ps_page = 0;
1057 # Title page
1058 ps_start_page();
1059 $title = $metadata{'title'} || '';
1060 $title =~ s/ \- / $charcode{'emdash'} /;
1062 $subtitle = $metadata{'subtitle'} || '';
1063 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1065 # Print title
1066 print "/ti ", ps_string($title), " def\n";
1067 print "/sti ", ps_string($subtitle), " def\n";
1068 print "lmarg pageheight 2 mul 3 div moveto\n";
1069 print "tfont0 setfont\n";
1070 print "/title linkdest ti show\n";
1071 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1072 print "0 setlinecap 3 setlinewidth\n";
1073 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1074 print "hfont1 setfont sti stringwidth pop neg ",
1075 -$HeadFont{leading}, " rmoveto\n";
1076 print "sti show\n";
1078 # Print logo, if there is one
1079 # FIX: To be 100% correct, this should look for DocumentNeeded*
1080 # and DocumentFonts in the header of the EPSF and add those to the
1081 # global header.
1082 if ( defined($metadata{epslogo}) &&
1083 sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1084 my @eps = ();
1085 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1086 my $line;
1087 my $scale = 1;
1088 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1089 my $maxheight = $psconf{pageheight}/3-40;
1090 my $width, $height;
1091 my $x, $y;
1093 while ( defined($line = <EPS>) ) {
1094 last if ( $line =~ /^%%EOF/ );
1095 if ( !defined($bbllx) &&
1096 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1097 $bbllx = $1+0; $bblly = $2+0;
1098 $bburx = $3+0; $bbury = $4+0;
1100 push(@eps,$line);
1102 close(EPS);
1104 if ( defined($bbllx) ) {
1105 $width = $bburx-$bbllx;
1106 $height = $bbury-$bblly;
1108 if ( $width > $maxwidth ) {
1109 $scale = $maxwidth/$width;
1111 if ( $height*$scale > $maxheight ) {
1112 $scale = $maxheight/$height;
1115 $x = ($psconf{pagewidth}-$width*$scale)/2;
1116 $y = ($psconf{pageheight}-$height*$scale)/2;
1118 print "BeginEPSF\n";
1119 print $x, ' ', $y, " translate\n";
1120 print $scale, " dup scale\n" unless ( $scale == 1 );
1121 print -$bbllx, ' ', -$bblly, " translate\n";
1122 print "$bbllx $bblly moveto\n";
1123 print "$bburx $bblly lineto\n";
1124 print "$bburx $bbury lineto\n";
1125 print "$bbllx $bbury lineto\n";
1126 print "$bbllx $bblly lineto clip newpath\n";
1127 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1128 print @eps;
1129 print "%%EndDocument\n";
1130 print "EndEPSF\n";
1133 ps_end_page(0);
1135 # Emit the rest of the document (page 2 and on)
1136 $curpage = 2;
1137 ps_start_page();
1138 foreach $line ( @pslines ) {
1139 my $linfo = $line->[0];
1141 if ( $$linfo[4] != $curpage ) {
1142 ps_end_page($curpage > 2);
1143 ps_start_page();
1144 $curpage = $$linfo[4];
1147 print '[';
1148 my $curfont = 0;
1149 foreach my $c ( @{$line->[1]} ) {
1150 if ( $$c[0] >= 0 ) {
1151 if ( $curfont != $$c[0] ) {
1152 print ($curfont = $$c[0]);
1154 print ps_string($$c[1]);
1155 } elsif ( $$c[0] == -1 ) {
1156 print '{el}'; # End link
1157 } elsif ( $$c[0] == -2 ) {
1158 print '{/',$$c[1],' xl}'; # xref link
1159 } elsif ( $$c[0] == -3 ) {
1160 print '{',ps_string($$c[1]),'wl}'; # web link
1161 } elsif ( $$c[0] == -4 ) {
1162 # Index anchor -- ignore
1163 } elsif ( $$c[0] == -5 ) {
1164 print '{/',$$c[1],' xa}'; #xref anchor
1165 } elsif ( $$c[0] == -6 ) {
1166 print ']['; # Start a new array
1167 $curfont = 0;
1168 } elsif ( $$c[0] == -7 ) {
1169 print '{/',$$c[1],' pl}'; # page link
1170 } else {
1171 die "Unknown annotation";
1174 print ']';
1175 if ( defined($$linfo[2]) ) {
1176 foreach my $x ( @{$$linfo[2]} ) {
1177 if ( $$x[0] == $AuxStr ) {
1178 print ps_string($$x[1]);
1179 } elsif ( $$x[0] == $AuxPage ) {
1180 print $ps_xref_page{$$x[1]},' ';
1181 } elsif ( $$x[0] == $AuxPageStr ) {
1182 print ps_string($ps_xref_page{$$x[1]});
1183 } elsif ( $$x[0] == $AuxXRef ) {
1184 print '/',ps_xref($$x[1]),' ';
1185 } elsif ( $$x[0] == $AuxNum ) {
1186 print $$x[1],' ';
1187 } else {
1188 die "Unknown auxilliary data type";
1192 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1193 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1194 print ' ', $$linfo[0].$$linfo[1], "\n";
1197 ps_end_page(1);
1198 print "%%EOF\n";