More test files...
[nasm/avx512.git] / doc / genps.pl
blob02828c96757208435a48ff3e2dc5823f06161af5
1 #!/usr/bin/perl
3 # Format the documentation as PostScript
6 require 'psfonts.ph'; # The fonts we want to use
7 require 'pswidth.ph'; # PostScript string width
9 use Fcntl;
12 # PostScript configurables; these values are also available to the
13 # PostScript code itself
15 %psconf = (
16 pagewidth => 595, # Page width in PostScript points
17 pageheight => 792, # Page height in PostScript points
18 lmarg => 100, # Left margin in PostScript points
19 rmarg => 50, # Right margin in PostScript points
20 topmarg => 100, # Top margin in PostScript points
21 botmarg => 100, # Bottom margin in PostScript points
22 plmarg => 50, # Page number position relative to left margin
23 prmarg => 0, # Page number position relative to right margin
24 pymarg => 50, # Page number position relative to bot margin
25 startcopyright => 75, # How much above the bottom margin is the
26 # copyright notice stuff
27 bulladj => 12, # How much to indent a bullet paragraph
28 tocind => 12, # TOC indentation per level
29 tocpnz => 24, # Width of TOC page number only zone
30 tocdots => 8, # Spacing between TOC dots
31 idxspace => 24, # Minimum space between index title and pg#
32 idxindent => 24, # How much to indent a subindex entry
33 idxgutter => 24, # Space between index columns
34 idxcolumns => 2, # Number of index columns
37 %psbool = (
38 colorlinks => 0, # Set links in blue rather than black
41 # Known paper sizes
42 %papersizes = (
43 'a5' => [421, 595], # ISO half paper size
44 'b5' => [501, 709], # ISO small paper size
45 'a4' => [595, 842], # ISO standard paper size
46 'letter' => [612, 792], # US common paper size
47 'pa4' => [595, 792], # Compromise ("portable a4")
48 'b4' => [709,1002], # ISO intermediate paper size
49 'legal' => [612,1008], # US intermediate paper size
50 'a3' => [842,1190], # ISO double paper size
51 '11x17' => [792,1224], # US double paper size
55 # Parse the command line
57 undef $input;
58 while ( $arg = shift(@ARGV) ) {
59 if ( $arg =~ /^\-(|no\-)(.*)$/ ) {
60 $parm = $2;
61 $true = ($1 eq '') ? 1 : 0;
62 if ( $true && defined($papersizes{$parm}) ) {
63 $psconf{pagewidth} = $papersizes{$parm}->[0];
64 $psconf{pageheight} = $papersizes{$parm}->[1];
65 } elsif ( defined($psbool{$parm}) ) {
66 $psbool{$parm} = $true;
67 } elsif ( $true && defined($psconf{$parm}) ) {
68 $psconf{$parm} = shift(@ARGV);
69 } elsif ( $parm =~ /^(title|subtitle|year|author|license)$/ ) {
70 $metadata{$parm} = shift(@ARGV);
71 } else {
72 die "$0: Unknown option: $arg\n";
74 } else {
75 $input = $arg;
80 # Document formatting parameters
82 $paraskip = 6; # Space between paragraphs
83 $chapstart = 30; # Space before a chapter heading
84 $chapskip = 24; # Space after a chapter heading
85 $tocskip = 6; # Space between TOC entries
87 # Configure post-paragraph skips for each kind of paragraph
88 %skiparray = ('chap' => $chapskip, 'appn' => $chapstart,
89 'head' => $paraskip, 'subh' => $paraskip,
90 'norm' => $paraskip, 'bull' => $paraskip,
91 'code' => $paraskip, 'toc0' => $tocskip,
92 'toc1' => $tocskip, 'toc2' => $tocskip);
94 # Custom encoding vector. This is basically the same as
95 # ISOLatin1Encoding (a level 2 feature, so we dont want to use it),
96 # but with the "naked" accents at \200-\237 moved to the \000-\037
97 # range (ASCII control characters), and a few extra characters thrown
98 # in. It is basically a modified Windows 1252 codepage, minus, for
99 # now, the euro sign (\200 is reserved for euro.)
101 @NASMEncoding =
103 undef, undef, undef, undef, undef, undef, undef, undef, undef, undef,
104 undef, undef, undef, undef, undef, undef, 'dotlessi', 'grave',
105 'acute', 'circumflex', 'tilde', 'macron', 'breve', 'dotaccent',
106 'dieresis', undef, 'ring', 'cedilla', undef, 'hungarumlaut',
107 'ogonek', 'caron', 'space', 'exclam', 'quotedbl', 'numbersign',
108 'dollar', 'percent', 'ampersand', 'quoteright', 'parenleft',
109 'parenright', 'asterisk', 'plus', 'comma', 'minus', 'period',
110 'slash', 'zero', 'one', 'two', 'three', 'four', 'five', 'six',
111 'seven', 'eight', 'nine', 'colon', 'semicolon', 'less', 'equal',
112 'greater', 'question', 'at', 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H',
113 'I', 'J', 'K', 'L', 'M', 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V',
114 'W', 'X', 'Y', 'Z', 'bracketleft', 'backslash', 'bracketright',
115 'asciicircum', 'underscore', 'quoteleft', 'a', 'b', 'c', 'd', 'e',
116 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o', 'p', 'q', 'r', 's',
117 't', 'u', 'v', 'w', 'x', 'y', 'z', 'braceleft', 'bar', 'braceright',
118 'asciitilde', undef, undef, undef, 'quotesinglbase', 'florin',
119 'quotedblbase', 'ellipsis', 'dagger', 'dbldagger', 'circumflex',
120 'perthousand', 'Scaron', 'guilsinglleft', 'OE', undef, 'Zcaron',
121 undef, undef, 'grave', 'quotesingle', 'quotedblleft',
122 'quotedblright', 'bullet', 'endash', 'emdash', 'tilde', 'trademark',
123 'scaron', 'guilsignlright', 'oe', undef, 'zcaron', 'Ydieresis',
124 'space', 'exclamdown', 'cent', 'sterling', 'currency', 'yen',
125 'brokenbar', 'section', 'dieresis', 'copyright', 'ordfeminine',
126 'guillemotleft', 'logicalnot', 'hyphen', 'registered', 'macron',
127 'degree', 'plusminus', 'twosuperior', 'threesuperior', 'acute', 'mu',
128 'paragraph', 'periodcentered', 'cedilla', 'onesuperior',
129 'ordmasculine', 'guillemotright', 'onequarter', 'onehalf',
130 'threequarters', 'questiondown', 'Agrave', 'Aacute', 'Acircumflex',
131 'Atilde', 'Adieresis', 'Aring', 'AE', 'Ccedilla', 'Egrave', 'Eacute',
132 'Ecircumflex', 'Edieresis', 'Igrave', 'Iacute', 'Icircumflex',
133 'Idieresis', 'Eth', 'Ntilde', 'Ograve', 'Oacute', 'Ocircumflex',
134 'Otilde', 'Odieresis', 'multiply', 'Oslash', 'Ugrave', 'Uacute',
135 'Ucircumflex', 'Udieresis', 'Yacute', 'Thorn', 'germandbls',
136 'agrave', 'aacute', 'acircumflex', 'atilde', 'adieresis', 'aring',
137 'ae', 'ccedilla', 'egrave', 'eacute', 'ecircumflex', 'edieresis',
138 'igrave', 'iacute', 'icircumflex', 'idieresis', 'eth', 'ntilde',
139 'ograve', 'oacute', 'ocircumflex', 'otilde', 'odieresis', 'divide',
140 'oslash', 'ugrave', 'uacute', 'ucircumflex', 'udieresis', 'yacute',
141 'thorn', 'ydieresis'
144 # Name-to-byte lookup hash
145 %charcode = ();
146 for ( $i = 0 ; $i < 256 ; $i++ ) {
147 $charcode{$NASMEncoding[$i]} = chr($i);
151 # First, format the stuff coming from the front end into
152 # a cleaner representation
154 if ( defined($input) ) {
155 sysopen(PARAS, $input, O_RDONLY) or
156 die "$0: cannot open $input: $!\n";
157 } else {
158 open(PARAS, "<&STDIN") or die "$0: $!\n";
160 while ( defined($line = <PARAS>) ) {
161 chomp $line;
162 $data = <PARAS>;
163 chomp $data;
164 if ( $line =~ /^meta :(.*)$/ ) {
165 $metakey = $1;
166 $metadata{$metakey} = $data;
167 } elsif ( $line =~ /^indx :(.*)$/ ) {
168 $ixentry = $1;
169 push(@ixentries, $ixentry);
170 $ixterms{$ixentry} = [split(/\037/, $data)];
171 # Look for commas. This is easier done on the string
172 # representation, so do it now.
173 if ( $data =~ /^(.*)\,\037sp\037/ ) {
174 $ixprefix = $1;
175 $ixprefix =~ s/\037n $//; # Discard possible font change at end
176 $ixhasprefix{$ixentry} = $ixprefix;
177 if ( !$ixprefixes{$ixprefix} ) {
178 $ixcommafirst{$ixentry}++;
180 $ixprefixes{$ixprefix}++;
181 } else {
182 # A complete term can also be used as a prefix
183 $ixprefixes{$data}++;
185 } else {
186 push(@ptypes, $line);
187 push(@paras, [split(/\037/, $data)]);
190 close(PARAS);
193 # Convert an integer to a chosen base
195 sub int2base($$) {
196 my($i,$b) = @_;
197 my($s) = '';
198 my($n) = '';
199 my($z) = '0123456789abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ';
200 return '0' if ($i == 0);
201 if ( $i < 0 ) { $n = '-'; $i = -$i; }
202 while ( $i ) {
203 $s = substr($z,$i%$b,1) . $s;
204 $i = int($i/$b);
206 return $n.$s;
210 # Convert a string to a rendering array
212 sub string2array($)
214 my($s) = @_;
215 my(@a) = ();
217 $s =~ s/ \- / $charcode{'endash'} /g; # Replace " - " with en dash
219 while ( $s =~ /^(\s+|\S+)(.*)$/ ) {
220 push(@a, [0,$1]);
221 $s = $2;
224 return @a;
228 # Take a crossreference name and generate the PostScript name for it.
230 # This hack produces a somewhat smaller PDF...
231 #%ps_xref_list = ();
232 #$ps_xref_next = 0;
233 #sub ps_xref($) {
234 # my($s) = @_;
235 # my $q = $ps_xref_list{$s};
236 # return $q if ( defined($ps_xref_list{$s}) );
237 # $q = 'X'.int2base($ps_xref_next++, 52);
238 # $ps_xref_list{$s} = $q;
239 # return $q;
242 # Somewhat bigger PDF, but one which obeys # URLs
243 sub ps_xref($) {
244 return @_[0];
248 # Flow lines according to a particular font set and width
250 # A "font set" is represented as an array containing
251 # arrays of pairs: [<size>, <metricref>]
253 # Each line is represented as:
254 # [ [type,first|last,aux,fontset,page,ypos,optional col],
255 # [rendering array] ]
257 # A space character may be "squeezed" by up to this much
258 # (as a fraction of the normal width of a space.)
260 $ps_space_squeeze = 0.00; # Min space width 100%
261 sub ps_flow_lines($$$@) {
262 my($wid, $fontset, $type, @data) = @_;
263 my($fonts) = $$fontset{fonts};
264 my($e);
265 my($w) = 0; # Width of current line
266 my($sw) = 0; # Width of current line due to spaces
267 my(@l) = (); # Current line
268 my(@ls) = (); # Accumulated output lines
269 my(@xd) = (); # Metadata that goes with subsequent text
270 my $hasmarker = 0; # Line has -6 marker
271 my $pastmarker = 0; # -6 marker found
273 # If there is a -6 marker anywhere in the paragraph,
274 # *each line* output needs to have a -6 marker
275 foreach $e ( @data ) {
276 $hasmarker = 1 if ( $$e[0] == -6 );
279 $w = 0;
280 foreach $e ( @data ) {
281 if ( $$e[0] < 0 ) {
282 # Type is metadata. Zero width.
283 if ( $$e[0] == -6 ) {
284 $pastmarker = 1;
286 if ( $$e[0] == -1 || $$e[0] == -6 ) {
287 # -1 (end anchor) or -6 (marker) goes with the preceeding
288 # text, otherwise with the subsequent text
289 push(@l, $e);
290 } else {
291 push(@xd, $e);
293 } else {
294 my $ew = ps_width($$e[1], $fontset->{fonts}->[$$e[0]][1],
295 \@NASMEncoding) *
296 ($fontset->{fonts}->[$$e[0]][0]/1000);
297 my $sp = $$e[1];
298 $sp =~ tr/[^ ]//d; # Delete nonspaces
299 my $esw = ps_width($sp, $fontset->{fonts}->[$$e[0]][1],
300 \@NASMEncoding) *
301 ($fontset->{fonts}->[$$e[0]][0]/1000);
303 if ( ($w+$ew) - $ps_space_squeeze*($sw+$esw) > $wid ) {
304 # Begin new line
305 # Search backwards for previous space chunk
306 my $lx = scalar(@l)-1;
307 my @rm = ();
308 while ( $lx >= 0 ) {
309 while ( $lx >= 0 && $l[$lx]->[0] < 0 ) {
310 # Skip metadata
311 $pastmarker = 0 if ( $l[$lx]->[0] == -6 );
312 $lx--;
314 if ( $lx >= 0 ) {
315 if ( $l[$lx]->[1] eq ' ' ) {
316 splice(@l, $lx, 1);
317 @rm = splice(@l, $lx);
318 last; # Found place to break
319 } else {
320 $lx--;
325 # Now @l contains the stuff to remain on the old line
326 # If we broke the line inside a link, then split the link
327 # into two.
328 my $lkref = undef;
329 foreach my $lc ( @l ) {
330 if ( $$lc[0] == -2 || $$lc[0] == -3 || $lc[0] == -7 ) {
331 $lkref = $lc;
332 } elsif ( $$lc[0] == -1 ) {
333 undef $lkref;
337 if ( defined($lkref) ) {
338 push(@l, [-1,undef]); # Terminate old reference
339 unshift(@rm, $lkref); # Duplicate reference on new line
342 if ( $hasmarker ) {
343 if ( $pastmarker ) {
344 unshift(@rm,[-6,undef]); # New line starts with marker
345 } else {
346 push(@l,[-6,undef]); # Old line ends with marker
350 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]);
351 @l = @rm;
353 $w = $sw = 0;
354 # Compute the width of the remainder array
355 for my $le ( @l ) {
356 if ( $$le[0] >= 0 ) {
357 my $xew = ps_width($$le[1],
358 $fontset->{fonts}->[$$le[0]][1],
359 \@NASMEncoding) *
360 ($fontset->{fonts}->[$$le[0]][0]/1000);
361 my $xsp = $$le[1];
362 $xsp =~ tr/[^ ]//d; # Delete nonspaces
363 my $xsw = ps_width($xsp,
364 $fontset->{fonts}->[$$le[0]][1],
365 \@NASMEncoding) *
366 ($fontset->{fonts}->[$$le[0]][0]/1000);
367 $w += $xew; $sw += $xsw;
371 push(@l, @xd); # Accumulated metadata
372 @xd = ();
373 if ( $$e[1] ne '' ) {
374 push(@l, $e);
375 $w += $ew; $sw += $esw;
379 push(@l,@xd);
380 if ( scalar(@l) ) {
381 push(@ls, [[$type,0,undef,$fontset,0,0],[@l]]); # Final line
384 # Mark the first line as first and the last line as last
385 if ( scalar(@ls) ) {
386 $ls[0]->[0]->[1] |= 1; # First in para
387 $ls[-1]->[0]->[1] |= 2; # Last in para
389 return @ls;
393 # Once we have broken things into lines, having multiple chunks
394 # with the same font index is no longer meaningful. Merge
395 # adjacent chunks to keep down the size of the whole file.
397 sub ps_merge_chunks(@) {
398 my(@ci) = @_;
399 my($c, $lc);
400 my(@co, $eco);
402 undef $lc;
403 @co = ();
404 $eco = -1; # Index of the last entry in @co
405 foreach $c ( @ci ) {
406 if ( defined($lc) && $$c[0] == $lc && $$c[0] >= 0 ) {
407 $co[$eco]->[1] .= $$c[1];
408 } else {
409 push(@co, $c); $eco++;
410 $lc = $$c[0];
413 return @co;
417 # Convert paragraphs to rendering arrays. Each
418 # element in the array contains (font, string),
419 # where font can be one of:
420 # -1 end link
421 # -2 begin crossref
422 # -3 begin weblink
423 # -4 index item anchor
424 # -5 crossref anchor
425 # -6 left/right marker (used in the index)
426 # -7 page link (used in the index)
427 # 0 normal
428 # 1 empatic (italic)
429 # 2 code (fixed spacing)
432 sub mkparaarray($@) {
433 my($ptype, @chunks) = @_;
435 my @para = ();
436 my $in_e = 0;
437 my $chunk;
439 if ( $ptype =~ /^code/ ) {
440 foreach $chunk ( @chunks ) {
441 push(@para, [2, $chunk]);
443 } else {
444 foreach $chunk ( @chunks ) {
445 my $type = substr($chunk,0,2);
446 my $text = substr($chunk,2);
448 if ( $type eq 'sp' ) {
449 push(@para, [$in_e?1:0, ' ']);
450 } elsif ( $type eq 'da' ) {
451 push(@para, [$in_e?1:0, $charcode{'endash'}]);
452 } elsif ( $type eq 'n ' ) {
453 push(@para, [0, $text]);
454 $in_e = 0;
455 } elsif ( $type =~ '^e' ) {
456 push(@para, [1, $text]);
457 $in_e = ($type eq 'es' || $type eq 'e ');
458 } elsif ( $type eq 'c ' ) {
459 push(@para, [2, $text]);
460 $in_e = 0;
461 } elsif ( $type eq 'x ' ) {
462 push(@para, [-2, ps_xref($text)]);
463 } elsif ( $type eq 'xe' ) {
464 push(@para, [-1, undef]);
465 } elsif ( $type eq 'wc' || $type eq 'w ' ) {
466 $text =~ /\<(.*)\>(.*)$/;
467 my $link = $1; $text = $2;
468 push(@para, [-3, $link]);
469 push(@para, [($type eq 'wc') ? 2:0, $text]);
470 push(@para, [-1, undef]);
471 $in_e = 0;
472 } elsif ( $type eq 'i ' ) {
473 push(@para, [-4, $text]);
474 } else {
475 die "Unexpected paragraph chunk: $chunk";
479 return @para;
482 $npara = scalar(@paras);
483 for ( $i = 0 ; $i < $npara ; $i++ ) {
484 $paras[$i] = [mkparaarray($ptypes[$i], @{$paras[$i]})];
488 # This converts a rendering array to a simple string
490 sub ps_arraytostr(@) {
491 my $s = '';
492 my $c;
493 foreach $c ( @_ ) {
494 $s .= $$c[1] if ( $$c[0] >= 0 );
496 return $s;
500 # This generates a duplicate of a paragraph
502 sub ps_dup_para(@) {
503 my(@i) = @_;
504 my(@o) = ();
505 my($c);
507 foreach $c ( @i ) {
508 my @cc = @{$c};
509 push(@o, [@cc]);
511 return @o;
515 # This generates a duplicate of a paragraph, stripping anchor
516 # tags (-4 and -5)
518 sub ps_dup_para_noanchor(@) {
519 my(@i) = @_;
520 my(@o) = ();
521 my($c);
523 foreach $c ( @i ) {
524 my @cc = @{$c};
525 push(@o, [@cc]) unless ( $cc[0] == -4 || $cc[0] == -5 );
527 return @o;
531 # Scan for header paragraphs and fix up their contents;
532 # also generate table of contents and PDF bookmarks.
534 @tocparas = ([[-5, 'contents'], [0,'Contents']]);
535 @tocptypes = ('chap');
536 @bookmarks = (['title', 0, 'Title'], ['contents', 0, 'Contents']);
537 %bookref = ();
538 for ( $i = 0 ; $i < $npara ; $i++ ) {
539 my $xtype = $ptypes[$i];
540 my $ptype = substr($xtype,0,4);
541 my $str;
542 my $book;
544 if ( $ptype eq 'chap' || $ptype eq 'appn' ) {
545 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
546 die "Bad para";
548 my $secn = $1;
549 my $sech = $2;
550 my $xref = ps_xref($sech);
551 my $chap = ($ptype eq 'chap')?'Chapter':'Appendix';
553 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
554 push(@bookmarks, $book);
555 $bookref{$secn} = $book;
557 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
558 push(@tocptypes, 'toc0'.' :'.$sech.':'.$chap.' '.$secn.':');
560 unshift(@{$paras[$i]},
561 [-5, $xref], [0,$chap.' '.$secn.':'], [0, ' ']);
562 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
563 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
564 die "Bad para";
566 my $secn = $1;
567 my $sech = $2;
568 my $xref = ps_xref($sech);
569 my $pref;
570 $pref = $secn; $pref =~ s/\.[^\.]+$//; # Find parent node
572 $book = [$xref, 0, ps_arraytostr(@{$paras[$i]})];
573 push(@bookmarks, $book);
574 $bookref{$secn} = $book;
575 $bookref{$pref}->[1]--; # Adjust count for parent node
577 push(@tocparas, [ps_dup_para_noanchor(@{$paras[$i]})]);
578 push(@tocptypes,
579 (($ptype eq 'subh') ? 'toc2':'toc1').' :'.$sech.':'.$secn);
581 unshift(@{$paras[$i]}, [-5, $xref]);
586 # Add TOC to beginning of paragraph list
588 unshift(@paras, @tocparas); undef @tocparas;
589 unshift(@ptypes, @tocptypes); undef @tocptypes;
592 # Add copyright notice to the beginning
594 unshift(@paras,
595 [[0, $charcode{'copyright'}], [0, ' '], [0,$metadata{'year'}],
596 [0, ' '], string2array($metadata{'author'})],
597 [string2array($metadata{'license'})]);
598 unshift(@ptypes, 'norm', 'norm');
600 $npara = scalar(@paras);
603 # No lines generated, yet.
605 @pslines = ();
608 # Line Auxilliary Information Types
610 $AuxStr = 1; # String
611 $AuxPage = 2; # Page number (from xref)
612 $AuxPageStr = 3; # Page number as a PostScript string
613 $AuxXRef = 4; # Cross reference as a name
614 $AuxNum = 5; # Number
617 # Break or convert paragraphs into lines, and push them
618 # onto the @pslines array.
620 sub ps_break_lines($$) {
621 my ($paras,$ptypes) = @_;
623 my $linewidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
624 my $bullwidth = $linewidth-$psconf{bulladj};
625 my $indxwidth = ($linewidth-$psconf{idxgutter})/$psconf{idxcolumns}
626 -$psconf{idxspace};
628 my $npara = scalar(@{$paras});
629 my $i;
631 for ( $i = 0 ; $i < $npara ; $i++ ) {
632 my $xtype = $ptypes->[$i];
633 my $ptype = substr($xtype,0,4);
634 my @data = @{$paras->[$i]};
635 my @ls = ();
636 if ( $ptype eq 'code' ) {
637 my $p;
638 # Code paragraph; each chunk is a line
639 foreach $p ( @data ) {
640 push(@ls, [[$ptype,0,undef,\%BodyFont,0,0],[$p]]);
642 $ls[0]->[0]->[1] |= 1; # First in para
643 $ls[-1]->[0]->[1] |= 2; # Last in para
644 } elsif ( $ptype eq 'chap' || $ptype eq 'appn' ) {
645 # Chapters are flowed normally, but in an unusual font
646 @ls = ps_flow_lines($linewidth, \%ChapFont, $ptype, @data);
647 } elsif ( $ptype eq 'head' || $ptype eq 'subh' ) {
648 unless ( $xtype =~ /^\S+ (\S+) :(.*)$/ ) {
649 die "Bad para";
651 my $secn = $1;
652 my $sech = $2;
653 my $font = ($ptype eq 'head') ? \%HeadFont : \%SubhFont;
654 @ls = ps_flow_lines($linewidth, $font, $ptype, @data);
655 # We need the heading number as auxillary data
656 $ls[0]->[0]->[2] = [[$AuxStr,$secn]];
657 } elsif ( $ptype eq 'norm' ) {
658 @ls = ps_flow_lines($linewidth, \%BodyFont, $ptype, @data);
659 } elsif ( $ptype eq 'bull' ) {
660 @ls = ps_flow_lines($bullwidth, \%BodyFont, $ptype, @data);
661 } elsif ( $ptype =~ /^toc/ ) {
662 unless ( $xtype =~/^\S+ :([^:]*):(.*)$/ ) {
663 die "Bad para";
665 my $xref = $1;
666 my $refname = $2.' ';
667 my $ntoc = substr($ptype,3,1)+0;
668 my $refwidth = ps_width($refname, $BodyFont{fonts}->[0][1],
669 \@NASMEncoding) *
670 ($BodyFont{fonts}->[0][0]/1000);
672 @ls = ps_flow_lines($linewidth-$ntoc*$psconf{tocind}-
673 $psconf{tocpnz}-$refwidth,
674 \%BodyFont, $ptype, @data);
676 # Auxilliary data: for the first line, the cross reference symbol
677 # and the reference name; for all lines but the first, the
678 # reference width; and for the last line, the page number
679 # as a string.
680 my $nl = scalar(@ls);
681 $ls[0]->[0]->[2] = [[$AuxStr,$refname], [$AuxXRef,$xref]];
682 for ( $j = 1 ; $j < $nl ; $j++ ) {
683 $ls[$j]->[0]->[2] = [[$AuxNum,$refwidth]];
685 push(@{$ls[$nl-1]->[0]->[2]}, [$AuxPageStr,$xref]);
686 } elsif ( $ptype =~ /^idx/ ) {
687 my $lvl = substr($ptype,3,1)+0;
689 @ls = ps_flow_lines($indxwidth-$lvl*$psconf{idxindent},
690 \%BodyFont, $ptype, @data);
691 } else {
692 die "Unknown para type: $ptype";
694 # Merge adjacent identical chunks
695 foreach $l ( @ls ) {
696 @{$$l[1]} = ps_merge_chunks(@{$$l[1]});
698 push(@pslines,@ls);
702 # Break the main body text into lines.
703 ps_break_lines(\@paras, \@ptypes);
706 # Break lines in to pages
709 # Where to start on page 2, the copyright page
710 $curpage = 2; # Start on page 2
711 $curypos = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg}-
712 $psconf{startcopyright};
713 undef $columnstart; # Not outputting columnar text
714 undef $curcolumn; # Current column
715 $nlines = scalar(@pslines);
718 # This formats lines inside the global @pslines array into pages,
719 # updating the page and y-coordinate entries. Start at the
720 # $startline position in @pslines and go to but not including
721 # $endline. The global variables $curpage, $curypos, $columnstart
722 # and $curcolumn are updated appropriately.
724 sub ps_break_pages($$) {
725 my($startline, $endline) = @_;
727 # Paragraph types which should never be broken
728 my $nobreakregexp = "^(chap|appn|head|subh|toc.|idx.)\$";
729 # Paragraph types which are heading (meaning they should not be broken
730 # immediately after)
731 my $nobreakafter = "^(chap|appn|head|subh)\$";
732 # Paragraph types which should never be broken *before*
733 my $nobreakbefore = "^idx[1-9]\$";
734 # Paragraph types which are set in columnar format
735 my $columnregexp = "^idx.\$";
737 my $upageheight = $psconf{pageheight}-$psconf{topmarg}-$psconf{botmarg};
739 my $i;
741 for ( $i = $startline ; $i < $endline ; $i++ ) {
742 my $linfo = $pslines[$i]->[0];
743 if ( ($$linfo[0] eq 'chap' || $$linfo[0] eq 'appn' )
744 && ($$linfo[1] & 1) ) {
745 # First line of a new chapter heading. Start a new page.
746 undef $columnstart;
747 $curpage++ if ( $curypos > 0 || defined($columnstart) );
748 $curypos = $chapstart;
749 } elsif ( defined($columnstart) && $$linfo[0] !~ /$columnregexp/o ) {
750 undef $columnstart;
751 $curpage++;
752 $curypos = 0;
755 if ( $$linfo[0] =~ /$columnregexp/o && !defined($columnstart) ) {
756 $columnstart = $curypos;
757 $curcolumn = 0;
760 # Adjust position by the appropriate leading
761 $curypos += $$linfo[3]->{leading};
763 # Record the page and y-position
764 $$linfo[4] = $curpage;
765 $$linfo[5] = $curypos;
766 $$linfo[6] = $curcolumn if ( defined($columnstart) );
768 if ( $curypos > $upageheight ) {
769 # We need to break the page before this line.
770 my $broken = 0; # No place found yet
771 while ( !$broken && $pslines[$i]->[0]->[4] == $curpage ) {
772 my $linfo = $pslines[$i]->[0];
773 my $pinfo = $pslines[$i-1]->[0];
775 if ( $$linfo[1] == 2 ) {
776 # This would be an orphan, don't break.
777 } elsif ( $$linfo[1] & 1 ) {
778 # Sole line or start of paragraph. Break unless
779 # the previous line was part of a heading.
780 $broken = 1 if ( $$pinfo[0] !~ /$nobreakafter/o &&
781 $$linfo[0] !~ /$nobreakbefore/o );
782 } else {
783 # Middle of paragraph. Break unless we're in a
784 # no-break paragraph, or the previous line would
785 # end up being a widow.
786 $broken = 1 if ( $$linfo[0] !~ /$nobreakregexp/o &&
787 $$pinfo[1] != 1 );
789 $i--;
791 die "Nowhere to break page $curpage\n" if ( !$broken );
792 # Now $i should point to line immediately before the break, i.e.
793 # the next paragraph should be the first on the new page
794 if ( defined($columnstart) &&
795 ++$curcolumn < $psconf{idxcolumns} ) {
796 # We're actually breaking text into columns, not pages
797 $curypos = $columnstart;
798 } else {
799 undef $columnstart;
800 $curpage++;
801 $curypos = 0;
803 next;
806 # Add end of paragraph skip
807 if ( $$linfo[1] & 2 ) {
808 $curypos += $skiparray{$$linfo[0]};
813 ps_break_pages(0,$nlines); # Break the main text body into pages
816 # Find the page number of all the indices
818 %ps_xref_page = (); # Crossref anchor pages
819 %ps_index_pages = (); # Index item pages
820 $nlines = scalar(@pslines);
821 for ( $i = 0 ; $i < $nlines ; $i++ ) {
822 my $linfo = $pslines[$i]->[0];
823 foreach my $c ( @{$pslines[$i]->[1]} ) {
824 if ( $$c[0] == -4 ) {
825 if ( !defined($ps_index_pages{$$c[1]}) ) {
826 $ps_index_pages{$$c[1]} = [];
827 } elsif ( $ps_index_pages{$$c[1]}->[-1] eq $$linfo[4] ) {
828 # Pages are emitted in order; if this is a duplicated
829 # entry it will be the last one
830 next; # Duplicate
832 push(@{$ps_index_pages{$$c[1]}}, $$linfo[4]);
833 } elsif ( $$c[0] == -5 ) {
834 $ps_xref_page{$$c[1]} = $$linfo[4];
840 # Emit index paragraphs
842 $startofindex = scalar(@pslines);
843 @ixparas = ([[-5,'index'],[0,'Index']]);
844 @ixptypes = ('chap');
846 foreach $k ( @ixentries ) {
847 my $n,$i;
848 my $ixptype = 'idx0';
849 my $prefix = $ixhasprefix{$k};
850 my @ixpara = mkparaarray($ixptype,@{$ixterms{$k}});
851 my $commapos = undef;
853 if ( defined($prefix) && $ixprefixes{$prefix} > 1 ) {
854 # This entry has a "hanging comma"
855 for ( $i = 0 ; $i < scalar(@ixpara)-1 ; $i++ ) {
856 if ( substr($ixpara[$i]->[1],-1,1) eq ',' &&
857 $ixpara[$i+1]->[1] eq ' ' ) {
858 $commapos = $i;
859 last;
863 if ( defined($commapos) ) {
864 if ( $ixcommafirst{$k} ) {
865 # This is the first entry; generate the
866 # "hanging comma" entry
867 my @precomma = splice(@ixpara,0,$commapos);
868 if ( $ixpara[0]->[1] eq ',' ) {
869 shift(@ixpara); # Discard lone comma
870 } else {
871 # Discard attached comma
872 $ixpara[0]->[1] =~ s/\,$//;
873 push(@precomma,shift(@ixpara));
875 push(@precomma, [-6,undef]);
876 push(@ixparas, [@precomma]);
877 push(@ixptypes, $ixptype);
878 shift(@ixpara); # Remove space
879 } else {
880 splice(@ixpara,0,$commapos+2);
882 $ixptype = 'idx1';
885 push(@ixpara, [-6,undef]); # Left/right marker
886 $i = 1; $n = scalar(@{$ps_index_pages{$k}});
887 foreach $p ( @{$ps_index_pages{$k}} ) {
888 if ( $i++ == $n ) {
889 push(@ixpara,[-7,$p],[0,"$p"],[-1,undef]);
890 } else {
891 push(@ixpara,[-7,$p],[0,"$p,"],[-1,undef],[0,' ']);
895 push(@ixparas, [@ixpara]);
896 push(@ixptypes, $ixptype);
900 # Flow index paragraphs into lines
902 ps_break_lines(\@ixparas, \@ixptypes);
905 # Format index into pages
907 $nlines = scalar(@pslines);
908 ps_break_pages($startofindex, $nlines);
911 # Push index onto bookmark list
913 push(@bookmarks, ['index', 0, 'Index']);
915 # Get the list of fonts used
916 %ps_all_fonts = ();
917 foreach $fset ( @AllFonts ) {
918 foreach $font ( @{$fset->{fonts}} ) {
919 $ps_all_fonts{$font->[1]->{name}}++;
923 # Emit the PostScript DSC header
924 print "%!PS-Adobe-3.0\n";
925 print "%%Pages: $curpage\n";
926 print "%%BoundingBox: 0 0 ", $psconf{pagewidth}, ' ', $psconf{pageheight}, "\n";
927 print "%%Creator: (NASM psflow.pl)\n";
928 print "%%DocumentData: Clean7Bit\n";
929 print "%%DocumentFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
930 print "%%DocumentNeededFonts: ", join(' ', keys(%ps_all_fonts)), "\n";
931 print "%%Orientation: Portrait\n";
932 print "%%PageOrder: Ascend\n";
933 print "%%EndComments\n";
934 print "%%BeginProlog\n";
936 # Emit the configurables as PostScript tokens
937 foreach $c ( keys(%psconf) ) {
938 print "/$c ", $psconf{$c}, " def\n";
940 foreach $c ( keys(%psbool) ) {
941 print "/$c ", ($psbool{$c}?'true':'false'), " def\n";
944 # Emit custom encoding vector
945 $zstr = '/NASMEncoding [ ';
946 foreach $c ( @NASMEncoding ) {
947 my $z = '/'.(defined($c)?$c:'.notdef ').' ';
948 if ( length($zstr)+length($z) > 72 ) {
949 print $zstr,"\n";
950 $zstr = ' ';
952 $zstr .= $z;
954 print $zstr, "] def\n";
956 # Font recoding routine
957 # newname fontname --
958 print "/nasmenc {\n";
959 print " findfont dup length dict begin\n";
960 print " { 1 index /FID ne {def}{pop pop} ifelse } forall\n";
961 print " /Encoding NASMEncoding def\n";
962 print " currentdict\n";
963 print " end\n";
964 print " definefont pop\n";
965 print "} def\n";
967 # Emit fontset definitions
968 foreach $font ( keys(%ps_all_fonts) ) {
969 print '/',$font,'-NASM /',$font," nasmenc\n";
972 foreach $fset ( @AllFonts ) {
973 my $i = 0;
974 my @zfonts = ();
975 foreach $font ( @{$fset->{fonts}} ) {
976 print '/', $fset->{name}, $i, ' ',
977 '/', $font->[1]->{name}, '-NASM findfont ',
978 $font->[0], " scalefont def\n";
979 push(@zfonts, $fset->{name}.$i);
980 $i++;
982 print '/', $fset->{name}, ' [', join(' ',@zfonts), "] def\n";
985 # This is used by the bullet-paragraph PostScript methods
986 print "/bullet [",ps_string($charcode{'bullet'}),"] def\n";
988 # Emit the canned PostScript prologue
989 open(PSHEAD, "< head.ps");
990 while ( defined($line = <PSHEAD>) ) {
991 print $line;
993 close(PSHEAD);
994 print "%%EndProlog\n";
996 # Generate a PostScript string
997 sub ps_string($) {
998 my ($s) = @_;
999 my ($i,$c);
1000 my ($o) = '(';
1001 my ($l) = length($s);
1002 for ( $i = 0 ; $i < $l ; $i++ ) {
1003 $c = substr($s,$i,1);
1004 if ( ord($c) < 32 || ord($c) > 126 ) {
1005 $o .= sprintf("\\%03o", ord($c));
1006 } elsif ( $c eq '(' || $c eq ')' || $c eq "\\" ) {
1007 $o .= "\\".$c;
1008 } else {
1009 $o .= $c;
1012 return $o.')';
1015 # Generate PDF bookmarks
1016 print "%%BeginSetup\n";
1017 foreach $b ( @bookmarks ) {
1018 print '[/Title ', ps_string($b->[2]), "\n";
1019 print '/Count ', $b->[1], ' ' if ( $b->[1] );
1020 print '/Dest /',$b->[0]," /OUT pdfmark\n";
1023 # Ask the PostScript interpreter for the proper size media
1024 print "setpagesize\n";
1025 print "%%EndSetup\n";
1027 # Start a PostScript page
1028 sub ps_start_page() {
1029 $ps_page++;
1030 print "%%Page: $ps_page $ps_page\n";
1031 print "%%BeginPageSetup\n";
1032 print "save\n";
1033 print "%%EndPageSetup\n";
1034 print '/', $ps_page, " pa\n";
1037 # End a PostScript page
1038 sub ps_end_page($) {
1039 my($pn) = @_;
1040 if ( $pn ) {
1041 print "($ps_page)", (($ps_page & 1) ? 'pageodd' : 'pageeven'), "\n";
1043 print "restore showpage\n";
1046 $ps_page = 0;
1048 # Title page
1049 ps_start_page();
1050 $title = $metadata{'title'} || '';
1051 $title =~ s/ \- / $charcode{'emdash'} /;
1053 $subtitle = $metadata{'subtitle'} || '';
1054 $subtitle =~ s/ \- / $charcode{'emdash'} /;
1056 # Print title
1057 print "/ti ", ps_string($title), " def\n";
1058 print "/sti ", ps_string($subtitle), " def\n";
1059 print "lmarg pageheight 2 mul 3 div moveto\n";
1060 print "tfont0 setfont\n";
1061 print "/title linkdest ti show\n";
1062 print "lmarg pageheight 2 mul 3 div 10 sub moveto\n";
1063 print "0 setlinecap 3 setlinewidth\n";
1064 print "pagewidth lmarg sub rmarg sub 0 rlineto currentpoint stroke moveto\n";
1065 print "hfont1 setfont sti stringwidth pop neg ",
1066 -$HeadFont{leading}, " rmoveto\n";
1067 print "sti show\n";
1069 # Print logo, if there is one
1070 # FIX: To be 100% correct, this should look for DocumentNeeded*
1071 # and DocumentFonts in the header of the EPSF and add those to the
1072 # global header.
1073 if ( defined($metadata{epslogo}) &&
1074 sysopen(EPS, $metadata{epslogo}, O_RDONLY) ) {
1075 my @eps = ();
1076 my ($bbllx,$bblly,$bburx,$bbury) = (undef,undef,undef,undef);
1077 my $line;
1078 my $scale = 1;
1079 my $maxwidth = $psconf{pagewidth}-$psconf{lmarg}-$psconf{rmarg};
1080 my $maxheight = $psconf{pageheight}/3-40;
1081 my $width, $height;
1082 my $x, $y;
1084 while ( defined($line = <EPS>) ) {
1085 last if ( $line =~ /^%%EOF/ );
1086 if ( !defined($bbllx) &&
1087 $line =~ /^\%\%BoundingBox\:\s*([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)\s+([0-9\.]+)/i ) {
1088 $bbllx = $1+0; $bblly = $2+0;
1089 $bburx = $3+0; $bbury = $4+0;
1091 push(@eps,$line);
1093 close(EPS);
1095 if ( defined($bbllx) ) {
1096 $width = $bburx-$bbllx;
1097 $height = $bbury-$bblly;
1099 if ( $width > $maxwidth ) {
1100 $scale = $maxwidth/$width;
1102 if ( $height*$scale > $maxheight ) {
1103 $scale = $maxheight/$height;
1106 $x = ($psconf{pagewidth}-$width*$scale)/2;
1107 $y = ($psconf{pageheight}-$height*$scale)/2;
1109 print "BeginEPSF\n";
1110 print $x, ' ', $y, " translate\n";
1111 print $scale, " dup scale\n" unless ( $scale == 1 );
1112 print -$bbllx, ' ', -$bblly, " translate\n";
1113 print "$bbllx $bblly moveto\n";
1114 print "$bburx $bblly lineto\n";
1115 print "$bburx $bbury lineto\n";
1116 print "$bbllx $bbury lineto\n";
1117 print "$bbllx $bblly lineto clip newpath\n";
1118 print "%%BeginDocument: ",ps_string($metadata{epslogo}),"\n";
1119 print @eps;
1120 print "%%EndDocument\n";
1121 print "EndEPSF\n";
1124 ps_end_page(0);
1126 # Emit the rest of the document (page 2 and on)
1127 $curpage = 2;
1128 ps_start_page();
1129 foreach $line ( @pslines ) {
1130 my $linfo = $line->[0];
1132 if ( $$linfo[4] != $curpage ) {
1133 ps_end_page($curpage > 2);
1134 ps_start_page();
1135 $curpage = $$linfo[4];
1138 print '[';
1139 my $curfont = 0;
1140 foreach my $c ( @{$line->[1]} ) {
1141 if ( $$c[0] >= 0 ) {
1142 if ( $curfont != $$c[0] ) {
1143 print ($curfont = $$c[0]);
1145 print ps_string($$c[1]);
1146 } elsif ( $$c[0] == -1 ) {
1147 print '{el}'; # End link
1148 } elsif ( $$c[0] == -2 ) {
1149 print '{/',$$c[1],' xl}'; # xref link
1150 } elsif ( $$c[0] == -3 ) {
1151 print '{',ps_string($$c[1]),'wl}'; # web link
1152 } elsif ( $$c[0] == -4 ) {
1153 # Index anchor -- ignore
1154 } elsif ( $$c[0] == -5 ) {
1155 print '{/',$$c[1],' xa}'; #xref anchor
1156 } elsif ( $$c[0] == -6 ) {
1157 print ']['; # Start a new array
1158 $curfont = 0;
1159 } elsif ( $$c[0] == -7 ) {
1160 print '{/',$$c[1],' pl}'; # page link
1161 } else {
1162 die "Unknown annotation";
1165 print ']';
1166 if ( defined($$linfo[2]) ) {
1167 foreach my $x ( @{$$linfo[2]} ) {
1168 if ( $$x[0] == $AuxStr ) {
1169 print ps_string($$x[1]);
1170 } elsif ( $$x[0] == $AuxPage ) {
1171 print $ps_xref_page{$$x[1]},' ';
1172 } elsif ( $$x[0] == $AuxPageStr ) {
1173 print ps_string($ps_xref_page{$$x[1]});
1174 } elsif ( $$x[0] == $AuxXRef ) {
1175 print '/',ps_xref($$x[1]),' ';
1176 } elsif ( $$x[0] == $AuxNum ) {
1177 print $$x[1],' ';
1178 } else {
1179 die "Unknown auxilliary data type";
1183 print ($psconf{pageheight}-$psconf{topmarg}-$$linfo[5]);
1184 print ' ', $$linfo[6] if ( defined($$linfo[6]) );
1185 print ' ', $$linfo[0].$$linfo[1], "\n";
1188 ps_end_page(1);
1189 print "%%EOF\n";