2 # -----------------------------
3 # Perl 5.8 or later required
4 # -----------------------------
# PerlIO layer appended to the read mode of every info file opened below,
# and also applied to STDOUT.
8 $infofile_encoding = ":utf8";
# Send the generated Lisp text through the same layer as the input files.
10 binmode STDOUT
, $infofile_encoding;
# ASCII unit separator (0x1F). The scans below treat "\n" followed by this
# character as the start of a texinfo node.
12 $unit_separator = "\x1f";
17 # ------------------------------------------------------------------
18 # PART 1. BUILD INDEX FOR @DEFFN AND @DEFVR ITEMS
19 # ------------------------------------------------------------------
21 # (1.1) Build index tables.
23 # (1.1a) Scan the *.info-* files for unit separator characters;
24 # those mark the start of each texinfo node.
25 # Build a hash table which associates the node name with the filename
26 # and byte offset (NOT character offset) of the unit separator.
# NOTE(review): the files are read through the $infofile_encoding layer and
# section 1.2 unpacks the stored value as $character_offset, so the offset
# recorded here looks like a CHARACTER offset despite the sentence above --
# confirm which one is intended.
28 # Do NOT use the indirect table + tag table (generated by makeinfo),
29 # because those tables give character offsets; we want byte offsets.
30 # It is easier to construct a byte offset table by hand,
31 # rather than attempting to fix up the character offsets.
32 # (Which are strange anyway.)
# Slurp the main info file into $stuff. "-s FH" is the file size in bytes,
# used here as the maximum amount to read.
# NOTE(review): $main_info is not assigned in the visible code (presumably it
# comes from the command line), and neither open nor read is checked for
# failure -- confirm.
34 open (FH
, "<" . $infofile_encoding, $main_info);
35 read (FH
, $stuff, -s FH
);
37 # check which version of makeinfo produced $main_info
38 # for purposes of bug workaround
# Both variables stay undefined when the "makeinfo version X.Y" text is not
# found in the main info file.
39 ($makeinfo_major_version, $makeinfo_minor_version) =
40 $stuff =~ /makeinfo version (\d+)\.(\d+)/;
41 # print STDERR "makeinfo version $makeinfo_major_version . $makeinfo_minor_version\n";
# The main info file is the first entry of @info_filenames; subsidiary files
# are appended further down.
43 $filename = $main_info;
44 push @info_filenames, $filename;
# Each iteration advances the match position (\G, kept on failure by /c) to
# just before the next "\n" + unit separator in the main file.
46 while ($stuff =~ m/\G.*?(?=\n$unit_separator)/cgsm) {
# Continue from that position to the next "File: ... Node: <name>," header.
# NOTE(review): $node_name and $offset are read below, but the statements
# that assign them are not visible in this excerpt -- confirm against the
# complete file.
49 if ($stuff =~ m/^File:.*?Node: (.*?),/csgm) {
# Remember the most recently seen node; section 1.1b takes the last one to be
# the index node.
51 $last_node_name = $node_name;
54 # print ";; IN SEC 1.1a, SEARCH MAIN INFO; NODE NAME=$node_name, FILENAME=$filename, OFFSET=$offset\n";
# Record [filename, offset] for this node.
55 $node_offset{$node_name} = [($filename, int($offset))];
# Read the main info file again into $stuff; assigning a fresh value restarts
# regex matching from the beginning of the string.
60 open (FH
, "<" . $infofile_encoding, $main_info);
61 read (FH
, $stuff, -s FH
);
# Every line of the form "<main_info>-<digits>: <digits>" names a subsidiary
# info file.
# NOTE(review): $main_info is interpolated into the pattern without \Q...\E,
# so regex metacharacters in the file name (such as ".") are not matched
# literally. $filename is presumably set from the first capture on a line
# not visible here -- confirm.
63 while ($stuff =~ m/^($main_info-\d+): (\d+)/cgsm) {
65 push @info_filenames, $filename;
# Slurp the subsidiary file into $stuff2 through a second handle so that the
# scan position in $stuff is left alone.
67 open FH2
, "<" . $infofile_encoding, $filename;
68 read FH2
, $stuff2, -s FH2
;
# Same node scan as for the main file, applied to the subsidiary file.
70 while ($stuff2 =~ m/\G.*?(?=\n$unit_separator)/cgsm) {
# Position just before the "\n" + unit separator that starts the node.
71 $offset = pos $stuff2;
73 if ($stuff2 =~ m/^File:.*?Node: (.*?),/csgm) {
75 $last_node_name = $node_name;
78 # print ";; IN SEC 1.1a, SEARCH SUBSIDIARY INFO; NODE NAME=$node_name, FILENAME=$filename, OFFSET=$offset\n";
79 $node_offset{$node_name} = [($filename, int($offset))];
87 # (1.1b) Read the info index, which gives the node name and number of lines offset
88 # for each indexed item.
90 # ASSUME THAT THE INFO INDEX IS THE LAST NODE.
91 # (GETTING THE NODE NAME FROM THE COMMAND LINE IS PROBLEMATIC.)
92 $index_node_name = $last_node_name;
# Look up the [filename, offset] pair recorded for the index node.
# NOTE(review): $index_node_offset is not used again in the visible code;
# the scan below starts from the beginning of the index file.
94 ($index_filename, $index_node_offset) = @
{$node_offset{$index_node_name}};
95 # print ";; IN SEC 1.1b, INDEX NODE NAME=$index_node_name, INDEX FILENAME=$index_filename, INDEX NODE OFFSET=$index_node_offset\n";
# Slurp the file that contains the index node.
97 open (FH
, "<" . $infofile_encoding, $index_filename);
98 read (FH
, $stuff, -s FH
);
# Find the "File: ... Node: <index node>" header (case-insensitive because of
# /i).
# NOTE(review): $index_node_name is interpolated without \Q...\E, so regex
# metacharacters in the node name are not matched literally.
100 while ($stuff =~ m/^File:.*?Node: $index_node_name/icgsm) {
# From there, match each menu entry "* <topic>: <node>. (line <n>)", skipping
# the "* Menu" line. The captures are topic ($1), node name ($2) and line
# number ($3); because of /s an entry may wrap over several lines.
# NOTE(review): the assignments of $topic_name, $node_name and $lines_offset
# from these captures are not visible in this excerpt -- confirm.
101 while ($stuff =~ m/\G.*?^\* (?!Menu)(\S+|[^:]+):\s+(.*?)\.\s+\(line\s+(\d+)\)/cgsm) {
105 # print ";; IN SEC 1.1b, TOPIC NAME=$topic_name, NODE NAME=$node_name, LINES OFFSET=$lines_offset\n";
# First form of a %topic_locator entry: [node name, line offset within node].
# Section 1.2 replaces it with a four-element form.
106 $topic_locator{$topic_name} = [($node_name, $lines_offset)];
112 # (1.2) Translate node name and number of lines offset into file name and byte offset
113 # for each indexed item.
114 # Also find the length of each item.
116 foreach $key (sort keys %topic_locator) {
# Unpack the [node name, line offset] pair stored by section 1.1b, then the
# [filename, offset] pair stored for that node by section 1.1a.
117 ($node_name, $lines_offset) = @
{$topic_locator{$key}};
118 ($filename, $character_offset) = @
{$node_offset{$node_name}};
# seek_lines takes the file, the node offset and the line offset; its result
# is used below as an absolute file position.
119 $byte_offset = seek_lines
($filename, $character_offset, $lines_offset);
# Reopen the file, seek to $byte_offset measured from the start of the file
# (whence 0), and read everything from there into $stuff.
121 open FH
, "<" . $infofile_encoding, $filename;
122 seek FH
, $byte_offset, 0;
123 read FH
, $stuff, -s FH
;
# The item text is the shortest prefix that is followed by one of:
#   - a blank line and then " -- "
#   - a newline and then a digit
#   - the unit separator
# NOTE(review): these presumably correspond to the next definition, the next
# numbered section heading and the next node respectively -- confirm.
124 if ($stuff =~ m/(.*?)(?:\n\n(?= -- )|\n(?=[0-9])|(?=$unit_separator))/cgsm) {
# Length is taken from the decoded string, so it counts characters.
125 $text_length = length $1;
# NOTE(review): the two statements below are presumably the else branch of
# the test above; the "else" line itself is not visible in this excerpt.
128 # Eat everything up until end of file.
129 $stuff =~ m/(.*)/cgsm;
130 $text_length = length $1;
134 # print ";; IN SEC 1.2, KEY=$key, NODE NAME=$node_name, FILENAME=$filename, BYTE OFFSET=$byte_offset, TEXT LENGTH=$text_length\n";
# Replace the entry with its final form:
# [node name, filename, byte offset, text length].
135 $topic_locator{$key} = [($node_name, $filename, $byte_offset, $text_length)];
138 # (1.3) Generate Lisp code. The functions in info.lisp expect this stuff.
140 print "(in-package :cl-info)\n";
142 # Pairs of the form (<index topic> . (<filename> <byte offset> <length> <node name>))
145 print "(deffn-defvr-pairs '(\n";
146 print "; CONTENT: (<INDEX TOPIC> . (<FILENAME> <BYTE OFFSET> <LENGTH IN CHARACTERS> <NODE NAME>))\n";
# Emit one dotted pair per topic, in sorted key order.
148 foreach $key (sort keys %topic_locator) {
# Escape double quotes so the key can be written as a Lisp string literal.
# NOTE(review): backslashes in the key are not escaped, and the file name and
# node name are written without any escaping -- confirm that is acceptable
# for the names that occur in practice.
150 my $sanitized_key = $key;
151 $sanitized_key =~ s/"/\\"/g;
# Entry layout, as stored by section 1.2:
# [0] node name, [1] filename, [2] byte offset, [3] text length.
152 my $file_name = $topic_locator{$key}[1];
153 my $byte_offset = $topic_locator{$key}[2];
154 my $nchars = $topic_locator{$key}[3];
155 my $node_name = $topic_locator{$key}[0];
# Sanity check: on an empty string or a negative number, report to STDERR and
# put a Lisp comment in the output. The message says the item is emitted
# anyway.
156 if ($sanitized_key eq '' or $file_name eq '' or $byte_offset < 0 or $nchars < 0 or $node_name eq '') {
157 print STDERR
"build_index.pl: something seems wrong for key=\"$sanitized_key\"; emit it anyway.\n";
158 print STDERR
"build_index.pl: sanitized_key=\"$sanitized_key\", file_name=\"$file_name\", byte_offset=$byte_offset, nchars=$nchars, node_name=\"$node_name\"\n";
159 print ";; build_index.pl: something seems wrong for this next item\n";
# The pair itself: ("<topic>" . ("<filename>" <byte offset> <length> "<node name>")).
161 print "(\"$sanitized_key\" . (\"$file_name\" $byte_offset $nchars \"$node_name\"))\n";
166 # ------------------------------------------------------------------
167 # PART 2. BUILD INDEX FOR @NODE ITEMS
168 # ------------------------------------------------------------------
170 # (2.1) Search for 'mmm.nnn' at the start of a line,
171 # and take each one of those to be the start of a node.
173 # We could use the node table ($node_offset here), but we don't.
175 # (a) The node table indexes nodes which contain only menus.
176 # We don't want those because they have no useful text.
178 # (b) The offset stated in the node table tells the location
179 # of the "File: ..." header. We would have to cut off that stuff.
181 # (c) Offsets computed by makeinfo are character offsets,
182 # so we would have to convert those to byte offsets.
183 # (But we have to do that anyway, so I guess there's no
184 # advantage either way on that point.)
# Scan the main info file and every subsidiary file collected in section 1.1a.
186 for $filename (@info_filenames) {
# Slurp the whole file into $stuff.
188 open (FH
, "<" . $infofile_encoding, $filename);
189 read (FH
, $stuff, -s FH
);
# Advance the match position to just before the next line that starts with
# "<digits>.<digits> ".
191 while ($stuff =~ m/\G(.*?)(?=^\d+\.\d+ .*?\n)/cgsm) {
193 # Since FH was opened with $infofile_encoding,
194 # pos returns a CHARACTER offset.
195 $begin_node_offset = pos($stuff);
# Match the heading line itself: $1 is the whole line including its newline,
# $2 the "mmm.nnn" number and $3 the text after it.
# NOTE(review): $node_title is used as the hash key below, but its assignment
# is not visible in this excerpt -- confirm which capture it takes.
197 if ($stuff =~ m/((^\d+\.\d+) (.*?)\n)/cgsm) {
# The length starts with the heading line ...
199 $node_length = length $1;
202 # Node text ends at a unit separator character,
203 # or at the end of the file.
# ... plus everything up to, but not including, the next unit separator ...
205 if ($stuff =~ m/\G(.*?)($unit_separator)/cgsm) {
206 $node_length += length $1;
# ... or, presumably in an else branch whose line is not visible here, plus
# everything up to the end of the file.
209 $stuff =~ m/\G(.*)/csgm;
210 $node_length += length $1;
# Entry: [filename, character offset of the heading, length in characters].
213 $node_locator{$node_title} = [($filename, $begin_node_offset, $node_length)];
219 # Translate character offsets to byte offsets.
221 foreach $node_title (sort keys %node_locator) {
222 ($filename, $begin_node_offset, $node_length) = @
{$node_locator{$node_title}};
# Read exactly $begin_node_offset characters from the start of the file; the
# handle position reported by tell afterwards is the corresponding byte
# offset. The file is reopened once per node title.
223 open FH
, "<" . $infofile_encoding, $filename;
224 read FH
, $stuff, $begin_node_offset;
225 my $begin_node_offset_bytes = tell FH
;
# Store the byte offset in place of the character offset; the length is left
# in characters.
228 $node_locator{$node_title} = [($filename, $begin_node_offset_bytes, $node_length)];
231 # (2.2) Generate Lisp code.
233 # Pairs of the form (<node name> . (<filename> <byte offset> <length>))
235 print "(section-pairs '(\n";
236 print "; CONTENT: (<NODE NAME> . (<FILENAME> <BYTE OFFSET> <LENGTH IN CHARACTERS>))\n";
# Emit one dotted pair per section title, in sorted order.
238 foreach $node_title (sort keys %node_locator) {
# Entry layout after section 2.1: [filename, byte offset, length in characters].
240 ($filename, $begin_node_offset, $length) = @
{$node_locator{$node_title}};
# Escape double quotes so the title can be written as a Lisp string literal.
# NOTE(review): backslashes in the title and the file name are not escaped.
241 my $sanitized_title = $node_title;
242 $sanitized_title =~ s/"/\\"/g;
# Sanity check: on an empty string or a negative number, report to STDERR and
# put a Lisp comment in the output. The message says the item is emitted
# anyway.
243 if ($sanitized_title eq '' or $filename eq '' or $begin_node_offset < 0 or $length < 0) {
244 print STDERR
"build_index.pl: something seems wrong for title=\"$sanitized_title\"; emit it anyway.\n";
245 print STDERR
"build_index.pl: sanitized_title=\"$sanitized_title\", filename=\"$filename\", begin_node_offset=$begin_node_offset, length=$length\n";
246 print ";; build_index.pl: something seems wrong for this next item\n";
# The pair itself: ("<title>" . ("<filename>" <byte offset> <length>)).
248 print "(\"$sanitized_title\" . (\"$filename\" $begin_node_offset ", $length, "))\n";
253 # Construct hashtables from the lists given above.
255 print "(load-info-hashtables (maxima::maxima-load-pathname-directory) deffn-defvr-pairs section-pairs))\n";
257 # (2.3) Do we have any items or sections?
259 # Warn if no index items or sections found.
# NOTE(review): no statement that increments $item_cnt or $section_cnt is
# visible in this excerpt; if none exists in the complete file, this warning
# is printed on every run -- confirm.
261 ($item_cnt+$section_cnt)>0 ||
262 print STDERR
"WARNING: Empty index. Not sure what's going on.\n";
264 # ------------------------------------------------------------------
# NOTE(review): the lines below look like the body of the helper that
# section 1.2 calls as seek_lines($filename, $character_offset,
# $lines_offset). The "sub" line, the closing brace and any return statement
# are not visible in this excerpt.
266 # ------------------------------------------------------------------
# Arguments: info file name, offset of the node within that file (counted in
# characters, going by the parameter name) and the number of lines from the
# node start to the indexed item.
269 my ($filename, $character_offset, $lines_offset) = @_;
# Open the file and consume $character_offset characters so that the handle
# is positioned at the start of the node.
270 open FH
, "<" . $infofile_encoding, $filename;
271 read FH
, $stuff, $character_offset;
273 # MAKEINFO BUG: LINE OFFSET IS LINE NUMBER OF LAST LINE IN FUNCTION DEFINITION
274 # (BUT WE NEED THE FIRST LINE OF THE FUNCTION DEFINITION)
276 # EXAMPLE. THE PROBLEM IS THAT THE FUNCTION DEFINITION IS BROKEN ACROSS TWO
277 # OR MORE LINES (NOT THAT THERE ARE MULTIPLE FUNCTION DEFINITIONS):
278 # -- Function: setup_autoload (<filename>, <function_1>, ...,
281 # BUG IS PRESENT IN MAKEINFO 4.8, NOT PRESENT IN MAKEINFO 5.1
# $makeinfo_major_version comes from the "makeinfo version X.Y" text found in
# the main info file; it is undefined when that text was not found.
284 if ($makeinfo_major_version == 4) {
# Workaround path: loop over $lines_offset + 1 lines, testing each for the
# start of a definition (" -- " followed by a non-space character).
# NOTE(review): the statement that reads $line from FH and the bookkeeping
# that remembers the matching position are not visible in this excerpt.
288 for (1 .. $lines_offset + 1) {
291 if ($line =~ /^ -- \S/) {
297 # We didn't encounter any match for "^ -- \S".
301 # VERSION WITHOUT BUG WORKAROUND,
302 # FOR MAKEINFO VERSION 5
# Read and discard $lines_offset lines, leaving the handle at the item.
303 <FH
> for 1 .. $lines_offset;