# Tools for info file processing.

# XXX Need to be more careful with reading ahead searching for nodes.

# Exported exceptions.
# String exceptions raised by this module; callers catch them by name.
NoSuchFile = 'no such file'
NoSuchNode = 'no such node'
# The search path for info files; this is site-specific.
# Directory names should end in a pathname delimiter,
# so they can simply be concatenated to a relative pathname.
#INFOPATH = ['', ':Info.Ibrowse:', ':Info:'] # Mac
# Each entry is prepended verbatim to the requested file name;
# '' means the name is tried exactly as given.
INFOPATH = ['', '/usr/local/emacs/info/']  # X11 on UNIX
# Tunable I/O sizes; FUZZ and CHUNKSIZE are derived from BLOCKSIZE.
BLOCKSIZE = 512             # Qty to align reads to, if possible
FUZZ = 2*BLOCKSIZE          # Qty to back-up before searching for a node
CHUNKSIZE = 4*BLOCKSIZE     # Qty to read at once when reading lots of data
# Regular expressions used.
# Note that it is essential that Python leaves unrecognized backslash
# escapes in a string so they can be seen by regexp.compile!
# Each name below is the bound .match method of a compiled pattern, so it
# is called like a function: matcher(string) or matcher(string, startpos).
# The pattern strings must stay ordinary (non-raw) literals: \037, \014,
# \t and \n are translated by Python, everything else is left for regexp.

# Node separator: ^_ (\037), optional formfeed (\014), newline; the group
# captures the header line that follows.
findheader = regexp.compile('\037\014?\n(.*\n)').match
# A bare ^_ separator.
findescape = regexp.compile('\037').match
# "Node: NAME" inside a header line; the group captures NAME.
parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
# The first line of a text, including its newline.
findfirstline = regexp.compile('^.*\n').match
# Header fields; in each the group captures the referenced node name.
findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
# The "* Menu:" line that introduces a menu.
findmenu = regexp.compile('^\* [mM]enu:').match
# A menu item "* TOPIC: REF"; groups capture TOPIC and REF (REF may be ':').
findmenuitem = regexp.compile(
    '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
# A cross reference "*Note TOPIC: REF"; groups capture TOPIC and REF.
findfootnote = regexp.compile(
    '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
# A reference of the form "(FILE)NODE"; groups capture FILE and NODE.
parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
# Get a node and all information pertaining to it.
# This doesn't work if there is an indirect tag table,
# and in general you are better off using icache.get_node() instead.
# Functions get_whole_file() and get_file_node() provide part of the
# functionality used by icache.
# Raise NoSuchFile or NoSuchNode as appropriate.
def get_node(curfile, ref):
    # Resolve REF relative to CURFILE and fetch what it names.
    # Returns the 6-tuple built by get_whole_file() or get_file_node():
    # (file, node, header, menu, footnotes, text).
    # NoSuchFile / NoSuchNode from those helpers propagate to the caller.
    file, node = parse_ref(curfile, ref)
    if node == '*':
        # '*' is the special "whole file as a single node" reference.
        return get_whole_file(file)
    # No tags table is consulted here, so pass 0 as the offset hint.
    return get_file_node(file, 0, node)
def get_whole_file(file):
    # Return FILE's entire contents presented as one pseudo-node named '*'.
    # Result: (file, '*', header, menu, footnotes, text), where header is
    # three empty strings and menu and footnotes are empty lists.
    f = try_open(file)  # May raise NoSuchFile
    try:
        text = f.read()
    finally:
        # Close explicitly rather than waiting for garbage collection.
        f.close()
    header, menu, footnotes = ('', '', ''), [], []
    return file, '*', header, menu, footnotes, text
def get_file_node(file, offset, node):
    # Fetch node NODE from FILE; OFFSET is the position hint handed on to
    # find_node().
    # Result: (file, node, header, menu, footnotes, text); node, header,
    # menu and footnotes are whatever analyze_node() extracts from the text.
    f = try_open(file)  # May raise NoSuchFile
    try:
        text = find_node(f, offset, node)  # May raise NoSuchNode
    finally:
        # The node text is fully read at this point; release the file.
        f.close()
    node, header, menu, footnotes = analyze_node(text)
    return file, node, header, menu, footnotes, text
# Parse a node reference into a file (possibly default) and node name.
# Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
# Default file is the curfile argument; default node is Top.
# A node value of '*' is a special case: the whole file should
# be interpreted (by the caller!) as a single node.
def parse_ref(curfile, ref):
    # Split REF into (file, node).
    # "(FILE)NODE" and "(FILE)" name the file explicitly; a bare "NODE"
    # refers to CURFILE.  An empty file part falls back to CURFILE and an
    # empty node part falls back to 'Top'.
    match = parsenoderef(ref)
    if not match:
        # No "(FILE)" prefix: the whole reference is the node name.
        file, node = curfile, ref
    else:
        # The match is a sequence of (start, end) index pairs: the whole
        # match first, then one pair per group.
        (a, b), (a1, b1), (a2, b2) = match
        file, node = ref[a1:b1], ref[a2:b2]
    if not file:
        file = curfile  # (Is this necessary?)
    if not node:
        node = 'Top'
    return file, node
# Extract node name, links, menu and footnotes from the node text.
100 def analyze_node(text
):
102 # Get node name and links from the header line
104 match
= findfirstline(text
)
110 node
= get_it(text
, findnode
)
111 prev
= get_it(text
, findprev
)
112 next
= get_it(text
, findnext
)
113 up
= get_it(text
, findup
)
115 # Get the menu items, if there is a menu
118 match
= findmenu(text
)
122 match
= findmenuitem(text
, b
)
125 (a
, b
), (a1
, b1
), (a2
, b2
) = match
126 topic
, ref
= text
[a1
:b1
], text
[a2
:b2
]
129 menu
.append(topic
, ref
)
136 match
= findfootnote(text
, b
)
139 (a
, b
), (a1
, b1
), (a2
, b2
) = match
140 topic
, ref
= text
[a1
:b1
], text
[a2
:b2
]
143 footnotes
.append(topic
, ref
)
145 return node
, (prev
, next
, up
), menu
, footnotes
def get_it(line, matcher):
    # Apply MATCHER to LINE and return the text of its first group.
    # MATCHER must return a false value on failure, or a pair of
    # (start, end) index pairs: the whole match, then the group.
    # Returns '' when there is no match.
    match = matcher(line)
    if not match:
        return ''
    (a, b), (a1, b1) = match
    return line[a1:b1]
# Find a node in an open file.
# The offset (from the tags table) is a hint about the node's position.
# Pass zero if there is no tags table.
# Raise NoSuchNode if the node isn't found.
# NB: This seeks around in the file.
162 def find_node(f
, offset
, node
):
163 node
= string
.lower(node
) # Just to be sure
165 # Position a little before the given offset,
166 # so we may find the node even if it has moved around
167 # in the file a little.
169 offset
= max(0, ((offset
-FUZZ
) / BLOCKSIZE
) * BLOCKSIZE
)
172 # Loop, hunting for a matching node header.
175 buf
= f
.read(CHUNKSIZE
)
180 match
= findheader(buf
, i
)
182 (a
,b
), (a1
,b1
) = match
186 match
= parseheader(line
)
188 (a
,b
), (a1
,b1
) = match
189 key
= string
.lower(line
[a1
:b1
])
191 # Got it! Now read the rest.
192 return read_node(f
, buf
[start
:])
193 elif findescape(buf
, i
):
194 next
= f
.read(CHUNKSIZE
)
201 # If we get here, we didn't find it. Too bad.
203 raise NoSuchNode
, node
# Finish off getting a node (subroutine for find_node()).
# The node begins at the start of buf and may end in buf;
# if it doesn't end there, read additional data from f.
210 def read_node(f
, buf
):
212 match
= findescape(buf
, i
)
214 next
= f
.read(CHUNKSIZE
)
220 match
= findescape(buf
, i
)
225 # Strip trailing newlines
226 while end
> 0 and buf
[end
-1] == '\n':
# Read backwards starting at offset until the beginning of a node is found.
# Then return a buffer containing the beginning of the node,
# with f positioned just after the buffer.
# The buffer will contain at least the full header line of the node;
# the caller should finish off with read_node() if it is the right node.
# (It is also possible that the buffer extends beyond the node!)
# Return an empty string if there is no node before the given offset.
240 def backup_node(f
, offset
):
241 start
= max(0, ((offset
-CHUNKSIZE
) / BLOCKSIZE
) * BLOCKSIZE
)
245 buf
= f
.read(end
-start
)
249 match
= findheader(buf
, i
)
251 (a
,b
), (a1
,b1
) = match
254 elif end
< offset
and findescape(buf
, i
):
255 next
= f
.read(min(offset
-end
, BLOCKSIZE
))
259 end
= end
+ len(next
)
265 start
= max(0, end
- CHUNKSIZE
)
# Make a tag table for the given file by scanning the file.
# The file must be open for reading, and positioned at the beginning
# (or wherever the hunt for tags must begin; it is read till the end).
277 buf
= f
.read(CHUNKSIZE
)
282 match
= findheader(buf
, i
)
284 (a
,b
), (a1
,b1
) = match
288 match
= parseheader(line
)
290 (a
,b
), (a1
,b1
) = match
291 key
= string
.lower(line
[a1
:b1
])
292 if tags
.has_key(key
):
293 print 'Duplicate node:',
295 tags
[key
] = '', start
, line
296 elif findescape(buf
, i
):
297 next
= f
.read(CHUNKSIZE
)
# Try to open a file; return a file object if that succeeds.
# Raise NoSuchFile if the file can't be opened.
# XXX Should treat absolute pathnames specially.
313 return open(dir + file, 'r')
316 raise NoSuchFile
, file
# A little test for the speed of make_tags().
# Name of the info file opened by test_make_tags().
TESTFILE = 'texinfo-1'
322 def test_make_tags():
324 f
= try_open(TESTFILE
)
325 t1
= time
.millitimer()
327 t2
= time
.millitimer()
328 print 'Making tag table for', `TESTFILE`
, 'took', t2
-t1
, 'msec.'