Demo/stdwin/ibrowse/ifile.py

   1 # Tools for info file processing.
   2
   3 # XXX Need to be more careful about reading ahead when searching for nodes.
   4
   5
   6 import regexp
   7 import string
   8
   9
  10 # Exported exceptions.
  11 #
  12 NoSuchFile = 'no such file'       # raised by try_open(), with the file name as value
  13 NoSuchNode = 'no such node'       # raised by find_node(), with the node name as value
  14
  15
  16 # The search path for info files; this is site-specific.
  17 # Directory names should end in a partname delimiter,
  18 # so they can simply be concatenated to a relative pathname.
  19 #
  20 #INFOPATH = ['', ':Info.Ibrowse:', ':Info:']    # Mac
  21 INFOPATH = ['', '/usr/local/emacs/info/']       # X11 on UNIX
     # try_open() prepends each entry, in order, to the requested file name;
     # the leading '' entry makes it try the name exactly as given first.
  22
  23
  24 # Tunable constants.
  25 #
  26 BLOCKSIZE = 512                 # Qty to align reads to, if possible
     # FUZZ and CHUNKSIZE are both defined as whole multiples of BLOCKSIZE.
  27 FUZZ = 2*BLOCKSIZE              # Qty to back-up before searching for a node
  28 CHUNKSIZE = 4*BLOCKSIZE         # Qty to read at once when reading lots of data
  29
  30
  31 # Regular expressions used.
  32 # Note that it is essential that Python leaves unrecognized backslash
  33 # escapes in a string so they can be seen by regexp.compile!
  34 #
  35 findheader = regexp.compile('\037\014?\n(.*\n)').match
     # findheader (above): a \037 character, an optional \014 (form feed),
     # a newline, and then one complete line captured as group 1 -- the
     # header line of the node that follows the separator.
     #
     # Every name in this section is the bound match method of a compiled
     # pattern.  As used in this file, a call returns a false value on
     # failure, or a sequence of (start, end) index pairs: first the whole
     # match, then one pair per parenthesized group.  An optional second
     # argument gives the index at which to start looking.
     # NOTE(review): the callers rely on the pattern being found anywhere
     # at or after that index (i.e. search semantics) -- confirm against
     # the regexp module.
     #
     # findescape: a single \037 character.
  36 findescape = regexp.compile('\037').match
     # parseheader: "Node:" or "node:", optional blanks, then the node name
     # (group 1), which runs up to a tab, comma or newline.
  37 parseheader = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
     # findfirstline: one line, from a line start through its newline.
  38 findfirstline = regexp.compile('^.*\n').match
     # findnode: the same pattern as parseheader.
  39 findnode = regexp.compile('[nN]ode:[ \t]*([^\t,\n]*)').match
     # findprev: "Prev" or "prev", any run of the letters i, o, u, s
     # (so "Previous" is accepted too), a colon, then the name as group 1.
  40 findprev = regexp.compile('[pP]rev[ious]*:[ \t]*([^\t,\n]*)').match
     # findnext / findup: likewise for the "Next:" and "Up:" fields.
  41 findnext = regexp.compile('[nN]ext:[ \t]*([^\t,\n]*)').match
  42 findup = regexp.compile('[uU]p:[ \t]*([^\t,\n]*)').match
     # findmenu: a line starting with "* Menu:" or "* menu:".
  43 findmenu = regexp.compile('^\* [mM]enu:').match
     # findmenuitem: a line starting with "* ", the item name up to the
     # first colon (group 1), optional blanks, then group 2, which is one of:
     # a second colon; a parenthesized part followed by a name; or a plain
     # name.  Names stop at a tab, comma, newline or period.
  44 findmenuitem = regexp.compile( \
  45         '^\* ([^:]+):[ \t]*(:|\([^\t]*\)[^\t,\n.]*|[^:(][^\t,\n.]*)').match
     # findfootnote: "*Note " or "*note ", the topic up to the first colon
     # (group 1), optional blanks, then group 2: either a second colon or a
     # name that stops at a tab, comma, newline or period.
  46 findfootnote = regexp.compile( \
  47         '\*[nN]ote ([^:]+):[ \t]*(:|[^:][^\t,\n.]*)').match
     # parsenoderef: "(FILE)NODE" -- group 1 is the text inside the
     # parentheses, group 2 is whatever follows them.
  48 parsenoderef = regexp.compile('^\((.*)\)(.*)$').match
  49
  50
  51 # Get a node and all information pertaining to it.
  52 # This doesn't work if there is an indirect tag table,
  53 # and in general you are better off using icache.get_node() instead.
  54 # Functions get_whole_file() and get_file_node() each provide part
  55 # of this functionality; they are used by icache.
  56 # Raise NoSuchFile or NoSuchNode as appropriate.
  57 #
  58 def get_node(curfile, ref):
             # curfile: file to use when ref does not name one.
             # ref: node reference string; it is split up by parse_ref().
             # Returns what get_whole_file() or get_file_node() returns:
             # the tuple (file, node, header, menu, footnotes, text).
  59         file, node = parse_ref(curfile, ref)
  60         if node == '*':
                     # '*' selects the whole file as a single node
  61                 return get_whole_file(file)
  62         else:
                     # Offset hint 0: the search starts at the top of the file
  63                 return get_file_node(file, 0, node)
  64 #
  65 def get_whole_file(file):
             # Return the complete contents of the file as one node named '*'.
             # The result has the same shape as that of get_file_node():
             # (file, node, header, menu, footnotes, text).
             # NOTE(review): f is never closed explicitly in this function.
  66         f = try_open(file) # May raise NoSuchFile
  67         text = f.read()
             # No links, no menu and no footnotes for the whole-file node
  68         header, menu, footnotes = ('', '', ''), [], []
  69         return file, '*', header, menu, footnotes, text
  70 #
  71 def get_file_node(file, offset, node):
             # file: name of the info file, opened through try_open().
             # offset: position hint handed to find_node() (0 if unknown).
             # node: name of the node wanted.
             # Returns (file, node, header, menu, footnotes, text), where the
             # node name returned is the one analyze_node() extracted from the
             # text, and header is the (prev, next, up) triple.
             # NOTE(review): f is never closed explicitly in this function.
  72         f = try_open(file) # May raise NoSuchFile
  73         text = find_node(f, offset, node) # May raise NoSuchNode
  74         node, header, menu, footnotes = analyze_node(text)
  75         return file, node, header, menu, footnotes, text
  76
  77
  78 # Parse a node reference into a file (possibly default) and node name.
  79 # Possible reference formats are: "NODE", "(FILE)", "(FILE)NODE".
  80 # Default file is the curfile argument; default node is Top.
  81 # A node value of '*' is a special case: the whole file should
  82 # be interpreted (by the caller!) as a single node.
  83 #
  84 def parse_ref(curfile, ref):
             # Returns the pair (file, node).  An empty file part falls back
             # to curfile and an empty node part falls back to 'Top'.
  85         match = parsenoderef(ref)
  86         if not match:
                     # No "(FILE)" prefix: the whole reference is a node name
  87                 file, node = curfile, ref
  88         else:
                     # match holds index pairs: the whole match, then the
                     # text inside the parentheses, then the text after them
  89                 (a, b), (a1, b1), (a2, b2) = match
  90                 file, node = ref[a1:b1], ref[a2:b2]
  91         if not file:
  92                 file = curfile # (Is this necessary?)
  93         if not node:
  94                 node = 'Top'
  95         return file, node
  96
  97
  98 # Extract node name, links, menu and footnotes from the node text.
  99 #
 100 def analyze_node(text):
 101         #
 102         # Get node name and links from the header line
 103         #
 104         match = findfirstline(text)
 105         if match:
 106                 (a, b) = match[0]
 107                 line = text[a:b]
 108         else:
 109                 line = ''
 110         node = get_it(text, findnode)
 111         prev = get_it(text, findprev)
 112         next = get_it(text, findnext)
 113         up = get_it(text, findup)
 114         #
 115         # Get the menu items, if there is a menu
 116         #
 117         menu = []
 118         match = findmenu(text)
 119         if match:
 120                 (a, b) = match[0]
 121                 while 1:
 122                         match = findmenuitem(text, b)
 123                         if not match:
 124                                 break
 125                         (a, b), (a1, b1), (a2, b2) = match
 126                         topic, ref = text[a1:b1], text[a2:b2]
 127                         if ref == ':':
 128                                 ref = topic
 129                         menu.append(topic, ref)
 130         #
 131         # Get the footnotes
 132         #
 133         footnotes = []
 134         b = 0
 135         while 1:
 136                 match = findfootnote(text, b)
 137                 if not match:
 138                         break
 139                 (a, b), (a1, b1), (a2, b2) = match
 140                 topic, ref = text[a1:b1], text[a2:b2]
 141                 if ref == ':':
 142                         ref = topic
 143                 footnotes.append(topic, ref)
 144         #
 145         return node, (prev, next, up), menu, footnotes
 146 #
 147 def get_it(line, matcher):
             # Apply matcher (a pattern with exactly one group) to line and
             # return the text of that group, or '' if there is no match.
 148         match = matcher(line)
 149         if not match:
 150                 return ''
 151         else:
                     # (a, b) spans the whole match, (a1, b1) spans group 1
 152                 (a, b), (a1, b1) = match
 153                 return line[a1:b1]
 154
 155
 156 # Find a node in an open file.
 157 # The offset (from the tags table) is a hint about the node's position.
 158 # Pass zero if there is no tags table.
 159 # Raise NoSuchNode if the node isn't found.
 160 # NB: This seeks around in the file.
 161 #
 162 def find_node(f, offset, node):
             # f: open info file; offset: position hint (0 if unknown);
             # node: name to look for; names are compared in lower case.
             # Returns the text of the node as delivered by read_node().
 163         node = string.lower(node) # Just to be sure
 164         #
 165         # Position a little before the given offset,
 166         # so we may find the node even if it has moved around
 167         # in the file a little.
 168         #
             # Back up by FUZZ and round down to a multiple of BLOCKSIZE, but
             # never below 0.  NOTE(review): this relies on "/" being integer
             # division for integer operands -- confirm for the target Python.
 169         offset = max(0, ((offset-FUZZ) / BLOCKSIZE) * BLOCKSIZE)
 170         f.seek(offset)
 171         #
 172         # Loop, hunting for a matching node header.
 173         #
 174         while 1:
 175                 buf = f.read(CHUNKSIZE)
 176                 if not buf:
                             # End of file
 177                         break
                     # i is the index in buf where the next header search starts
 178                 i = 0
 179                 while 1:
 180                         match = findheader(buf, i)
 181                         if match:
 182                                 (a,b), (a1,b1) = match
                                     # start: where the header line begins in buf;
                                     # i: continue after this header next time
 183                                 start = a1
 184                                 line = buf[a1:b1]
 185                                 i = b
 186                                 match = parseheader(line)
 187                                 if match:
 188                                         (a,b), (a1,b1) = match
 189                                         key = string.lower(line[a1:b1])
 190                                         if key == node:
 191                                                 # Got it!  Now read the rest.
 192                                                 return read_node(f, buf[start:])
 193                         elif findescape(buf, i):
                                     # A \037 is present but no complete header
                                     # follows it in buf: extend buf and retry
 194                                 next = f.read(CHUNKSIZE)
 195                                 if not next:
 196                                         break
 197                                 buf = buf + next
 198                         else:
                                     # No separator left in buf: go read a new chunk
 199                                 break
 200         #
 201         # If we get here, we didn't find it.  Too bad.
 202         #
             # The exception value is the lower-cased node name
 203         raise NoSuchNode, node
 204
 205
 206 # Finish off getting a node (subroutine for find_node()).
 207 # The node begins at the start of buf and may end in buf;
 208 # if it doesn't end there, read additional data from f.
 209 #
 210 def read_node(f, buf):
             # f: open file positioned just after the data already in buf.
             # buf: text starting at the beginning of the node.
             # Returns the node text up to (not including) the next \037
             # character, or up to end of file, minus trailing newlines.
 211         i = 0
 212         match = findescape(buf, i)
 213         while not match:
 214                 next = f.read(CHUNKSIZE)
 215                 if not next:
                             # End of file: the node runs to the end of buf.
                             # The break skips the else clause below.
 216                         end = len(buf)
 217                         break
                     # Only the newly added data needs to be searched
 218                 i = len(buf)
 219                 buf = buf + next
 220                 match = findescape(buf, i)
 221         else:
                     # Reached when the loop condition fails, i.e. not via break
 222                 # Got a match
 223                 (a, b) = match[0]
 224                 end = a
 225         # Strip trailing newlines
 226         while end > 0 and buf[end-1] == '\n':
 227                 end = end-1
 228         buf = buf[:end]
 229         return buf
 230
 231
 232 # Read reverse starting at offset until the beginning of a node is found.
 233 # Then return a buffer containing the beginning of the node,
 234 # with f positioned just after the buffer.
 235 # The buffer will contain at least the full header line of the node;
 236 # the caller should finish off with read_node() if it is the right node.
 237 # (It is also possible that the buffer extends beyond the node!)
 238 # Return an empty string if there is no node before the given offset.
 239 #
 240 def backup_node(f, offset):
             # f: open info file (this function seeks in it).
             # offset: file position before which to look for a node header.
             # The file is examined in windows [start, end) that move towards
             # the beginning of the file until a header is found.
             # NOTE(review): "/" is relied upon to be integer division here.
 241         start = max(0, ((offset-CHUNKSIZE) / BLOCKSIZE) * BLOCKSIZE)
 242         end = offset
 243         while start < end:
 244                 f.seek(start)
 245                 buf = f.read(end-start)
 246                 i = 0
                     # hit: index in buf of the last header line seen, -1 if none
 247                 hit = -1
 248                 while 1:
 249                         match = findheader(buf, i)
 250                         if match:
 251                                 (a,b), (a1,b1) = match
                                     # Later headers overwrite earlier ones, so the
                                     # one nearest to the end of the window wins
 252                                 hit = a1
 253                                 i = b
 254                         elif end < offset and findescape(buf, i):
                                     # A \037 without a complete header after it:
                                     # extend the window, but never past offset
 255                                 next = f.read(min(offset-end, BLOCKSIZE))
 256                                 if not next:
 257                                         break
 258                                 buf = buf + next
 259                                 end = end + len(next)
 260                         else:
 261                                 break
 262                 if hit >= 0:
 263                         return buf[hit:]
                     # Nothing in this window: move it one CHUNKSIZE earlier
 264                 end = start
 265                 start = max(0, end - CHUNKSIZE)
 266         return ''
 267
 268
 269 # Make a tag table for the given file by scanning the file.
 270 # The file must be open for reading, and positioned at the beginning
 271 # (or wherever the hunt for tags must begin; it is read till the end).
 272 #
 273 def make_tags(f):
             # Returns a dictionary that maps each lower-cased node name to
             # the tuple ('', start, line), where start is the file offset of
             # the node's header line and line is that header line itself.
             # A node name seen twice is reported on standard output; the
             # later occurrence replaces the earlier one.
 274         tags = {}
 275         while 1:
                     # offset: file position of the first character of buf
 276                 offset = f.tell()
 277                 buf = f.read(CHUNKSIZE)
 278                 if not buf:
 279                         break
 280                 i = 0
 281                 while 1:
 282                         match = findheader(buf, i)
 283                         if match:
 284                                 (a,b), (a1,b1) = match
                                     # Turn the index in buf into a file offset
 285                                 start = offset+a1
 286                                 line = buf[a1:b1]
 287                                 i = b
 288                                 match = parseheader(line)
 289                                 if match:
 290                                         (a,b), (a1,b1) = match
 291                                         key = string.lower(line[a1:b1])
 292                                         if tags.has_key(key):
 293                                                 print 'Duplicate node:',
 294                                                 print key
 295                                         tags[key] = '', start, line
 296                         elif findescape(buf, i):
                                     # A \037 without a complete header after it:
                                     # extend buf (offset stays valid) and retry
 297                                 next = f.read(CHUNKSIZE)
 298                                 if not next:
 299                                         break
 300                                 buf = buf + next
 301                         else:
 302                                 break
 303         return tags
 304
 305
 306 # Try to open a file, return a file object if succeeds.
 307 # Raise NoSuchFile if the file can't be opened.
 308 # XXX Should treat absolute pathnames specially.
 309 #
 310 def try_open(file):
             # Try each INFOPATH entry in order, prepended to file as is, and
             # return the first file that can be opened for reading.
 311         for dir in INFOPATH:
 312                 try:
 313                         return open(dir + file, 'r')
 314                 except IOError:
                             # Not found under this prefix; try the next one
 315                         pass
             # No entry worked; the exception value is the requested name
 316         raise NoSuchFile, file
 317
 318
 319 # A little test for the speed of make_tags().
 320 #
 321 TESTFILE = 'texinfo-1'
 322 def test_make_tags():
             # Build the tag table for TESTFILE (located through try_open())
             # and print how long that took.
             # NOTE(review): time.millitimer() presumably returns milliseconds,
             # as the 'msec.' label suggests -- confirm.
 323         import time
 324         f = try_open(TESTFILE)
 325         t1 = time.millitimer()
 326         tags = make_tags(f)
 327         t2 = time.millitimer()
 328         print 'Making tag table for', `TESTFILE`, 'took', t2-t1, 'msec.'