mkman,mkpdf: also fix path arg handling
[gtk-doc.git] / gtkdoc / fixxref.py
blob4893b870e219bc3aee7439aca4205a2d7bff78ac
1 # -*- python -*-
3 # gtk-doc - GTK DocBook documentation generator.
4 # Copyright (C) 1998 Damon Chaplin
5 # 2007-2016 Stefan Sauer
7 # This program is free software; you can redistribute it and/or modify
8 # it under the terms of the GNU General Public License as published by
9 # the Free Software Foundation; either version 2 of the License, or
10 # (at your option) any later version.
12 # This program is distributed in the hope that it will be useful,
13 # but WITHOUT ANY WARRANTY; without even the implied warranty of
14 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
15 # GNU General Public License for more details.
17 # You should have received a copy of the GNU General Public License
18 # along with this program; if not, write to the Free Software
19 # Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Fix cross-references in the HTML documentation."""
24 # Support both Python 2 and 3
25 from __future__ import print_function
27 import logging
28 import os
29 import re
30 import shlex
31 import subprocess
32 import tempfile
34 from . import common, config
# Maps every known entity id to its URL (relative or absolute).
Links = {}

# Failing link targets we don't warn about even once. Entries are also added
# at runtime for ids that were already reported or that are known to be
# unresolvable.
NoLinks = {
    'char',
    'double',
    'float',
    'int',
    'long',
    'main',
    'signed',
    'unsigned',
    'va-list',
    'void',
    'GBoxed',
    'GEnum',
    'GFlags',
    'GInterface',
}

# Cache of dirs we already scanned for index files
DirCache = {}
def Run(options):
    """Collect all known link targets, then fix the links of the module docs.

    Index files are scanned in this order (later scans override earlier
    ones): the GLib doc dir, the dirs listed in GNOME2_PATH,
    options.html_dir, options.module_dir and finally options.extra_dir.
    Afterwards the sections file is read and the HTML files are rewritten.

    Args:
        options: parsed command line options; html_dir, module_dir and
            extra_dir are read here, the object is also handed on to
            ReadSections() and FixCrossReferences().
    """
    # logging.basicConfig(level=logging.INFO)

    prefix_hit = re.search(r'(.*?)/share/gtk-doc/html', options.html_dir)
    if prefix_hit:
        path_prefix = prefix_hit.group(1)
        logging.info('Path prefix: %s', path_prefix)
    else:
        path_prefix = ''
    prefix_match = r'^' + re.escape(path_prefix) + r'/'

    def outside_prefix(candidate):
        # Dirs that do not share the prefix of html_dir need absolute links.
        return re.search(prefix_match, candidate) is None

    # We scan the directory containing GLib and any directories in GNOME2_PATH
    # first, but these will be overridden by any later scans.
    glib_dir = common.GetModuleDocDir('glib-2.0')
    if os.path.exists(glib_dir):
        # Some predefined link targets to get links into type hierarchies as these
        # have no targets. These are always absolute for now.
        predefined = (
            ('GBoxed', 'gobject-Boxed-Types.html'),
            ('GEnum', 'gobject-Enumeration-and-Flag-Types.html'),
            ('GFlags', 'gobject-Enumeration-and-Flag-Types.html'),
            ('GInterface', 'GTypeModule.html'),
        )
        for symbol, page in predefined:
            Links[symbol] = glib_dir + '/gobject/' + page

        if glib_dir != options.html_dir:
            logging.info('Scanning GLib directory: %s', glib_dir)
            ScanIndices(glib_dir, outside_prefix(glib_dir))

    gnome2_path = os.environ.get('GNOME2_PATH')
    if gnome2_path:
        for root in gnome2_path.split(':'):
            html_root = root + '/share/gtk-doc/html'
            if not os.path.exists(html_root) or html_root == options.html_dir:
                continue
            logging.info('Scanning GNOME2_PATH directory: %s', html_root)
            ScanIndices(html_root, outside_prefix(html_root))

    logging.info('Scanning HTML_DIR directory: %s', options.html_dir)
    ScanIndices(options.html_dir, 0)
    logging.info('Scanning MODULE_DIR directory: %s', options.module_dir)
    ScanIndices(options.module_dir, 0)

    # check all extra dirs, but skip already scanned dirs or subdirs of those
    for extra in options.extra_dir:
        extra = extra.rstrip('/')
        logging.info('Scanning EXTRA_DIR directory: %s', extra)

        # If the --extra-dir option is not relative and is not sharing the same
        # prefix as the target directory of the docs, we need to use absolute
        # directories for the links
        needs_absolute = not extra.startswith('..') and outside_prefix(extra)
        ScanIndices(extra, 1 if needs_absolute else 0)

    ReadSections(options)
    FixCrossReferences(options)
def ScanIndices(scan_dir, use_absolute_links):
    """Recursively read all devhelp2 index files found below scan_dir.

    Every directory is processed at most once; visited directories are
    recorded in DirCache. Compressed index files are not read, instead a
    hint telling the user to decompress them is printed.

    Args:
        scan_dir: directory to scan; empty values are ignored.
        use_absolute_links: truthy if the links read from this directory
            must be stored with absolute paths (passed on to ReadDevhelp()).
    """
    if not scan_dir or scan_dir in DirCache:
        return
    DirCache[scan_dir] = 1

    logging.info('Scanning index directory: %s, absolute: %d', scan_dir, use_absolute_links)

    # TODO(ensonic): this code is the same as in rebase.py
    if not os.path.isdir(scan_dir):
        logging.info('Cannot open dir "%s"', scan_dir)
        return

    subdirs = []
    for entry in os.listdir(scan_dir):
        full_entry = os.path.join(scan_dir, entry)
        if os.path.isdir(full_entry):
            subdirs.append(full_entry)
            continue

        if entry.endswith('.devhelp2'):
            # if devhelp-file is good don't read index.sgml
            ReadDevhelp(full_entry, use_absolute_links)
        elif entry == "index.sgml.gz" and not os.path.exists(os.path.join(scan_dir, 'index.sgml')):
            # debian/ubuntu started to compress this as index.sgml.gz :/
            print(' Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/77138 . For now run:\n'
                  'gunzip %s\n' % full_entry)
        elif entry.endswith('.devhelp2.gz') and not os.path.exists(full_entry[:-3]):
            # debian/ubuntu started to compress this as *devhelp2.gz :/
            # full_entry is a path, so it has to be formatted with %s; the
            # former %d raised a TypeError instead of printing the hint.
            print('Please fix https://bugs.launchpad.net/ubuntu/+source/gtk-doc/+bug/1466210 . For now run:\n'
                  'gunzip %s\n' % full_entry)
        # we could consider supporting: gzip module

    # Now recursively scan the subdirectories.
    for subdir in subdirs:
        ScanIndices(subdir, use_absolute_links)
def ReadDevhelp(file, use_absolute_links):
    """Read all link targets from a devhelp2 index file into Links.

    Every ' link="page#anchor"' attribute found in the file registers
    Links[anchor] = <base dir> + 'page#anchor'.

    Args:
        file: path of the *.devhelp2 file.
        use_absolute_links: if truthy the links are prefixed with the
            directory containing the file, otherwise with '../<module dir>/'.
    """
    # Determine the absolute directory, to be added to links in $file
    # if we need to use an absolute link.
    # $file will be something like /prefix/gnome/share/gtk-doc/html/gtk/$file
    # We want the part up to 'html/.*' since the links in $file include
    # the rest.
    m = re.search(r'(.*\/)(.*?)\/.*?\.devhelp2', file)
    base = "../"
    if use_absolute_links:
        # For uninstalled index files we'd need to map the path to where it
        # will be installed to
        if not file.startswith('./'):
            if m:
                base = m.group(1) + m.group(2) + '/'
            else:
                # The pattern needs two path separators. For shorter paths
                # (e.g. 'docs/docs.devhelp2') use the directory of the file,
                # which is what the pattern yields for longer paths, instead
                # of failing on a missing match.
                parent = os.path.dirname(file)
                base = parent + '/' if parent else ''
    elif m:
        base += m.group(2) + '/'
    else:
        base = ''

    logging.info('Scanning index file=%s, absolute=%d, dir=%s', file, use_absolute_links, base)

    # Close the index file deterministically instead of relying on the GC.
    with open(file) as index:
        for line in index:
            m = re.search(r' link="([^#]*)#([^"]*)"', line)
            if m:
                link = m.group(1) + '#' + m.group(2)
                logging.debug('Found id: %s href: %s', m.group(2), link)
                Links[m.group(2)] = base + link
def ReadSections(options):
    """Register the symbols of 'Standard' and 'Private' subsections in NoLinks.

    Reads '<options.module>-sections.txt'. Symbols listed in a subsection
    named 'Standard' or 'Private' are added (as SGML ids) to NoLinks, so that
    unresolved links to them are not reported.

    Args:
        options: parsed command line options; only options.module is used.
    """
    # Symbols listed before the first <SECTION>/<SUBSECTION> tag belong to no
    # subsection; start with an empty name so they don't hit an unbound local.
    subsection = ''
    with open(options.module + '-sections.txt') as sections:
        for line in sections:
            if line.startswith('#') or line.strip() == '':
                continue
            if line.startswith('<SECTION>'):
                subsection = ''
                continue
            # Also matches a plain '<SUBSECTION>', which resets the name to ''.
            m = re.search(r'^<SUBSECTION\s*(.*)>', line)
            if m:
                subsection = m.group(1)
                continue
            if line.startswith('</SECTION>'):
                continue
            if re.search(r'^<(TITLE|FILE|INCLUDE)>(.*)<\/\1>', line):
                continue

            symbol = line.strip()
            if subsection == "Standard" or subsection == "Private":
                NoLinks.add(common.CreateValidSGMLID(symbol))
def FixCrossReferences(options):
    """Run FixHTMLFile() on each .html/.htm file directly in options.module_dir.

    Sub-directories are not descended into.
    """
    module_dir = options.module_dir
    # TODO(ensonic): use glob.glob()?
    for name in os.listdir(module_dir):
        path = os.path.join(module_dir, name)
        if not os.path.isdir(path) and name.endswith(('.html', '.htm')):
            FixHTMLFile(options, path)
def FixHTMLFile(options, file):
    """Rewrite one HTML file in place, resolving all GTKDOCLINK elements.

    If a source highlighter is configured, the program listings are
    highlighted first and the symbols marked up by the highlighter are
    turned into GTKDOCLINK elements. Then every GTKDOCLINK element is
    replaced, line by line, with the result of MakeXRef(). The result is
    written to '<file>.new', which then replaces the original file.

    Args:
        options: parsed command line options, handed on to the highlighter
            and to MakeXRef().
        file: path of the HTML file to fix.
    """
    logging.info('Fixing file: %s', file)

    with open(file) as src:
        content = src.read()

    if config.highlight:
        # FIXME: ideally we'd pass a clue about the example language to the highlighter
        # unfortunately the "language" attribute is not appearing in the html output
        # we could patch the customization to have <code class="xxx"> inside of <pre>
        if config.highlight.endswith('vim'):
            highlighter = HighlightSourceVim
        else:
            highlighter = HighlightSource

        def highlight_listing(m):
            return highlighter(options, m.group(1), m.group(2))

        content = re.sub(
            r'<div class=\"(example-contents|informalexample)\"><pre class=\"programlisting\">(.*?)</pre></div>',
            highlight_listing, content, flags=re.DOTALL)

        # Undo the entity escaping of GTKDOCLINK elements inside the listings.
        # The replacement must not contain backslash escapes such as '\<':
        # Python copies unknown non-letter escapes verbatim into the result,
        # which would keep the element from being matched below.
        content = re.sub(r'\&lt;GTKDOCLINK\s+HREF=\&quot;(.*?)\&quot;\&gt;(.*?)\&lt;/GTKDOCLINK\&gt;',
                         r'<GTKDOCLINK HREF="\1">\2</GTKDOCLINK>', content, flags=re.DOTALL)

        # From the highlighter we get all the functions marked up. Now we can turn them into GTKDOCLINK items
        def link_symbol(m):
            return MakeGtkDocLink(m.group(1), m.group(2), m.group(3))

        content = re.sub(r'(<span class=\"function\">)(.*?)(</span>)', link_symbol, content, flags=re.DOTALL)
        # We can also try the first item in stuff marked up as 'normal'
        content = re.sub(
            r'(<span class=\"normal\">\s*)(.+?)((\s+.+?)?\s*</span>)', link_symbol, content, flags=re.DOTALL)

    lines = content.rstrip().split('\n')

    def xref_for_line(line_no):
        # Bind the 1-based line number used in warnings about missing links.
        def make_xref(m):
            return MakeXRef(options, file, line_no, m.group(1), m.group(2))
        return make_xref

    for i, text in enumerate(lines):
        line_no = i + 1
        lines[i] = re.sub(r'<GTKDOCLINK\s+HREF="([^"]*)"\s*>(.*?)</GTKDOCLINK\s*>', xref_for_line(line_no), text)
        if 'GTKDOCLINK' in lines[i]:
            # Report the same 1-based line number that MakeXRef() receives.
            logging.info('make xref failed for line %d: "%s"', line_no, lines[i])

    # Make sure the new content is flushed and closed before it replaces the
    # original file.
    new_file = file + '.new'
    with open(new_file, 'w') as dst:
        dst.write('\n'.join(lines))

    os.unlink(file)
    os.rename(new_file, file)
def MakeXRef(options, file, line, id, text):
    """Return the replacement markup for one GTKDOCLINK element.

    Args:
        options: parsed command line options; only options.module is used.
        file: name of the HTML file being fixed (for warnings).
        line: line number inside file (for warnings).
        id: the link target id taken from the HREF attribute.
        text: the content of the GTKDOCLINK element.

    Returns:
        '<a href="...">text</a>' if a target for id is known in Links,
        otherwise the unchanged text. A missing target is reported once via
        common.LogWarning() unless one of the exemptions below applies.
    """
    href = Links.get(id)

    # This is a workaround for some inconsistency we have with CreateValidSGMLID
    if not href and ':' in id:
        href = Links.get(id.replace(':', '--'))
    # poor mans plural support
    if not href and id.endswith('s'):
        tid = id[:-1]
        href = Links.get(tid)
        if not href:
            href = Links.get(tid + '-struct')
    if not href:
        href = Links.get(id + '-struct')

    if href:
        # if it is a link to same module, remove path to make it work uninstalled
        # The module name is literal text, so it is escaped: names containing
        # regex meta characters (e.g. 'gtk+') would otherwise never match.
        m = re.search(r'^\.\./' + re.escape(options.module) + '/(.*)$', href)
        if m:
            href = m.group(1)
            logging.info('Fixing link to uninstalled doc: %s, %s, %s', id, href, text)
        else:
            logging.info('Fixing link: %s, %s, %s', id, href, text)
        return "<a href=\"%s\">%s</a>" % (href, text)

    logging.info('no link for: %s, %s', id, text)

    # don't warn multiple times and also skip blacklisted (ctypes)
    if id in NoLinks:
        return text
    # if it's a function, don't warn if it does not contain a "_"
    # (transformed to "-")
    # - gnome coding style would use '_'
    # - will avoid wrong warnings for ansi c functions
    if re.search(r' class=\"function\"', text) and '-' not in id:
        return text
    # if it's a 'return value', don't warn (implicitly created link)
    if re.search(r' class=\"returnvalue\"', text):
        return text
    # if it's a 'type', don't warn if it starts with lowercase
    # - gnome coding style would use CamelCase
    # (id[:1] instead of id[0] so that an empty id does not raise)
    if re.search(r' class=\"type\"', text) and id[:1].islower():
        return text
    # don't warn for self links
    if text == id:
        return text

    common.LogWarning(file, line, 'no link for: "%s" -> (%s).' % (id, text))
    NoLinks.add(id)
    return text
def MakeGtkDocLink(pre, symbol, post):
    """Wrap symbol in a GTKDOCLINK element, framed by pre and post.

    The SGML id derived from symbol becomes the HREF of the element and is
    also added to NoLinks.
    """
    link_id = common.CreateValidSGMLID(symbol)

    # these are implicitly created links in highlighted sources
    # we don't want warnings for those if the links cannot be resolved.
    NoLinks.add(link_id)

    return ''.join([pre, '<GTKDOCLINK HREF="', link_id, '">', symbol, '</GTKDOCLINK>', post])
def HighlightSource(options, type, source):
    """Highlight a program listing with the configured external highlighter.

    Args:
        options: parsed command line options; options.src_lang replaces
            '$SRC_LANG' in config.highlight_options.
        type: class of the surrounding div, passed on to
            HighlightSourcePostprocess().
        source: the (HTML escaped) content of the program listing.

    Returns:
        The HTML produced by HighlightSourcePostprocess() for the
        highlighted listing.

    Raises:
        subprocess.CalledProcessError: if the highlighter exits non-zero.
    """
    source = HighlightSourcePreProcess(source)

    # write source to a temp file
    # FIXME: use .c for now to hint the language to the highlighter
    # The file is opened in text mode: the default binary mode rejects str
    # on Python 3.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.c') as f:
        f.write(source)
        f.flush()
        temp_source_file = f.name
        highlight_options = config.highlight_options.replace('$SRC_LANG', options.src_lang)

        logging.info('running %s %s %s', config.highlight, highlight_options, temp_source_file)

        # format source
        # universal_newlines=True makes check_output() return text instead of
        # bytes on Python 3, as needed by the str based processing below.
        highlighted_source = subprocess.check_output(
            [config.highlight] + shlex.split(highlight_options) + [temp_source_file],
            universal_newlines=True)
        logging.debug('result: [%s]', highlighted_source)

    if config.highlight.endswith('/source-highlight'):
        highlighted_source = re.sub(r'^<\!-- .*? -->', '', highlighted_source, flags=re.MULTILINE | re.DOTALL)
        highlighted_source = re.sub(
            r'<pre><tt>(.*?)</tt></pre>', r'\1', highlighted_source, flags=re.MULTILINE | re.DOTALL)
    elif config.highlight.endswith('/highlight'):
        # need to rewrite the stylesheet classes
        class_map = (
            ('com', 'comment'),
            ('dir', 'preproc'),
            ('kwd', 'function'),
            ('kwa', 'keyword'),
            ('line', 'linenum'),
            ('num', 'number'),
            ('str', 'string'),
            ('sym', 'symbol'),
        )
        for old, new in class_map:
            highlighted_source = highlighted_source.replace(
                '<span class="gtkdoc %s">' % old, '<span class="%s">' % new)
        # maybe also do
        # highlighted_source = re.sub(r'</span>(.+)<span', '</span><span class="normal">\1</span><span')

    return HighlightSourcePostprocess(type, highlighted_source)
def HighlightSourceVim(options, type, source):
    """Highlight a program listing using vim's 2html.vim script.

    Args:
        options: parsed command line options; options.src_lang selects the
            vim syntax.
        type: class of the surrounding div, passed on to
            HighlightSourcePostprocess().
        source: the (HTML escaped) content of the program listing.

    Returns:
        The HTML produced by HighlightSourcePostprocess() for the
        highlighted listing.

    Raises:
        subprocess.CalledProcessError: if the vim pipeline exits non-zero.
    """
    source = HighlightSourcePreProcess(source)

    # write source to a temp file
    # The file is opened in text mode: the default binary mode rejects str
    # on Python 3.
    with tempfile.NamedTemporaryFile(mode='w+', suffix='.h') as f:
        f.write(source)
        f.flush()
        temp_source_file = f.name
        html_file = temp_source_file + '.html'

        # format source
        # TODO(ensonic): use p.communicate()
        script = "echo 'let html_number_lines=0|let html_use_css=1|let html_use_xhtml=1|e %s|syn on|set syntax=%s|run! syntax/2html.vim|w! %s.html|qa!' | " % (
            temp_source_file, options.src_lang, temp_source_file)
        script += "%s -n -e -u NONE -T xterm >/dev/null" % config.highlight
        subprocess.check_call([script], shell=True)

        with open(html_file) as html:
            highlighted_source = html.read()

        # remove temp files
        os.unlink(html_file)

    # Keep only the content of the <pre> element. '.' has to match newlines
    # (re.DOTALL) to drop the complete document header and footer.
    highlighted_source = re.sub(r'.*<pre\b[^>]*>\n', '', highlighted_source, flags=re.DOTALL)
    highlighted_source = re.sub(r'</pre>.*', '', highlighted_source, flags=re.DOTALL)

    # need to rewrite the stylesheet classes
    class_map = (
        ('Comment', 'comment'),
        ('PreProc', 'preproc'),
        ('Statement', 'keyword'),
        ('Identifier', 'function'),
        ('Constant', 'number'),
        ('Special', 'symbol'),
        ('Type', 'type'),
    )
    for old, new in class_map:
        highlighted_source = highlighted_source.replace('<span class="%s">' % old, '<span class="%s">' % new)

    return HighlightSourcePostprocess(type, highlighted_source)
def HighlightSourcePreProcess(source):
    """Prepare the content of a program listing for the highlighter.

    Strips surrounding blank space, removes the leading whitespace of the
    first line from the start of every line and turns the '&lt;', '&gt;'
    and '&amp;' entities back into plain characters.
    """
    # chop off leading and trailing empty lines, leave leading space in first real line
    source = source.strip(' ').strip('\n').rstrip()

    # cut common indent
    indent = re.match(r'(\s+)', source)
    if indent:
        source = re.sub(r'^' + indent.group(1), '', source, flags=re.MULTILINE)

    # avoid double entity replacement
    for entity, char in (('&lt;', '<'), ('&gt;', '>'), ('&amp;', '&')):
        source = source.replace(entity, char)
    return source
def HighlightSourcePostprocess(type, highlighted_source):
    """Wrap highlighted source code into the listing table markup.

    Args:
        type: value for the class attribute of the outer div.
        highlighted_source: HTML fragment produced by the highlighter.

    Returns:
        A div containing a two column table: line numbers on the left and
        the highlighted code on the right.
    """
    # chop off leading and trailing empty lines
    code = highlighted_source.strip()

    # turn common urls in comments into links
    code = re.sub(r'<span class="url">(.*?)</span>',
                  r'<span class="url"><a href="\1">\1</a></span>',
                  code, flags=re.DOTALL)

    # we do own line-numbering
    last_line = code.count('\n') + 1
    numbers = '\n'.join(str(n) for n in range(1, last_line + 1))

    template = (
        '<div class="%s">\n'
        '<table class="listing_frame" border="0" cellpadding="0" cellspacing="0">\n'
        '<tbody>\n'
        '<tr>\n'
        '<td class="listing_lines" align="right"><pre>%s</pre></td>\n'
        '<td class="listing_code"><pre class="programlisting">%s</pre></td>\n'
        '</tr>\n'
        '</tbody>\n'
        '</table>\n'
        '</div>\n'
    )
    return template % (type, numbers, code)