# gtk-doc - GTK DocBook documentation generator.
# Copyright (C) 2001  Damon Chaplin
#               2007-2016  Stefan Sauer
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; either version 2 of the License, or
# (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
22 from collections
import OrderedDict
33 """Check GTKDOC_TRACE environment variable.
35 Set python log level to the value of the environment variable (DEBUG, INFO,
36 WARNING, ERROR and CRITICAL) or INFO if the environment variable is empty.
38 log_level
= os
.environ
.get('GTKDOC_TRACE', 'WARNING')
41 logging
.basicConfig(stream
=sys
.stdout
,
42 level
=logging
.getLevelName(log_level
.upper()),
43 format
='%(asctime)s:%(filename)s:%(funcName)s:%(lineno)d:%(levelname)s:%(message)s')
44 # When redirecting the output and running with a non utf-8 locale
45 # we get UnicodeEncodeError:
46 encoding
= sys
.stdout
.encoding
47 if 'PYTHONIOENCODING' not in os
.environ
and (not encoding
or encoding
!= 'UTF-8'):
49 sys
.stdout
= open(sys
.stdout
.fileno(), mode
='w', encoding
='utf8', buffering
=1)
def UpdateFileIfChanged(old_file, new_file, make_backup):
    """Install new_file as old_file, but only if the contents differ.

    Compares the old version of the file with the new version and, if the
    file has changed, moves the new version into the old version's place.
    This is used so we only change files if needed, so we can do proper
    dependency tracking.

    Args:
        old_file (str): The pathname of the old file.
        new_file (str): The pathname of the new version of the file.
        make_backup (bool): True if a backup of the old file should be kept.
                            It will have the .bak suffix added to the file name.

    Returns:
        bool: It returns False if the file hasn't changed, and True if it has.
    """
    # NOTE(review): the three os.unlink() calls below were not visible in the
    # reviewed copy of this function; they are reconstructed from the
    # surrounding logic (discard the redundant new file, clear a stale backup,
    # drop the old file when no backup is wanted) -- confirm against upstream.
    logging.debug("Comparing %s with %s...", old_file, new_file)

    if os.path.exists(old_file):
        # Compare as bytes so the check is independent of any text encoding.
        with open(old_file, 'rb') as f:
            old_contents = f.read()
        with open(new_file, 'rb') as f:
            new_contents = f.read()
        if old_contents == new_contents:
            os.unlink(new_file)
            logging.debug("-> content is the same.")
            return False

        if make_backup:
            backupname = old_file + '.bak'
            # os.rename() does not overwrite an existing target on every
            # platform, so remove a previous backup first.
            if os.path.exists(backupname):
                os.unlink(backupname)
            os.rename(old_file, backupname)
        else:
            os.unlink(old_file)
        logging.debug("-> content differs.")
    else:
        logging.debug("-> %s created.", old_file)

    os.rename(new_file, old_file)
    return True
def GetModuleDocDir(module_name):
    """Get the docdir for the given module via pkg-config.

    Runs the executable named by config.pkg_config with '--variable=prefix'
    for the module and appends 'share/gtk-doc/html' to the reported prefix.

    Args:
        module_name (string): The module, e.g. 'glib-2.0'

    Returns:
        str: the doc directory or None
    """
    try:
        prefix = subprocess.check_output(
            [config.pkg_config, '--variable=prefix', module_name],
            universal_newlines=True)
    except subprocess.CalledProcessError:
        # check_output() raises this when the command exits with a non-zero
        # status; report "no doc directory" instead of propagating.
        return None
    # The command output ends with a newline; strip it before joining.
    return os.path.join(prefix.strip(), 'share/gtk-doc/html')
def LogWarning(filename, line, message):
    """Log a warning in gcc style format.

    The warning is printed to stdout as '<filename>:<line>: warning: <message>'.

    Args:
        filename (str): The file the error comes from; "unknown" is printed
                        when this is empty or None.
        line (int): line number in the file
        message (str): the error message to print
    """
    filename = filename or "unknown"

    # TODO: write to stderr
    print("%s:%d: warning: %s" % (filename, line, message))
def CreateValidSGMLID(xml_id):
    """Creates a valid SGML 'id' from the given string.

    According to http://www.w3.org/TR/html4/types.html#type-id "ID and NAME
    tokens must begin with a letter ([A-Za-z]) and may be followed by any number
    of letters, digits ([0-9]), hyphens ("-"), underscores ("_"), colons (":"),
    and periods (".")."

    When creating SGML IDS, we append ":CAPS" to all all-caps identifiers to
    prevent name clashes (SGML ids are case-insensitive). (It basically never is
    the case that mixed-case identifiers would collide.)

    Args:
        xml_id (str): The text to be converted into a valid SGML id.

    Returns:
        str: The converted id.
    """
    # Special case, '_' would end up as '' so we use 'gettext-macro' instead.
    if xml_id == '_':
        return "gettext-macro"

    xml_id = re.sub(r'[,;]', '', xml_id)      # drop separators entirely
    xml_id = re.sub(r'[_ ]', '-', xml_id)     # underscores and spaces become hyphens
    xml_id = re.sub(r'^-+', '', xml_id)       # strip leading hyphens
    # '::' must be handled before ':' so it maps to a single hyphen.
    xml_id = xml_id.replace('::', '-')
    xml_id = xml_id.replace(':', '--')

    # Append ":CAPS" to all all-caps identifiers
    # FIXME: there are some inconsistencies here, we have index files containing e.g. TRUE--CAPS
    if xml_id.isupper() and not xml_id.endswith('-CAPS'):
        xml_id += ':CAPS'

    return xml_id
164 # Parsing helpers (move to mkdb ?)
class ParseError(Exception):
    """Raised by the declaration parsers when the input cannot be parsed."""
def PreprocessStructOrEnum(declaration):
    """Trim a type declaration for display.

    Removes private sections and comments from the declaration.

    Args:
        declaration (str): the type declaration (struct or enum)

    Returns:
        str: the trimmed declaration
    """
    # Remove private symbols
    # Assume end of declaration if line begins with '}'
    declaration = re.sub(r'\n?[ \t]*/\*\s*<\s*(private|protected)\s*>\s*\*/.*?(?:/\*\s*<\s*public\s*>\s*\*/|(?=^\}))',
                         '', declaration, flags=re.MULTILINE | re.DOTALL)

    # Remove all other comments
    # ... block comments that occupy whole lines collapse to a single newline
    declaration = re.sub(r'\n\s*/\*.*?\*/\s*\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
    # ... remaining inline block comments become a single space.  The body
    # is matched one character at a time: nesting '+' inside '*' here would
    # backtrack exponentially on an unterminated '/*'.
    declaration = re.sub(r'/\*(?:[^*]|\*(?!/))*\*/', r' ', declaration)
    # ... whole-line '//' comments collapse to a single newline
    declaration = re.sub(r'\n\s*//.*?\n', r'\n', declaration, flags=re.MULTILINE | re.DOTALL)
    # ... trailing '//' comments are cut off at the end of the line
    declaration = re.sub(r'//.*', '', declaration)

    return declaration
195 # TODO: output_function_params is always passed as 0
196 # TODO: we always pass both functions
197 def ParseStructDeclaration(declaration
, is_object
, output_function_params
, typefunc
=None, namefunc
=None):
198 """ Parse a struct declaration.
200 Takes a structure declaration and breaks it into individual type declarations.
203 declaration (str): the declaration to parse
204 is_object (bool): true if this is an object structure
205 output_function_params (bool): true if full type is wanted for function pointer members
206 typefunc (func): function to apply to type
207 namefunc (func): function to apply to name
210 dict: map of (symbol, decl) pairs describing the public declaration
213 # For forward struct declarations just return an empty array.
214 if re
.search(r
'(?:struct|union)\s+\S+\s*;', declaration
, flags
=re
.MULTILINE | re
.DOTALL
):
217 # Remove all private parts of the declaration
218 # For objects, assume private
220 declaration
= re
.sub(r
'''((?:struct|union)\s+\w*\s*\{)
222 (?:/\*\s*<\s*public\s*>\s*\*/|(?=\}))''',
223 r
'\1', declaration
, flags
=re
.MULTILINE | re
.DOTALL | re
.VERBOSE
)
225 # Remove g_iface, parent_instance and parent_class if they are first member
226 declaration
= re
.sub(r
'(\{)\s*(\w)+\s+(g_iface|parent_instance|parent_class)\s*;', r
'\1', declaration
)
228 declaration
= PreprocessStructOrEnum(declaration
)
230 if declaration
.strip() == '':
233 # Prime match after "struct/union {" declaration
234 match
= re
.search(r
'(?:struct|union)\s+\w*\s*\{', declaration
, flags
=re
.MULTILINE | re
.DOTALL
)
236 raise ParseError('Declaration "%s" does not begin with "struct/union [NAME] {"' % declaration
)
238 logging
.debug('public fields in struct/union: %s', declaration
)
240 result
= OrderedDict()
242 # Treat lines in sequence, allowing singly nested anonymous structs and unions.
243 for m
in re
.finditer(r
'\s*([^{;]+(\{[^\}]*\}[^{;]+)?);', declaration
[match
.end():], flags
=re
.MULTILINE | re
.DOTALL
):
246 logging
.debug('checking "%s"', line
)
248 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
251 # FIXME: Just ignore nested structs and unions for now
255 # ignore preprocessor directives
256 line
= re
.sub(r
'^#.*?\n\s*', '', line
, flags
=re
.MULTILINE | re
.DOTALL
)
258 if re
.search(r
'^\s*\}\s*\w*\s*$', line
):
261 func_match
= re
.search(r
'''^
262 (const\s+|G_CONST_RETURN\s+|unsigned\s+|signed\s+|long\s+|short\s+)*(struct\s+|enum\s+)? # mod1
264 (\**(?:\s*restrict)?)\s* # ptr1
268 \(\s*\*\s*(\w+)\s*\)\s* # name
269 \(([^)]*)\)\s* # func_params
270 $''', line
, flags
=re
.VERBOSE
)
271 vars_match
= re
.search(r
'''^
272 ((?:const\s+|volatile\s+|unsigned\s+|signed\s+|short\s+|long\s+)?)(struct\s+|enum\s+)? # mod1
274 (\** \s* const\s+)? # mod2
276 $''', line
, flags
=re
.VERBOSE
)
278 # Try to match structure members which are functions
280 mod1
= func_match
.group(1) or ''
281 if func_match
.group(2):
282 mod1
+= func_match
.group(2)
283 func_type
= func_match
.group(3)
284 ptr1
= func_match
.group(4)
285 mod2
= func_match
.group(5) or ''
286 ptr2
= func_match
.group(6)
287 mod3
= func_match
.group(7) or ''
288 name
= func_match
.group(8)
289 func_params
= func_match
.group(9)
292 ptype
= typefunc(func_type
, '<type>%s</type>' % func_type
)
295 pname
= namefunc(name
)
297 if output_function_params
:
298 result
[name
] = '%s%s%s%s%s%s (*%s) (%s)' % (
299 mod1
, ptype
, ptr1
, mod2
, ptr2
, mod3
, pname
, func_params
)
301 result
[name
] = '%s ()' % pname
303 # Try to match normal struct fields of comma-separated variables/
305 mod1
= vars_match
.group(1) or ''
306 if vars_match
.group(2):
307 mod1
+= vars_match
.group(2)
308 vtype
= vars_match
.group(3)
311 ptype
= typefunc(vtype
, '<type>%s</type>' % vtype
)
312 mod2
= vars_match
.group(4) or ''
315 var_list
= vars_match
.group(5)
317 logging
.debug('"%s" "%s" "%s" "%s"', mod1
, vtype
, mod2
, var_list
)
319 mod1
= mod1
.replace(' ', ' ')
320 mod2
= mod2
.replace(' ', ' ')
322 for n
in var_list
.split(','):
323 # Each variable can have any number of '*' before the identifier,
324 # and be followed by any number of pairs of brackets or a bit field specifier.
325 # e.g. *foo, ***bar, *baz[12][23], foo : 25.
327 r
'^\s* (\**(?:\s*restrict\b)?) \s* (\w+) \s* (?: ((?:\[[^\]]*\]\s*)+) | (:\s*\d+)?) \s* $',
332 array
= m
.group(3) or ''
338 if ptrs
and not ptrs
.endswith('*'):
341 array
= array
.replace(' ', ' ')
342 bits
= bits
.replace(' ', ' ')
346 pname
= namefunc(name
)
348 result
[name
] = '%s%s%s %s%s%s%s;' % (mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
350 logging
.debug('Matched line: %s%s%s %s%s%s%s', mod1
, ptype
, mod2
, ptrs
, pname
, array
, bits
)
352 logging
.warning('Cannot parse struct field: "%s"', n
)
355 logging
.warning('Cannot parse structure field: "%s"', line
)
360 def ParseEnumDeclaration(declaration
):
361 """Parse an enum declaration.
363 This function takes a enumeration declaration and breaks it into individual
364 enum member declarations.
367 declaration (str): the declaration to parse
370 str: list of strings describing the public declaration
373 # For forward struct declarations just return an empty array.
374 if re
.search(r
'enum\s+\S+\s*;', declaration
, flags
=re
.MULTILINE | re
.DOTALL
):
377 declaration
= PreprocessStructOrEnum(declaration
)
379 if declaration
.strip() == '':
384 # Remove parenthesized expressions (in macros like GTK_BLAH = BLAH(1,3))
385 # to avoid getting confused by commas they might contain. This doesn't
386 # handle nested parentheses correctly.
387 declaration
= re
.sub(r
'\([^)\n]+\)', '', declaration
)
389 # Remove apostrophed characters (e.g. '}' or ',') values to avoid getting
390 # confused with end of enumeration.
391 # See https://bugzilla.gnome.org/show_bug.cgi?id=741305
392 declaration
= re
.sub(r
'\'.\'', '', declaration)
394 # Remove comma from comma - possible whitespace - closing brace sequence
395 # since it is legal in GNU C and C99 to have a trailing comma but doesn't
396 # result in an actual enum member
397 declaration
= re
.sub(r
',(\s*})', r
'\1', declaration
)
399 # Prime match after "typedef enum {" declaration
400 match
= re
.search(r
'(typedef\s+)?enum\s*(\S+\s*)?\{', declaration
, flags
=re
.MULTILINE | re
.DOTALL
)
402 raise ParseError('Enum declaration "%s" does not begin with "typedef enum {" or "enum [NAME] {"' % declaration
)
404 logging
.debug("public fields in enum: %s'", declaration
)
406 # Treat lines in sequence.
407 for m
in re
.finditer(r
'\s*([^,\}]+)([,\}])', declaration
[match
.end():], flags
=re
.MULTILINE | re
.DOTALL
):
409 terminator
= m
.group(2)
411 # ignore preprocessor directives
412 line
= re
.sub(r
'^#.*?\n\s*', '', line
, flags
=re
.MULTILINE | re
.DOTALL
)
414 m1
= re
.search(r
'^(\w+)\s*(=.*)?$', line
, flags
=re
.MULTILINE | re
.DOTALL
)
415 # Special case for GIOCondition, where the values are specified by
416 # macros which expand to include the equal sign like '=1'.
417 m2
= re
.search(r
'^(\w+)\s*GLIB_SYSDEF_POLL', line
, flags
=re
.MULTILINE | re
.DOTALL
)
419 result
.append(m1
.group(1))
421 result
.append(m2
.group(1))
422 elif line
.strip().startswith('#'):
423 # Special case include of <gdk/gdkcursors.h>, just ignore it
424 # Special case for #ifdef/#else/#endif, just ignore it
427 logging
.warning('Cannot parse enumeration member: %s', line
)
429 if terminator
== '}':
435 def ParseFunctionDeclaration(declaration
, typefunc
, namefunc
):
436 """Parse a function declaration.
438 This function takes a function declaration and breaks it into individual
439 parameter declarations.
442 declaration (str): the declaration to parse
443 typefunc (func): function to apply to type
444 namefunc (func): function to apply to name
447 dict: map of (symbol, decl) pairs describing the prototype
450 result
= OrderedDict()
454 logging
.debug('decl=[%s]', declaration
)
456 # skip whitespace and commas
457 declaration
, n
= re
.subn(r
'^[\s,]+', '', declaration
)
461 declaration
, n
= re
.subn(r
'^void\s*[,\n]', '', declaration
)
464 logging
.warning('void used as parameter %d in function %s', param_num
, declaration
)
465 result
['void'] = namefunc('<type>void</type>')
469 declaration
, n
= re
.subn(r
'^\s*[_a-zA-Z0-9]*\.\.\.\s*[,\n]', '', declaration
)
471 result
['...'] = namefunc('...')
475 # allow alphanumerics, '_', '[' & ']' in param names, try to match a standard parameter
477 regex
= r
'^\s*((?:(?:G_CONST_RETURN|G_GNUC_[A-Z_]+\s+|unsigned long|unsigned short|signed long|signed short|unsigned|signed|long|short|volatile|const)\s+)*)((?:struct\b|enum\b)?\s*\w+)\s*((?:(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*\*?\s*(?:const\b|restrict\b|G_GNUC_[A-Z_]+\b)?\s*)*)(\w+)?\s*((?:\[\S*\])*)\s*(?:G_GNUC_[A-Z_]+)?\s*[,\n]'
478 m
= re
.match(regex
, declaration
)
480 declaration
= re
.sub(regex
, '', declaration
)
482 pre
= m
.group(1) or ''
484 ptr
= m
.group(3) or ''
485 name
= m
.group(4) or ''
486 array
= m
.group(5) or ''
488 pre
= re
.sub(r
'\s+', ' ', pre
)
489 type = re
.sub(r
'\s+', ' ', type)
490 ptr
= re
.sub(r
'\s+', ' ', ptr
)
491 ptr
= re
.sub(r
'\s+$', '', ptr
)
492 if ptr
and not ptr
.endswith('*'):
495 logging
.debug('"%s" "%s" "%s" "%s" "%s"', pre
, type, ptr
, name
, array
)
497 m
= re
.search(r
'^((un)?signed .*)\s?', pre
)
504 name
= 'Param' + str(param_num
+ 1)
506 logging
.debug('"%s" "%s" "%s" "%s" "%s"', pre
, type, ptr
, name
, array
)
508 xref
= typefunc(type, '<type>%s</type>' % type)
509 result
[name
] = namefunc('%s%s %s%s%s' % (pre
, xref
, ptr
, name
, array
))
513 # Try to match parameters which are functions
514 # $1 $2 $3 $4 $5 $6 $7 $8
515 regex
= r
'^(const\s+|G_CONST_RETURN\s+|G_GNUC_[A-Z_]+\s+|signed\s+|unsigned\s+)*(struct\s+)?(\w+)\s*(\**)\s*(?:restrict\b)?\s*(const\s+)?\(\s*(\*[\s\*]*)\s*(\w+)\s*\)\s*\(([^)]*)\)\s*[,\n]'
516 m
= re
.match(regex
, declaration
)
518 declaration
= re
.sub(regex
, '', declaration
)
520 mod1
= m
.group(1) or ''
525 mod2
= m
.group(5) or ''
526 func_ptr
= m
.group(6)
528 func_params
= m
.group(8) or ''
530 if ptr1
and not ptr1
.endswith('*'):
532 func_ptr
= re
.sub(r
'\s+', ' ', func_ptr
)
534 logging
.debug('"%s" "%s" "%s" "%s" "%s"', mod1
, type, mod2
, func_ptr
, name
)
536 xref
= typefunc(type, '<type>%s</type>' % type)
537 result
[name
] = namefunc('%s%s%s%s (%s%s) (%s)' % (mod1
, xref
, ptr1
, mod2
, func_ptr
, name
, func_params
))
541 logging
.warning('Cannnot parse args for function in "%s"', declaration
)
547 def ParseMacroDeclaration(declaration
, namefunc
):
548 """Parse a macro declaration.
550 This function takes a macro declaration and breaks it into individual
551 parameter declarations.
554 declaration (str): the declaration to parse
555 namefunc (func): function to apply to name
558 dict: map of (symbol, decl) pairs describing the macro
561 result
= OrderedDict()
563 logging
.debug('decl=[%s]', declaration
)
565 m
= re
.search(r
'^\s*#\s*define\s+\w+\(([^\)]*)\)', declaration
)
568 params
= re
.sub(r
'\n', '', params
)
570 logging
.debug('params=[%s]', params
)
572 for param
in params
.split(','):
573 param
= param
.strip()
575 # Allow varargs variations
576 if param
.endswith('...'):
580 result
[param
] = namefunc(param
)