tools/make-wsluarm.py

   1 #!/usr/bin/env python3
   2 #
   3 # make-wsluarm.py
   4 #
   5 # By Gerald Combs <gerald@wireshark.org>
# Based on make-wsluarm.pl by Luis E. Garcia Ontanon <luis.ontanon@gmail.com> and Hadriel Kaplan
   7 #
   8 # Wireshark - Network traffic analyzer
   9 # By Gerald Combs <gerald@wireshark.org>
  10 # Copyright 1998 Gerald Combs
  11 #
  12 # SPDX-License-Identifier: GPL-2.0-or-later
  13 '''\
  14 WSLUA's Reference Manual Generator
  15
  16 This reads Doxygen-style comments in C code and generates wslua API documentation
  17 formatted as AsciiDoc.
  18
  19 Behavior as documented by Hadriel:
  20 - Allows modules (i.e., WSLUA_MODULE) to have detailed descriptions
  21 - Two (or more) line breaks in comments result in separate paragraphs
  22 - Any indent with a single leading star '*' followed by space is a bulleted list item
  23   reducing indent or having an extra linebreak stops the list
  24 - Any indent with a leading digits-dot followed by space, i.e. "1. ", is a numbered list item
  25   reducing indent or having an extra linebreak stops the list
  26 '''
  27
  28 import argparse
  29 import logging
  30 import os
  31 import re
  32 import sys
  33
  34 from enum import Enum
  35 from string import Template
  36
  37 def parse_desc(description):
  38     '''\
  39 Break up descriptions based on newlines and keywords. Some processing
  40 is done for code blocks and lists, but the output is otherwise left
  41 intact. Assumes the input has been stripped.
  42 '''
  43
  44     c_lines = description.strip().splitlines()
  45
  46     if len(c_lines) < 1:
  47         return ''
  48
  49     adoc_lines = []
  50     cli = iter(c_lines)
  51     for line in cli:
  52         raw_len = len(line)
  53         line = line.lstrip()
  54         indent = raw_len - len(line)
  55
  56         # If we find "[source,...]" then treat it as a block
  57         if re.search(r'\[source.*\]', line):
  58             # The next line *should* be a delimiter...
  59             block_delim = next(cli).strip()
  60             line += f'\n{block_delim}\n'
  61             block_line = next(cli)
  62             # XXX try except StopIteration
  63             while block_line.strip() != block_delim:
  64                 # Keep eating lines until the closing delimiter.
  65                 # XXX Strip indent spaces?
  66                 line += block_line + '\n'
  67                 block_line = next(cli)
  68             line += block_delim + '\n'
  69
  70             adoc_lines.append(line)
  71         elif re.match(r'^\s*$', line):
  72             # line is either empty or just whitespace, and we're not in a @code block
  73             # so it's the end of a previous paragraph, beginning of new one
  74             adoc_lines.append('')
  75         else:
  76             # We have a regular line, not in a @code block.
  77             # Add it as-is.
  78
  79             # if line starts with "@version" or "@since", make it a "Since:"
  80             if re.match(r'^@(version|since)\s+', line):
  81                 line = re.sub(r'^@(version|since)\s+', 'Since: ', line)
  82                 adoc_lines.append(line)
  83
  84             # If line starts with single "*" and space, leave it mostly intact.
  85             elif re.match(r'^\*\s', line):
  86                 adoc_lines += ['', line]
  87                 # keep eating until we find a blank line or end
  88                 line = next(cli)
  89                 try:
  90                     while not re.match(r'^\s*$', line):
  91                         raw_len = len(line)
  92                         line = line.lstrip()
  93                         # if this is less indented than before, break out
  94                         if raw_len - len(line) < indent:
  95                             break
  96                         adoc_lines += [line]
  97                         line = next(cli)
  98                 except StopIteration:
  99                     pass
 100                 adoc_lines.append('')
 101
 102             # if line starts with "1." and space, leave it mostly intact.
 103             elif re.match(r'^1\.\s', line):
 104                 adoc_lines += ['', line]
 105                 # keep eating until we find a blank line or end
 106                 line = next(cli)
 107                 try:
 108                     while not re.match(r'^\s*$', line):
 109                         raw_len = len(line)
 110                         line = line.lstrip()
 111                         # if this is less indented than before, break out
 112                         if raw_len - len(line) < indent:
 113                             break
 114                         adoc_lines += [line]
 115                         line = next(cli)
 116                 except StopIteration:
 117                     pass
 118                 adoc_lines.append('')
 119
 120             # Just a normal line, add it to array
 121             else:
 122                 # Nested Lua arrays
 123                 line = re.sub(r'\[\[(.*)\]\]', r'$$\1$$', line)
 124                 adoc_lines += [line]
 125
 126     # Strip out consecutive empty lines.
 127     # This isn't strictly necessary but makes the AsciiDoc output prettier.
 128     adoc_lines = '\n'.join(adoc_lines).splitlines()
 129     adoc_lines = [val for idx, val in enumerate(adoc_lines) if idx == 0 or not (val == '' and val == adoc_lines[idx - 1])]
 130
 131     return '\n'.join(adoc_lines)
 132
 133
 134 class LuaFunction:
 135     def __init__(self, c_file, id, start, name, raw_description):
 136         self.c_file = c_file
 137         self.id = id
 138         self.start = start
 139         self.name = name
 140         if not raw_description:
 141             raw_description = ''
 142         self.description = parse_desc(raw_description)
 143         self.arguments = [] # (name, description, optional)
 144         self.returns = [] # description
 145         self.errors = [] # description
 146         logging.info(f'Created function {id} ({name}) at {start}')
 147
 148     def add_argument(self, id, raw_name, raw_description, raw_optional):
 149         if id != self.id:
 150             logging.critical(f'Invalid argument ID {id} in function {self.id}')
 151             sys.exit(1)
 152         if not raw_description:
 153             raw_description = ''
 154         optional = False
 155         if raw_optional == 'OPT':
 156             optional = True
 157         self.arguments.append((raw_name.lower(), parse_desc(raw_description), optional))
 158
 159     def extract_buf(self, buf):
 160         "Extract arguments, errors, and return values from a function's buffer."
 161
 162         # Splits "WSLUA_OPTARG_ProtoField_int8_NAME /* food */" into
 163         # "OPT" (1), "ProtoField_int8" (2), "NAME" (3), ..., ..., " food " (6)
 164         # Handles functions like "loadfile(filename)" too.
 165         for m in re.finditer(r'#define WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s+\d+' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
 166             self.add_argument(m.group(2), m.group(3), m.group(6), m.group(1))
 167             logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
 168
 169         # Same as above, except that there is no macro but a (multi-line) comment.
 170         for m in re.finditer(r'/\*\s*WSLUA_(OPT)?ARG_((?:[A-Za-z0-9]+_)?[a-z0-9_]+)_([A-Z0-9_]+)\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
 171             self.add_argument(m.group(2), m.group(3), m.group(4), m.group(1))
 172             logging.info(f'Created arg {m.group(3)} for {self.id} at {m.start()}')
 173
 174         for m in re.finditer(r'/\*\s+WSLUA_MOREARGS\s+([A-Za-z_]+)\s+(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
 175             self.add_argument(m.group(1), '...', m.group(2), False)
 176             logging.info(f'Created morearg for {self.id}')
 177
 178         for m in re.finditer(r'WSLUA_(FINAL_)?RETURN\(\s*.*?\s*\)\s*;' + TRAILING_COMMENT_RE, buf, re.MULTILINE|re.DOTALL):
 179             if m.group(4) and len(m.group(4)) > 0:
 180                 self.returns.append(m.group(4).strip())
 181                 logging.info(f'Created return for {self.id} at {m.start()}')
 182
 183         for m in re.finditer(r'/\*\s*_WSLUA_RETURNS_\s*(.*?)\*/', buf, re.MULTILINE|re.DOTALL):
 184             if m.group(1) and len(m.group(1)) > 0:
 185                 self.returns.append(m.group(1).strip())
 186                 logging.info(f'Created return for {self.id} at {m.start()}')
 187
 188         for m in re.finditer(r'WSLUA_ERROR\s*\(\s*(([A-Z][A-Za-z]+)_)?([a-z_]+),' + QUOTED_RE, buf, re.MULTILINE|re.DOTALL):
 189             self.errors.append(m.group(4).strip())
 190             logging.info(f'Created error {m.group(4)[:10]} for {self.id} at {m.start()}')
 191
 192     def to_adoc(self):
 193         # The Perl script wrapped optional args in '[]', joined them with ', ', and
 194         # converted non-alphabetic characters to underscores.
 195         mangled_names = [f'_{a}_' if optional else a for a, _, optional in self.arguments]
 196         section_name = re.sub('[^A-Za-z0-9]', '_', f'{self.name}_{"__".join(mangled_names)}_')
 197         opt_names = [f'[{a}]' if optional else a for a, _, optional in self.arguments]
 198         adoc_buf = f'''
 199 // {self.c_file}
 200 [#lua_fn_{section_name}]
 201 ===== {self.name}({', '.join(opt_names)})
 202
 203 {self.description}
 204 '''
 205         if len(self.arguments) > 0:
 206             adoc_buf += '''
 207 [float]
 208 ===== Arguments
 209 '''
 210         for (name, description, optional) in self.arguments:
 211             if optional:
 212                 name += ' (optional)'
 213             adoc_buf += f'\n{name}::\n'
 214
 215             if len(description) > 0:
 216                 adoc_buf += f'\n{description}\n'
 217
 218             adoc_buf += f'\n// function_arg_footer: {name}'
 219
 220         if len(self.arguments) > 0:
 221             adoc_buf += '\n// end of function_args\n'
 222
 223         if len(self.returns) > 0:
 224             adoc_buf += '''
 225 [float]
 226 ===== Returns
 227 '''
 228         for description in self.returns:
 229             adoc_buf += f'\n{description}\n'
 230
 231         if len(self.returns) > 0:
 232             adoc_buf += f'\n// function_returns_footer: {self.name}'
 233
 234         if len(self.errors) > 0:
 235             adoc_buf += '''
 236 [float]
 237 ===== Errors
 238 '''
 239         for description in self.errors:
 240             adoc_buf += f'\n* {description}\n'
 241
 242         if len(self.errors) > 0:
 243             adoc_buf += f'\n// function_errors_footer: {self.name}'
 244
 245         adoc_buf += f'\n// function_footer: {section_name}\n'
 246
 247         return adoc_buf
 248
 249
# Regex fragments shared by the extraction patterns in this file.
#
# TRAILING_COMMENT_RE matches an optional C comment that trails a statement.
# It contributes three capture groups; when appended to a pattern that
# already has N groups they become groups N+1 to N+3:
# group 1: whole trailing comment, e.g. " /* foo */" (None if there is no comment)
# group 2: any leading whitespace. XXX why is this not removed using (?:...)
# group 3: actual comment text, e.g. " foo ".
TRAILING_COMMENT_RE = r'((\s*|[\n\r]*)/\*(.*?)\*/)?'
# IN_COMMENT_RE skips leading whitespace and then optionally matches the
# remainder of a comment up to its closing "*/":
# group 1: the remaining text including the closing "*/"
# group 2: the remaining text without the whitespace that precedes "*/"
IN_COMMENT_RE       = r'[\s\r\n]*((.*?)\s*\*/)?'
# QUOTED_RE matches a double-quoted string that contains no double quotes
# (escaped quotes are not handled); group 1 is the text between the quotes.
QUOTED_RE           = r'"([^"]*)"'
 256
 257 # XXX We might want to create a "LuaClass" class similar to LuaFunction
 258 # and move these there.
 259 def extract_class_definitions(c_file, c_buf, module, classes, functions):
 260     for m in re.finditer(r'WSLUA_CLASS_DEFINE(?:_BASE)?\(\s*([A-Z][a-zA-Z0-9]+).*?\);' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
 261         raw_desc = m.group(4)
 262         if raw_desc is None:
 263             raw_desc = ''
 264         name = m.group(1)
 265         mod_class = {
 266             'description': parse_desc(raw_desc),
 267             'constructors': [],
 268             'methods': [],
 269             'attributes': [],
 270         }
 271         classes[name] = mod_class
 272         logging.info(f'Created class {name}')
 273     return 0
 274
 275 def extract_function_definitions(c_file, c_buf, module, classes, functions):
 276     for m in re.finditer(r'WSLUA_FUNCTION\s+wslua_([a-z_0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
 277         id = m.group(1)
 278         functions[id] = LuaFunction(c_file, id, m.start(), id, m.group(4))
 279
 280 def extract_constructor_definitions(c_file, c_buf, module, classes, functions):
 281     for m in re.finditer(r'WSLUA_CONSTRUCTOR\s+([A-Za-z0-9]+)_([a-z0-9_]+).*?\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
 282         class_name = m.group(1)
 283         id = f'{class_name}_{m.group(2)}'
 284         name = f'{class_name}.{m.group(2)}'
 285         functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
 286         classes[class_name]['constructors'].append(id)
 287
 288 def extract_constructor_markups(c_file, c_buf, module, classes, functions):
 289     for m in re.finditer(r'_WSLUA_CONSTRUCTOR_\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s*(.*?)\*/', c_buf, re.MULTILINE|re.DOTALL):
 290         class_name = m.group(1)
 291         id = f'{class_name}_{m.group(2)}'
 292         name = f'{class_name}.{m.group(2)}'
 293         functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(3))
 294         classes[class_name]['constructors'].append(id)
 295
 296 def extract_method_definitions(c_file, c_buf, module, classes, functions):
 297     for m in re.finditer(r'WSLUA_METHOD\s+([A-Za-z0-9]+)_([a-z0-9_]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
 298         class_name = m.group(1)
 299         id = f'{class_name}_{m.group(2)}'
 300         name = f'{class_name.lower()}:{m.group(2)}'
 301         functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
 302         classes[class_name]['methods'].append(id)
 303
 304 def extract_metamethod_definitions(c_file, c_buf, module, classes, functions):
 305     for m in re.finditer(r'WSLUA_METAMETHOD\s+([A-Za-z0-9]+)(__[a-z0-9]+)[^\{]*\{' + TRAILING_COMMENT_RE, c_buf, re.MULTILINE|re.DOTALL):
 306         class_name = m.group(1)
 307         id = f'{class_name}{m.group(2)}'
 308         name = f'{class_name.lower()}:{m.group(2)}'
 309         functions[id] = LuaFunction(c_file, id, m.start(), name, m.group(5))
 310         classes[class_name]['methods'].append(id)
 311
 312 def extract_attribute_markups(c_file, c_buf, module, classes, functions):
 313     for m in re.finditer(r'/\*\s+WSLUA_ATTRIBUTE\s+([A-Za-z0-9]+)_([a-z0-9_]+)\s+([A-Z]*)\s*(.*?)\*/', c_buf, re.MULTILINE|re.DOTALL):
 314         class_name = m.group(1)
 315         name = f'{m.group(1).lower()}.{m.group(2)}'
 316         mode = m.group(3)
 317         mode_desc = 'Mode: '
 318         if 'RO' in mode:
 319             mode_desc += 'Retrieve only.\n'
 320         elif 'WO' in mode:
 321             mode_desc += 'Assign only.\n'
 322         elif 'RW' in mode or 'WR' in mode:
 323             mode_desc += 'Retrieve or assign.\n'
 324         else:
 325             sys.stderr.write(f'Attribute does not have a RO/WO/RW mode {mode}\n')
 326             sys.exit(1)
 327
 328         attribute = {
 329             'name': name,
 330             'description': parse_desc(f'{mode_desc}\n{m.group(4)}'),
 331         }
 332         classes[class_name]['attributes'].append(attribute)
 333         logging.info(f'Created attribute {name} for class {class_name}')
 334
 335 def main():
 336     parser = argparse.ArgumentParser(description="WSLUA's Reference Manual Generator")
 337     parser.add_argument("c_files", nargs='+', metavar='C file', help="C file")
 338     parser.add_argument('--output-directory', help='Output directory')
 339     parser.add_argument('--verbose', action='store_true', help='Show more output')
 340     args = parser.parse_args()
 341
 342     logging.basicConfig(format='%(levelname)s: %(message)s', level=logging.DEBUG if args.verbose else logging.WARNING)
 343
 344     modules = {}
 345
 346     for c_file in args.c_files:
 347         with open(c_file, encoding='utf-8') as c_f:
 348             c_buf = c_f.read()
 349
 350             # Peek for modules vs continuations.
 351             m = re.search(r'WSLUA_(|CONTINUE_)MODULE\s*(\w+)', c_buf)
 352             if m:
 353                 module_name = m.group(2)
 354                 c_pair = (os.path.basename(c_file), c_buf)
 355                 try:
 356                     if m.group(1) == 'CONTINUE_':
 357                         modules[module_name]['c'].append(c_pair)
 358                     else:
 359                         modules[module_name]['c'].insert(0, c_pair)
 360                 except KeyError:
 361                     modules[module_name] = {}
 362                     modules[module_name]['c'] = [c_pair]
 363                     modules[module_name]['file_base'] = os.path.splitext(c_pair[0])[0]
 364             else:
 365                 logging.warning(f'No module found in {c_file}')
 366
 367     extractors = [
 368         extract_class_definitions,
 369         extract_function_definitions,
 370         extract_constructor_definitions,
 371         extract_constructor_markups,
 372         extract_method_definitions,
 373         extract_metamethod_definitions,
 374         extract_attribute_markups,
 375     ]
 376
 377     for module_name in sorted(modules):
 378         adoc_file = f'{modules[module_name]["file_base"]}.adoc'
 379         logging.info(f'Writing module {module_name} to {adoc_file} from {len(modules[module_name]["c"])} input(s)')
 380         functions = {}
 381         classes = {}
 382
 383         # Extract our module's description.
 384         m = re.search(r'WSLUA_MODULE\s*[A-Z][a-zA-Z0-9]+' + IN_COMMENT_RE, modules[module_name]['c'][0][1], re.MULTILINE|re.DOTALL)
 385         if not m:
 386             return
 387         modules[module_name]['description'] = parse_desc(f'{m.group(2)}')
 388
 389         # Extract module-level information from each file.
 390         for (c_file, c_buf) in modules[module_name]['c']:
 391             for extractor in extractors:
 392                 extractor(c_file, c_buf, modules[module_name], classes, functions)
 393
 394         # Extract function-level information from each file.
 395         for (c_file, c_buf) in modules[module_name]['c']:
 396             c_file_ids = filter(lambda k: functions[k].c_file == c_file, functions.keys())
 397             func_ids = sorted(c_file_ids, key=lambda k: functions[k].start)
 398             id = func_ids.pop(0)
 399             for next_id in func_ids:
 400                 functions[id].extract_buf(c_buf[functions[id].start:functions[next_id].start])
 401                 id = next_id
 402             functions[id].extract_buf(c_buf[functions[id].start:])
 403
 404         with open(os.path.join(args.output_directory, adoc_file), 'w', encoding='utf-8') as adoc_f:
 405             adoc_f.write(f'''\
 406 // {c_file}
 407 [#lua_module_{module_name}]
 408 === {modules[module_name]["description"]}
 409 ''')
 410             for class_name in sorted(classes.keys()):
 411                 lua_class = classes[class_name]
 412                 adoc_f.write(f'''
 413 // {c_file}
 414 [#lua_class_{class_name}]
 415 ==== {class_name}
 416 ''')
 417
 418                 if not lua_class["description"] == '':
 419                     adoc_f.write(f'\n{lua_class["description"]}\n')
 420
 421                 for constructor_id in sorted(lua_class['constructors'], key=lambda id: functions[id].start):
 422                     adoc_f.write(functions[constructor_id].to_adoc())
 423                     del functions[constructor_id]
 424
 425                 for method_id in sorted(lua_class['methods'], key=lambda id: functions[id].start):
 426                     adoc_f.write(functions[method_id].to_adoc())
 427                     del functions[method_id]
 428
 429                 for attribute in lua_class['attributes']:
 430                     attribute_id = re.sub('[^A-Za-z0-9]', '_', f'{attribute["name"]}')
 431                     adoc_f.write(f'''
 432 [#lua_class_attrib_{attribute_id}]
 433 ===== {attribute["name"]}
 434
 435 {attribute["description"]}
 436
 437 // End {attribute["name"]}
 438 ''')
 439
 440
 441                 adoc_f.write(f'\n// class_footer: {class_name}\n')
 442
 443             if len(functions.keys()) > 0:
 444                 adoc_f.write(f'''\
 445 [#global_functions_{module_name}]
 446 ==== Global Functions
 447 ''')
 448
 449             for global_id in sorted(functions.keys(), key=lambda id: functions[id].start):
 450                 adoc_f.write(functions[global_id].to_adoc())
 451
 452             if len(functions.keys()) > 0:
 453                 adoc_f.write(f'// Global function\n')
 454
 455             adoc_f.write('// end of module\n')
 456
# Run the generator only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == '__main__':
    main()