From 435cdb7023a91e1f1ddeafa5959d0c2028dd0527 Mon Sep 17 00:00:00 2001 From: Zach Wegner Date: Wed, 10 Aug 2011 01:15:27 -0700 Subject: [PATCH] Initial commit of prethon. Just a Python preprocessor I wrote a long time back, now slightly cleaned up. Code not necessarily nice. Totally undocumented syntax. --- prethon.py | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 322 insertions(+) create mode 100755 prethon.py diff --git a/prethon.py b/prethon.py new file mode 100755 index 0000000..e1a1c9f --- /dev/null +++ b/prethon.py @@ -0,0 +1,322 @@ +################################################################################ +## Prethon-Python-based preprocessor. +## +## Copyright 2011 Zach Wegner +## +## This file is part of Prethon. +## +## Prethon is free software: you can redistribute it and/or modify +## it under the terms of the GNU General Public License as published by +## the Free Software Foundation, either version 3 of the License, or +## (at your option) any later version. +## +## Prethon is distributed in the hope that it will be useful, +## but WITHOUT ANY WARRANTY; without even the implied warranty of +## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +## GNU General Public License for more details. +## +## You should have received a copy of the GNU General Public License +## along with Prethon. If not, see <http://www.gnu.org/licenses/>. +################################################################################ + +import copy +import io +import os +import re +import subprocess +import sys + +# This state is used by the preprocessor external functions. The preprocessor +# uses its own local state for the parsing, but the preprocessed code needs +# access (through this module) to this state. 
pre_state = None

# Paths handed to include()/include_py(). main() writes them to the dependency
# file when the script is run with -d.
depend_files = []

# Mode enum
NORMAL, PRE, DEF, QUOTE_H, QUOTE = range(5)

################################################################################
## Preprocessor functions ######################################################
################################################################################


def emit(s):
    """Write ``str(s)`` to the output of the file currently being processed.

    This is what preprocessor code calls to produce real output.
    """
    pre_state.out.write(str(s))


def include(path, var_dict=None, mode=NORMAL):
    """Recursively preprocess ``path`` into the current output.

    path     -- file to preprocess; it is also recorded in ``depend_files``.
    var_dict -- optional mapping of variable overrides.  While ``path`` is
                being processed, ``pre.variables`` is a copy of the current
                variables updated with ``var_dict``; the previous mapping is
                put back afterwards, even if the include raises.
    mode     -- initial parser mode for the included file.

    Raises ValueError if ``var_dict`` is given but the preprocessor globals
    have no ``pre`` object carrying a ``variables`` mapping.
    """
    depend_files.append(path)
    state = pre_state

    if not var_dict:
        pre(state.out, state.pre_globals, path, mode=mode)
        return

    # The variables live on the 'pre' object inside the preprocessor globals
    # (that is where main() puts them), not on the parser state, so the
    # overrides have to be applied there to be visible to the included file.
    holder = state.pre_globals.get('pre')
    saved = getattr(holder, 'variables', None)
    if saved is None:
        raise ValueError(
            "include(): var_dict was given but there is no 'pre.variables' "
            "to override")

    merged = saved.copy()
    merged.update(var_dict)
    holder.variables = merged
    try:
        pre(state.out, state.pre_globals, path, mode=mode)
    finally:
        holder.variables = saved


def include_py(path, var_dict=None):
    """Include ``path`` with the parser starting in PRE mode."""
    include(path, var_dict, mode=PRE)


################################################################################
## Parser functions ############################################################
################################################################################

PRE_START = '<@'
PRE_END = '@>'
DEF_START = '<$'
DEF_END = '$>'
QUOTE_H_START = '<#'
QUOTE_H_END = ':'
QUOTE_CONT = '##'
QUOTE_END = '#>'

DELIMS = [PRE_START, PRE_END, DEF_START, DEF_END, QUOTE_H_START, QUOTE_H_END,
          QUOTE_CONT, QUOTE_END]

# Opening delimiters and the mode each one enters.
_OPENERS = {PRE_START: PRE, DEF_START: DEF, QUOTE_H_START: QUOTE_H}

# Delimiters that unconditionally leave the current mode.
_CLOSERS = (PRE_END, DEF_END, QUOTE_END)


# Make the parser reentrant
class ParserState:
    """Parsing state for one file being preprocessed.

    A stack of modes (``mode``) is kept in step with a stack of text buffers
    (``cur_block``).  Text collected in a mode is turned into Python source
    when the mode is flushed and is then either executed in ``pre_globals``
    or, while a QUOTE mode is anywhere on the stack, appended to the innermost
    entry of ``quote_blocks`` so that it can be wrapped in the quote's header
    statement later.
    """

    def __init__(self, mode, out=None, pre_globals=None, path=None):
        """Create a state whose mode stack starts with ``mode``.

        out         -- object with a ``write`` method that emit() writes to.
        pre_globals -- globals dict the generated code is executed in; a
                       fresh dict is used when omitted.
        path        -- path of the file being parsed.
        """
        self.cur_block = []
        self.quote_blocks = []
        self.indent = 0
        self.mode = []
        # last_mode and last_len are not read anywhere in this file; they are
        # kept so the attribute set of the class is unchanged.
        self.last_mode = -1
        self.last_len = -1
        self.out = out
        self.pre_globals = {} if pre_globals is None else pre_globals
        self.path = path
        self.push(mode)

    def push(self, mode):
        """Enter ``mode``, flushing text pending in the current mode first."""
        if self.mode:
            self.flush(self.mode[-1])

        self.mode.append(mode)
        self.cur_block.append([])
        # A quote header starts a new quote: the header text and then the
        # generated body statements are collected in a fresh list.
        if mode == QUOTE_H:
            self.quote_blocks.append([])

    def pop(self):
        """Leave the current mode, running whatever it collected."""
        mode = self.mode.pop()
        if mode == QUOTE:
            self.run(self.quote_fn(self.quote_blocks.pop()))
        else:
            self.flush(mode)

        self.cur_block.pop()

    def flush(self, mode):
        """Turn the pending text of the top buffer into code and run it.

        NORMAL text becomes an ``emit`` of its repr, PRE text is used as code
        verbatim, DEF text becomes an ``emit`` of the expression, and QUOTE_H
        text is stored as the header of the innermost quote.  Text pending in
        QUOTE mode produces no code here, because _emit() already recorded it
        in the quote as it arrived.
        """
        block = ''.join(self.cur_block.pop())
        self.cur_block.append([])

        code = ''
        if block:
            if mode == NORMAL:
                code = self.fix_ws('emit(%s)\n' % repr(block))
            elif mode == PRE:
                code = self.fix_ws(block)
            elif mode == DEF:
                code = self.fix_ws('emit(%s)\n' % block)
            elif mode == QUOTE_H:
                self.quote_blocks[-1].append(block)

        self.run(code)

    def run(self, s):
        """Execute ``s``, or defer it if a quote is being collected.

        While a QUOTE mode is on the stack the code is appended to the
        innermost quote instead of being executed.  If executing the code
        raises, the offending code is printed and the exception propagates.
        """
        if QUOTE in self.mode:
            self.quote_blocks[-1].append(s)
        elif s:
            try:
                # NOTE: this executes code taken from the input file; only
                # preprocess files you trust.
                exec(s, self.pre_globals)
            except Exception:
                print('Exception in code:\n%s' % s)
                raise

    def quote_fn(self, blocks):
        """Build the source for a quote from its collected ``blocks``.

        ``blocks[0]`` is the header text (a ``:`` is appended to it) and the
        remaining entries are the body statements, which are indented four
        columns deeper than the header.  An empty body is replaced by ``pass``
        so that the generated statement still compiles.
        """
        header = self.fix_ws('%s:\n' % blocks[0])

        body = ''.join(blocks[1:])
        if not body.strip():
            body = 'pass\n'

        self.indent += 4
        try:
            body = self.fix_ws(body)
        finally:
            self.indent -= 4

        return '\n'.join([header, body])

    # Fix the indenting of a block to be at the global scope
    def fix_ws(self, block):
        """Re-indent ``block`` to ``self.indent`` columns.

        The leading whitespace shared by all non-blank lines is removed from
        those lines, every line is then prefixed with ``self.indent`` spaces,
        and a newline is appended to the result.
        """
        lines = block.split('\n')

        # Longest whitespace prefix common to every non-blank line.
        common = None
        for line in lines:
            if not line.strip():
                continue
            if common is None:
                common = re.match(r'\s*', line).group(0)
                continue
            shared = 0
            for a, b in zip(common, line):
                if a != b:
                    break
                shared += 1
            common = common[:shared]

        cut = len(common) if common else 0
        pad = ' ' * self.indent
        fixed = [pad + (line[cut:] if line.strip() else line)
                 for line in lines]

        return '%s\n' % '\n'.join(fixed)


def _emit(state, s):
    """Add the text ``s`` to the buffer of the current mode of ``state``.

    Directly inside a quote body, non-empty text is also recorded in the
    quote as an ``emit`` statement.
    """
    state.cur_block[-1].append(s)
    if s and state.mode[-1] == QUOTE:
        state.quote_blocks[-1].append('emit(%s)\n' % repr(s))


def tokenize(s, delims):
    """Split ``s`` into a list alternating plain text and delimiters.

    The earliest occurrence of any delimiter is cut out first; the text
    before it (possibly empty) and the delimiter itself are appended, and the
    scan continues after it.  Any remaining text is appended as the final
    token.  Empty delimiters are ignored, since they would match at index 0
    and prevent every other delimiter from being found.
    """
    tokens = []
    while s:
        best_idx = None
        best = None
        for d in delims:
            if not d:
                continue
            i = s.find(d)
            if i != -1 and (best_idx is None or i < best_idx):
                best_idx = i
                best = d

        if best is None:
            tokens.append(s)
            break

        tokens.append(s[:best_idx])
        tokens.append(best)
        s = s[best_idx + len(best):]

    return tokens


def pre(out, pre_globals, file, mode=NORMAL):
    """Preprocess ``file``, writing the result to ``out``.

    out         -- object with a ``write`` method receiving the output.
    pre_globals -- globals dict the embedded code is executed in.
    file        -- path of the file to read.
    mode        -- parser mode the file starts in.

    The module-level ``pre_state`` points at this file's parser state while
    it is being processed and is restored afterwards, including when an
    exception propagates.
    """
    global pre_state

    # Set up the state of the parser
    state = ParserState(mode, out=out, pre_globals=pre_globals, path=file)
    # These attributes are not read anywhere in this file; they are kept so
    # the state object carries the same attributes as before.
    state.quote = False
    state.last_quote = False
    state.emit = [True]

    # Set the global state so functions in this module can use it while being
    # called from the preprocessed code. We back up the old state since we can
    # preprocess recursively (through includes)
    old_state = pre_state
    pre_state = state
    try:
        with open(file, 'rt') as f:
            for line in f:
                for tok in tokenize(line, DELIMS):
                    if tok in _OPENERS:
                        state.push(_OPENERS[tok])
                    elif tok in _CLOSERS:
                        state.pop()
                    elif tok == QUOTE_H_END and state.mode[-1] == QUOTE_H:
                        # End of a quote header: the quote body follows
                        state.pop()
                        state.push(QUOTE)
                    elif tok == QUOTE_CONT and state.mode[-1] == QUOTE:
                        # End this quote and start the header of the next
                        state.pop()
                        state.push(QUOTE_H)
                    else:
                        # Plain text, or a ':' / '##' outside its context
                        _emit(state, tok)

            # Finish up: flush the last block of characters
            state.pop()
    finally:
        # Restore the old parser state
        pre_state = old_state


################################################################################
## Command-line driver #########################################################
################################################################################

# Wrapper class for passing stuff to the program
class PreData:
    pass


def _write_depend(depend, target, deps):
    """Rewrite the dependency file ``depend`` with a rule for ``target``.

    Existing non-blank lines are kept, except a previous rule for ``target``,
    and ``target: dep dep ...`` is appended as the last line.
    """
    entries = []
    if os.path.isfile(depend):
        with open(depend, 'rt') as d_file:
            for raw in d_file:
                # Drop the line terminator so re-joining the entries below
                # does not add a blank line on every rewrite.
                entry = raw.rstrip('\r\n')
                if entry.strip() and not entry.startswith(target + ':'):
                    entries.append(entry)

    entries.append('%s: %s' % (target, ' '.join(deps)))

    with open(depend, 'wt') as d_file:
        d_file.write('\n'.join(entries) + '\n')


def main(argv=None):
    """Run the preprocessor from a command line; return the exit status.

    argv -- argument list including the program name; ``sys.argv`` is used
            (and left unmodified) when omitted.

    Usage: prog [-d dependfile] <input> <output> [var=value...]
    """
    argv = list(sys.argv if argv is None else argv)
    prog = argv[0] if argv else 'prethon.py'
    usage = 'Usage: %s [options] <input> <output> [var=value...]' % prog

    # Set up options
    args = argv[1:]
    depend = None
    while args and args[0] == '-d':
        if len(args) < 2:
            print(usage)
            return 1
        depend = args[1]
        args = args[2:]

    # Input and output are required once the options have been consumed
    if len(args) < 2:
        print(usage)
        return 1

    in_path, out_path = args[0], args[1]

    # Loop over all key=value pairs and set these variables.
    variables = {}
    for opt in args[2:]:
        key, _, value = opt.partition('=')
        variables[key] = value

    data = PreData()
    data.variables = variables

    # Preprocessor globals. This keeps the state of the preprocessed blocks
    pre_globals = {
        'emit': emit,
        'include': include,
        'include_py': include_py,
        'pre': data,
    }

    # Start from an empty dependency list for this run
    del depend_files[:]

    # Run the preprocessor
    with open(out_path, 'wt') as out:
        pre(out, pre_globals, in_path)

    if depend:
        _write_depend(depend, out_path, depend_files)

    return 0


if __name__ == '__main__':
    sys.exit(main())