From 435cdb7023a91e1f1ddeafa5959d0c2028dd0527 Mon Sep 17 00:00:00 2001
From: Zach Wegner <zwegner@gmail.com>
Date: Wed, 10 Aug 2011 01:15:27 -0700
Subject: [PATCH] Initial commit of prethon.

Just a Python preprocessor I wrote a long time back, now slightly cleaned up.
Code not necessarily nice. Totally undocumented syntax.
---
 prethon.py | 322 +++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 322 insertions(+)
 create mode 100755 prethon.py

diff --git a/prethon.py b/prethon.py
new file mode 100755
index 0000000..e1a1c9f
--- /dev/null
+++ b/prethon.py
@@ -0,0 +1,322 @@
+################################################################################
+## Prethon-Python-based preprocessor.
+## 
+## Copyright 2011 Zach Wegner
+##
+## This file is part of Prethon.
+## 
+## Prethon is free software: you can redistribute it and/or modify
+## it under the terms of the GNU General Public License as published by
+## the Free Software Foundation, either version 3 of the License, or
+## (at your option) any later version.
+## 
+## Prethon is distributed in the hope that it will be useful,
+## but WITHOUT ANY WARRANTY; without even the implied warranty of
+## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+## GNU General Public License for more details.
+## 
+## You should have received a copy of the GNU General Public License
+## along with Prethon.  If not, see <http://www.gnu.org/licenses/>.
+################################################################################
+
+import copy
+import io
+import os
+import re
+import subprocess
+import sys
+
+# The ParserState of the file currently being preprocessed (None when idle).
+# pre() swaps it in and out around each file so that emit() and include(),
+# called from preprocessed code, act on the right output and globals.
+pre_state = None
+
+# Parser modes: plain text, <@ code @>, <$ expression $>, quote header, body
+NORMAL, PRE, DEF, QUOTE_H, QUOTE = range(5)
+
+################################################################################
+## Preprocessor functions ######################################################
+################################################################################
+
+# Emit function. Preprocessor code calls this to write real output.
+def emit(s):
+    """Write str(s) to the output of the file currently being processed."""
+    pre_state.out.write(str(s))
+
+# Include: Recursively call the preprocessor
+def include(path, var_dict=None, mode=NORMAL):
+    """Preprocess `path` into the current output (var_dict overrides vars)."""
+    depend_files.append(path)
+    if var_dict:
+        # Variables live on the `pre` object; ParserState has no `variables`.
+        data = pre_state.pre_globals['pre']
+        data.variables = dict(data.variables)
+        data.variables.update(var_dict)
+    pre(pre_state.out, pre_state.pre_globals, path, mode=mode)
+
+def include_py(path, var_dict=None):
+    include(path, var_dict=var_dict, mode=PRE)
+
+################################################################################
+## Parser functions ############################################################
+################################################################################
+
+PRE_START = '<@'      # opens a block of Python code to execute
+PRE_END = '@>'        # closes the current block
+DEF_START = '<$'      # opens an expression whose value is emitted
+DEF_END = '$>'        # closes the current block
+QUOTE_H_START = '<#'  # opens a quote header
+QUOTE_H_END = ':'     # in a quote header: ends it and starts the quote body
+QUOTE_CONT = '##'     # in a quote body: ends it and starts a new header
+QUOTE_END = '#>'      # closes the current block
+
+DELIMS = [PRE_START, PRE_END, DEF_START, DEF_END, QUOTE_H_START, QUOTE_H_END,
+    QUOTE_CONT, QUOTE_END]  # every token that tokenize() splits lines on
+
+# Make the parser reentrant
+class ParserState:
+    """State for parsing one file; pre() adds path, out and pre_globals."""
+    def __init__(self, mode):
+        self.cur_block = []     # one list of text pieces per active mode
+        self.quote_blocks = []  # one list of pieces per open quote
+        self.indent = 0         # spaces fix_ws() prepends to every line
+        self.mode = []          # mode stack; mode[-1] is the current mode
+        self.last_mode = -1
+        self.last_len = -1
+        self.push(mode)
+
+    def push(self, mode):
+        """Enter `mode`, first flushing text pending in the current mode."""
+        if self.mode:
+            self.flush(self.mode[-1])
+        self.mode.append(mode)
+        self.cur_block.append([])
+        if mode == QUOTE_H:
+            self.quote_blocks.append([])
+
+    def pop(self):
+        """Leave the current mode, running whatever it accumulated."""
+        mode = self.mode.pop()
+        if mode == QUOTE:
+            # A finished quote is run as its header plus indented body.
+            self.run(self.quote_fn(self.quote_blocks.pop()))
+        else:
+            self.flush(mode)
+        self.cur_block.pop()
+
+    def flush(self, mode):
+        """Turn text buffered for `mode` into code and pass it to run()."""
+        block = ''.join(self.cur_block[-1])
+        self.cur_block[-1] = []
+        s = ''
+        if block:
+            if mode == NORMAL:
+                # Literal text is written out verbatim.
+                s = self.fix_ws('emit(%s)\n' % repr(block))
+            elif mode == PRE:
+                # Preprocessor code is executed as written.
+                s = self.fix_ws(block)
+            elif mode == DEF:
+                # The text is an expression whose value is emitted.
+                s = self.fix_ws('emit(%s)\n' % block)
+            elif mode == QUOTE_H:
+                # Quote headers are kept for quote_fn(), not run here.
+                self.quote_blocks[-1].append(block)
+
+        self.run(s)
+
+    def run(self, s):
+        """Execute generated code `s`, or queue it while inside a quote."""
+        if QUOTE in self.mode:
+            self.quote_blocks[-1].append(s)
+        elif s != '':
+            # Compare by value: `is not ''` tested identity with a literal.
+            try:
+                exec(s, self.pre_globals)
+            except BaseException:
+                # Show the offending generated code, then propagate.
+                print('Exception in code:\n%s' % s)
+                raise
+
+    def quote_fn(self, blocks):
+        """Build code for a quote, treating blocks[0] as its header."""
+        header = self.fix_ws('%s:\n' % blocks[0])
+        # The body sits one level (4 spaces) deeper than the header.
+        self.indent += 4
+        body = self.fix_ws(''.join(blocks[1:]))
+        self.indent -= 4
+
+        return '\n'.join([header, body])
+
+    def fix_ws(self, block):
+        """Strip the common leading whitespace, then indent by self.indent.
+
+        Blank lines are skipped when measuring the common prefix. The
+        result always ends with one extra newline.
+        """
+        lines = block.split('\n')
+
+        prefix = None
+        width = 0
+        for line in lines:
+            if not line.strip():
+                continue
+            elif prefix is None:
+                # The first non-blank line supplies the candidate prefix.
+                prefix = re.match(r'\s*', line).group(0)
+                width = len(prefix)
+            else:
+                # Shrink the prefix to the part this line shares with it.
+                for x in range(width):
+                    if x >= len(line) or line[x] != prefix[x]:
+                        width = x
+                        break
+
+        # Re-indent the lines to match the indent level
+        lines = [line[width:] if line.strip() else line for line in lines]
+        lines = [' ' * self.indent + line for line in lines]
+
+        return '%s\n' % '\n'.join(lines)
+
+
+# Buffer a token of text; inside a quote body also queue code that emits it
+def _emit(state, s):
+    state.cur_block[-1].append(s)
+    if state.mode[-1] == QUOTE and s:
+        quoted = 'emit(%s)\n' % repr(s)
+        state.quote_blocks[-1].append(quoted)
+
+def tokenize(s, delims):
+    """Split `s` into a list alternating text and delimiter tokens.
+
+    The earliest delimiter in `s` is split off each time; the text before it
+    is kept even when empty.
+    """
+    tokens = []
+    while s:
+        idx, t = None, None
+        for d in delims:
+            i = s.find(d)
+            if i != -1 and (idx is None or i < idx):
+                idx, t = i, d
+        if not t:
+            # No delimiter left: the remainder is one final text token.
+            tokens.append(s)
+            break
+        tokens += [s[:idx], t]
+        s = s[idx + len(t):]
+    return tokens
+
+def pre(out, pre_globals, file, mode=NORMAL):
+    """Preprocess `file`, writing the emitted text to `out`.
+
+    out: object whose write() receives the text passed to emit().
+    pre_globals: globals dict in which the embedded code is exec'd.
+    file: path of the input file.
+    mode: parser mode the file starts in (NORMAL, or PRE via include_py).
+    """
+    global pre_state
+
+    # Set up the state of the parser
+    state = ParserState(mode)
+    state.path = file
+    state.quote = False
+    state.last_quote = False
+    state.out = out
+    state.emit = [True]
+    state.pre_globals = pre_globals  # globals for the pre-space
+
+    # Set the global state so functions in this module can use it while being
+    # called from the preprocessed code. We back up the old state since we can
+    # preprocess recursively (through includes)
+    old_state = pre_state
+    pre_state = state
+
+    try:
+        with open(file, 'rt') as f:
+            for line in f:
+                for tok in tokenize(line, DELIMS):
+                    # Regular preprocessed sections and defs
+                    if tok == PRE_START:
+                        state.push(PRE)
+                    elif tok == DEF_START:
+                        state.push(DEF)
+                    # Quote
+                    elif tok == QUOTE_H_START:
+                        state.push(QUOTE_H)
+                    elif tok == QUOTE_H_END and state.mode[-1] == QUOTE_H:
+                        # ':' only ends a quote header; elsewhere it is text
+                        state.pop()
+                        state.push(QUOTE)
+                    elif tok == QUOTE_CONT and state.mode[-1] == QUOTE:
+                        # '##' only counts inside a quote body
+                        state.pop()
+                        state.push(QUOTE_H)
+                    elif tok in (PRE_END, DEF_END, QUOTE_END):
+                        # Closing delimiters pop whatever mode is current
+                        state.pop()
+                    else:
+                        _emit(state, tok)
+
+        # Finish up: flush the last block of characters
+        state.pop()
+    finally:
+        # Restore the old parser state even if preprocessing raised
+        pre_state = old_state
+
+# Set up options. The only option is `-d <file>`, naming a dependency file
+# that is rewritten after preprocessing.
+depend = None
+depend_files = []
+
+# Consume leading -d options; a later one overrides an earlier one
+while len(sys.argv) >= 3 and sys.argv[1] == '-d':
+    depend = sys.argv[2]
+    sys.argv[1:] = sys.argv[3:]
+
+# Validate after option parsing so -d cannot mask a missing input/output
+if len(sys.argv) < 3:
+    print('Usage: %s [options] <input> <output> [var=value...]' % sys.argv[0])
+    sys.exit(1)
+
+# Set up input/output files
+i = sys.argv[1]
+o = sys.argv[2]
+
+class PreData:
+    """Wrapper class for passing stuff to the program."""
+
+# Collect the trailing key=value arguments; one without '=' maps to ''.
+variables = {}
+for opt in sys.argv[3:]:
+    key, _, value = opt.partition('=')
+    variables[key] = value
+
+p = PreData()  # reachable from preprocessed code as `pre`
+p.variables = variables
+
+# Globals dict that every preprocessed block is exec'd in, so names persist
+pre_globals = {
+        'emit' : emit,
+        'include' : include,
+        'include_py' : include_py,
+        'pre' : p
+        }
+
+# Run the preprocessor
+with open(o, 'wt') as out:
+    pre(out, pre_globals, i)
+
+# Update the dependency file: replace the rule for this output, keep the rest
+if depend:
+    rules = []
+    if os.path.isfile(depend):
+        with open(depend, 'rt') as d_file:
+            # Drop blank lines and any previous rule whose target is `o`;
+            # partition() also copes with lines that contain no ':'.
+            rules = [r.rstrip('\n') for r in d_file
+                     if r.strip() and r.partition(':')[0] != o]
+    rules.append('%s: %s' % (o, ' '.join(depend_files)))
+
+    with open(depend, 'wt') as d_file:
+        d_file.write('\n'.join(rules) + '\n')
-- 
2.11.4.GIT