prethon.py

   1 ################################################################################
   2 ##
   3 ## Prethon-Python-based preprocessor.
   4 ##
   5 ## Copyright 2011 Zach Wegner
   6 ##
   7 ## This file is part of Prethon.
   8 ##
   9 ## Prethon is free software: you can redistribute it and/or modify
  10 ## it under the terms of the GNU General Public License as published by
  11 ## the Free Software Foundation, either version 3 of the License, or
  12 ## (at your option) any later version.
  13 ##
  14 ## Prethon is distributed in the hope that it will be useful,
  15 ## but WITHOUT ANY WARRANTY; without even the implied warranty of
  16 ## MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
  17 ## GNU General Public License for more details.
  18 ##
  19 ## You should have received a copy of the GNU General Public License
  20 ## along with Prethon.  If not, see <http://www.gnu.org/licenses/>.
  21 ##
  22 ################################################################################
  23
  24 import copy
  25 import io
  26 import os
  27 import re
  28 import subprocess
  29 import sys
  30
  31 # This state is used by the preprocessor external functions. The preprocessor
  32 # uses its own local state for the parsing, but the preprocessed code needs
  33 # access (through this module) to this state.
  34 pre_state = None
  35
  36 # Mode enum
  37 NORMAL, PRE, DEF, QUOTE_H, QUOTE = range(5)
  38
  39 ################################################################################
  40 ## Preprocessor functions ######################################################
  41 ################################################################################
  42
  43 # Emit function. This is what preprocessor code uses to emit real code.
  44 def emit(s):
  45     global pre_state
  46     pre_state.out.write(str(s))
  47
  48 # Include: Recursively call the preprocessor
  49 def include(path, var_dict=None, mode=NORMAL):
  50     global pre_state, depend_files
  51     depend_files += [path]
  52     if var_dict:
  53         vd = pre_state.variables.copy()
  54         for key, value in var_dict.items():
  55             vd[key] = value
  56         pre_state.variables = vd
  57     pre(pre_state.out, pre_state.pre_globals, path, mode=mode)
  58
  59 def include_py(path, var_dict=None):
  60     include(path, var_dict, mode=PRE)
  61
  62 ################################################################################
  63 ## Parser functions ############################################################
  64 ################################################################################
  65
  66 PRE_START = '<@'
  67 PRE_END = '@>'
  68 DEF_START = '<$'
  69 DEF_END = '$>'
  70 QUOTE_H_START = '<#'
  71 QUOTE_H_END = ':'
  72 QUOTE_CONT = '##'
  73 QUOTE_END = '#>'
  74
  75 DELIMS = [PRE_START, PRE_END, DEF_START, DEF_END, QUOTE_H_START, QUOTE_H_END,
  76     QUOTE_CONT, QUOTE_END]
  77
  78 # Make the reentrant
  79 class ParserState:
  80     def __init__(self, mode):
  81         self.cur_block = []
  82         self.quote_blocks = []
  83         self.indent = 0
  84         self.mode = []
  85         self.last_mode = -1
  86         self.last_len = -1
  87         self.push(mode)
  88
  89     def push(self, mode):
  90         # Flush anything from the last mode
  91         if len(self.mode) >= 1:
  92           self.flush(self.mode[-1])
  93
  94         self.mode.append(mode)
  95
  96         self.cur_block.append([])
  97         if mode == QUOTE_H:
  98             self.quote_blocks.append([])
  99
 100     def pop(self):
 101         mode = self.mode.pop()
 102         if mode == QUOTE:
 103             s = self.quote_fn(self.quote_blocks.pop())
 104             self.run(s)
 105         else:
 106             self.flush(mode)
 107
 108         self.cur_block.pop()
 109
 110     def flush(self, mode):
 111         block = ''.join(self.cur_block.pop())
 112         self.cur_block.append([])
 113         s = ''
 114         if block:
 115             if mode == NORMAL:
 116                 s = 'emit(%s)\n' % repr(block)
 117                 s = self.fix_ws(s)
 118             elif mode == PRE:
 119                 s = block
 120                 s = self.fix_ws(s)
 121             elif mode == DEF:
 122                 s = 'emit(%s)\n' % block
 123                 s = self.fix_ws(s)
 124             elif mode == QUOTE_H:
 125                 s = block
 126                 self.quote_blocks[-1].append(s)
 127                 s = ''
 128             else:
 129                 s = ''
 130
 131         self.run(s)
 132
 133     def run(self, s):
 134         # Execute the python code
 135         if QUOTE in self.mode:
 136             self.quote_blocks[-1].append(s)
 137         elif s is not '':
 138             try:
 139                 exec(s, self.pre_globals)
 140             except:
 141                 print('Exception in code:\n%s' % s)
 142                 raise
 143
 144     def quote_fn(self, blocks):
 145         header = blocks[0]
 146         body = ''.join(blocks[1:])
 147
 148         header = '%s:\n' % header
 149         header = self.fix_ws(header)
 150
 151         # Set up body
 152         #body = 'emit(%s)\n' % repr(body)
 153         self.indent += 4
 154         body = self.fix_ws(body)
 155         self.indent -= 4
 156
 157         return '\n'.join([header, body])
 158
 159     # Fix the indenting of a block to be at the global scope
 160     def fix_ws(self, block):
 161         lines = block.split('\n')
 162
 163         pre = None
 164         l = 0
 165         for line in lines:
 166             if not line.strip():
 167                 continue
 168             elif pre is None:
 169                 pre = re.match('\\s*', line).group(0)
 170                 l = len(pre)
 171             else:
 172                 for x in range(l):
 173                     if x >= len(line) or line[x] != pre[x]:
 174                         l = x
 175                         break
 176
 177         # Re-indent the lines to match the indent level
 178         lines = [line[l:] if line.strip() else line for line in lines]
 179         lines = [' '*self.indent + line for line in lines]
 180
 181         return '%s\n' % '\n'.join(lines)
 182
 183
 184 # Just add a character to a buffer
 185 def _emit(state, s):
 186     state.cur_block[-1] += [s]
 187     if QUOTE == state.mode[-1] and s:
 188         s = 'emit(%s)\n' % repr(s)
 189         state.quote_blocks[-1].append(s)
 190
 191 def tokenize(s, delims):
 192     tokens = []
 193     while s:
 194         idx = None
 195         t = None
 196         for d in delims:
 197             i = s.find(d)
 198             if i != -1 and (idx is None or i < idx):
 199                 idx = i
 200                 t = d
 201
 202         if t:
 203             tokens.append(s[:idx])
 204             tokens.append(t)
 205             s = s[idx + len(t):]
 206         else:
 207             tokens.append(s)
 208             s = ''
 209
 210     return tokens
 211
 212 def pre(out, pre_globals, file, mode=NORMAL):
 213     global pre_state
 214
 215     # Set up the state of the parser
 216     state = ParserState(mode)
 217     state.path = file
 218     state.quote = False
 219     state.last_quote = False
 220     state.out = out
 221     state.emit = [True]
 222
 223     # Set up globals for the pre-space
 224     state.pre_globals = pre_globals
 225
 226     # Set the global state so functions in this module can use it while being
 227     # called from the preprocessed code. We back up the old state since we can
 228     # preprocess recursively (through includes)
 229     old_state = pre_state
 230     pre_state = state
 231
 232     # Open the file for reading
 233     with open(file, 'rt') as f:
 234         for c in f:
 235             #tokens = re.findall(pattern, c, re.DOTALL) # DOTALL means keep newlines
 236             tokens = tokenize(c, DELIMS)
 237
 238             for tok in tokens:
 239                 # Regular preprocessed sections
 240                 if tok == PRE_START:
 241                     state.push(PRE)
 242                 elif tok == PRE_END:
 243                     state.pop()
 244                 # Def
 245                 elif tok == DEF_START:
 246                     state.push(DEF)
 247                 elif tok == DEF_END:
 248                     state.pop()
 249                 # Quote
 250                 elif tok == QUOTE_H_START:
 251                     state.push(QUOTE_H)
 252                 elif tok == QUOTE_H_END and state.mode[-1] == QUOTE_H:
 253                     state.pop()
 254                     state.push(QUOTE)
 255                 elif tok == QUOTE_CONT and state.mode[-1] == QUOTE:
 256                     state.pop()
 257                     state.push(QUOTE_H)
 258                 elif tok == QUOTE_END:
 259                     state.pop()
 260                 else:
 261                     _emit(state, tok)
 262
 263     # Finish up: flush the last block of characters
 264     state.pop()
 265
 266     # Restore the old parser state
 267     pre_state = old_state
 268
 269 # Set up options
 270 if len(sys.argv) < 3:
 271     print('Usage: %s [options] <input> <output> [var=value...]' % sys.argv[0])
 272     sys.exit(1)
 273
 274 depend = None
 275 depend_files = []
 276
 277 while True:
 278     if sys.argv[1] == '-d':
 279         depend = sys.argv[2]
 280         sys.argv[1:] = sys.argv[3:]
 281     else:
 282         break
 283
 284 # Set up input/output files
 285 i = sys.argv[1]
 286 o = sys.argv[2]
 287
 288 # Wrapper class for passing stuff to the program
 289 class PreData: pass
 290
 291 # Loop over all key=value pairs and set these variables.
 292 variables = {}
 293 for opt in sys.argv[3:]:
 294     key, _, value = opt.partition('=')
 295     variables[key] = value
 296
 297 p = PreData()
 298 p.variables = variables
 299
 300 # Preprocessor globals. This keeps the state of the preprocessed blocks
 301 pre_globals = {
 302         'emit' : emit,
 303         'include' : include,
 304         'include_py' : include_py,
 305         'pre' : p
 306         }
 307
 308 # Run the preprocessor
 309 with open(o, 'wt') as out:
 310     pre(out, pre_globals, i)
 311
 312 if depend:
 313     if os.path.isfile(depend):
 314         with open(depend, 'rt') as d_file:
 315             lines = d_file.readlines()
 316         lines = [l for l in lines if l.strip() and l[:l.find(':')] != o]
 317     else:
 318         lines = []
 319
 320     line = '%s: %s' % (o, ' '.join(depend_files))
 321     lines += [line]
 322
 323     with open(depend, 'wt') as d_file:
 324         d_file.write('\n'.join(lines))