Mac/Tools/IDE/PyFontify.py

   1 """Module to analyze Python source code; for syntax coloring tools.
   2
   3 Interface:
   4         tags = fontify(pytext, searchfrom, searchto)
   5
   6 The 'pytext' argument is a string containing Python source code.
   7 The (optional) arguments 'searchfrom' and 'searchto' may contain a slice in pytext.
   8 The returned value is a list of tuples, formatted like this:
   9         [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
  10 The tuple contents are always like this:
  11         (tag, startindex, endindex, sublist)
  12 tag is one of 'keyword', 'string', 'comment' or 'identifier'
  13 sublist is not used, hence always None.
  14 """
  15
  16 # Based on FontText.py by Mitchell S. Chapman,
  17 # which was modified by Zachary Roadhouse,
  18 # then un-Tk'd by Just van Rossum.
  19 # Many thanks for regular expression debugging & authoring are due to:
  20 #       Tim (the-incredib-ly y'rs) Peters and Cristian Tismer
  21 # So, who owns the copyright? ;-) How about this:
  22 # Copyright 1996-2001:
  23 #       Mitchell S. Chapman,
  24 #       Zachary Roadhouse,
  25 #       Tim Peters,
  26 #       Just van Rossum
  27
  28 __version__ = "0.4"
  29
  30 import string
  31 import re
  32
  33 # First a little helper, since I don't like to repeat things. (Tismer speaking)
  34 import string
  35 def replace(where, what, with):
  36         return string.join(string.split(where, what), with)
  37
  38 # This list of keywords is taken from ref/node13.html of the
  39 # Python 1.3 HTML documentation. ("access" is intentionally omitted.)
  40 keywordsList = [
  41         "assert", "exec",
  42         "del", "from", "lambda", "return",
  43         "and", "elif", "global", "not", "try",
  44         "break", "else", "if", "or", "while",
  45         "class", "except", "import", "pass",
  46         "continue", "finally", "in", "print",
  47         "def", "for", "is", "raise", "yield"]
  48
  49 # Build up a regular expression which will match anything
  50 # interesting, including multi-line triple-quoted strings.
  51 commentPat = r"#[^\n]*"
  52
  53 pat = r"q[^\\q\n]*(\\[\000-\377][^\\q\n]*)*q"
  54 quotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
  55
  56 # Way to go, Tim!
  57 pat = r"""
  58         qqq
  59         [^\\q]*
  60         (
  61                 (       \\[\000-\377]
  62                 |       q
  63                         (       \\[\000-\377]
  64                         |       [^\q]
  65                         |       q
  66                                 (       \\[\000-\377]
  67                                 |       [^\\q]
  68                                 )
  69                         )
  70                 )
  71                 [^\\q]*
  72         )*
  73         qqq
  74 """
  75 pat = string.join(string.split(pat), '')        # get rid of whitespace
  76 tripleQuotePat = replace(pat, "q", "'") + "|" + replace(pat, 'q', '"')
  77
  78 # Build up a regular expression which matches all and only
  79 # Python keywords. This will let us skip the uninteresting
  80 # identifier references.
  81 # nonKeyPat identifies characters which may legally precede
  82 # a keyword pattern.
  83 nonKeyPat = r"(^|[^a-zA-Z0-9_.\"'])"
  84
  85 keyPat = nonKeyPat + "(" + "|".join(keywordsList) + ")" + nonKeyPat
  86
  87 matchPat = commentPat + "|" + keyPat + "|" + tripleQuotePat + "|" + quotePat
  88 matchRE = re.compile(matchPat)
  89
  90 idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"      # Ident w. leading whitespace.
  91 idRE = re.compile(idKeyPat)
  92
  93
  94 def fontify(pytext, searchfrom = 0, searchto = None):
  95         if searchto is None:
  96                 searchto = len(pytext)
  97         # Cache a few attributes for quicker reference.
  98         search = matchRE.search
  99         idSearch = idRE.search
 100
 101         tags = []
 102         tags_append = tags.append
 103         commentTag = 'comment'
 104         stringTag = 'string'
 105         keywordTag = 'keyword'
 106         identifierTag = 'identifier'
 107
 108         start = 0
 109         end = searchfrom
 110         while 1:
 111                 m = search(pytext, end)
 112                 if m is None:
 113                         break   # EXIT LOOP
 114                 start = m.start()
 115                 if start >= searchto:
 116                         break   # EXIT LOOP
 117                 match = m.group(0)
 118                 end = start + len(match)
 119                 c = match[0]
 120                 if c not in "#'\"":
 121                         # Must have matched a keyword.
 122                         if start <> searchfrom:
 123                                 # there's still a redundant char before and after it, strip!
 124                                 match = match[1:-1]
 125                                 start = start + 1
 126                         else:
 127                                 # this is the first keyword in the text.
 128                                 # Only a space at the end.
 129                                 match = match[:-1]
 130                         end = end - 1
 131                         tags_append((keywordTag, start, end, None))
 132                         # If this was a defining keyword, look ahead to the
 133                         # following identifier.
 134                         if match in ["def", "class"]:
 135                                 m = idSearch(pytext, end)
 136                                 if m is not None:
 137                                         start = m.start()
 138                                         if start == end:
 139                                                 match = m.group(0)
 140                                                 end = start + len(match)
 141                                                 tags_append((identifierTag, start, end, None))
 142                 elif c == "#":
 143                         tags_append((commentTag, start, end, None))
 144                 else:
 145                         tags_append((stringTag, start, end, None))
 146         return tags
 147
 148
 149 def test(path):
 150         f = open(path)
 151         text = f.read()
 152         f.close()
 153         tags = fontify(text)
 154         for tag, start, end, sublist in tags:
 155                 print tag, `text[start:end]`