Mac/Tools/IDE/PyFontify.py

   1 """Module to analyze Python source code; for syntax coloring tools.
   2
   3 Interface:
   4         tags = fontify(pytext, searchfrom, searchto)
   5
   6 The 'pytext' argument is a string containing Python source code.
   7 The optional arguments 'searchfrom' and 'searchto' delimit the slice of pytext to search.
   8 The returned value is a list of tuples, formatted like this:
   9         [('keyword', 0, 6, None), ('keyword', 11, 17, None), ('comment', 23, 53, None), etc. ]
  10 The tuple contents are always like this:
  11         (tag, startindex, endindex, sublist)
  12 tag is one of 'keyword', 'string', 'comment' or 'identifier'
  13 sublist is not used, hence always None.
  14 """
  15
  16 # Based on FontText.py by Mitchell S. Chapman,
  17 # which was modified by Zachary Roadhouse,
  18 # then un-Tk'd by Just van Rossum.
  19 # Many thanks for regular expression debugging & authoring are due to:
  20 #       Tim (the-incredib-ly y'rs) Peters and Christian Tismer
  21 # So, who owns the copyright? ;-) How about this:
  22 # Copyright 1996-1997:
  23 #       Mitchell S. Chapman,
  24 #       Zachary Roadhouse,
  25 #       Tim Peters,
  26 #       Just van Rossum
  27
  28 __version__ = "0.3.1"
  29
  30 import string, regex
  31
  32 # First a little helper, since I don't like to repeat things. (Tismer speaking)
  33 import string
  34 def replace(where, what, with):
  35         return string.join(string.split(where, what), with)
  36
  37 # This list of keywords is taken from ref/node13.html of the
  38 # Python 1.3 HTML documentation. ("access" is intentionally omitted.)
  39 keywordsList = [
  40         "assert",
  41         "del", "from", "lambda", "return",
  42         "and", "elif", "global", "not", "try",
  43         "break", "else", "if", "or", "while",
  44         "class", "except", "import", "pass",
  45         "continue", "finally", "in", "print",
  46         "def", "for", "is", "raise"]
  47
  48 # Build up a regular expression which will match anything
  49 # interesting, including multi-line triple-quoted strings.
  50 commentPat = "#.*"
  51
  52 pat = "q[^\q\n]*\(\\\\[\000-\377][^\q\n]*\)*q"
  53 quotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
  54
  55 # Way to go, Tim!
  56 pat = """
  57         qqq
  58         [^\\q]*
  59         \(
  60                 \(      \\\\[\000-\377]
  61                 \|      q
  62                         \(      \\\\[\000-\377]
  63                         \|      [^\\q]
  64                         \|      q
  65                                 \(      \\\\[\000-\377]
  66                                 \|      [^\\q]
  67                                 \)
  68                         \)
  69                 \)
  70                 [^\\q]*
  71         \)*
  72         qqq
  73 """
  74 pat = string.join(string.split(pat), '')        # get rid of whitespace
  75 tripleQuotePat = replace(pat, "q", "'") + "\|" + replace(pat, 'q', '"')
  76
  77 # Build up a regular expression which matches all and only
  78 # Python keywords. This will let us skip the uninteresting
  79 # identifier references.
  80 # nonKeyPat identifies characters which may legally precede
  81 # a keyword pattern.
  82 nonKeyPat = "\(^\|[^a-zA-Z0-9_.\"']\)"
  83
  84 keyPat = nonKeyPat + "\("
  85 for keyword in keywordsList:
  86         keyPat = keyPat + keyword + "\|"
  87 keyPat = keyPat[:-2] + "\)" + nonKeyPat
  88
  89 matchPat = keyPat + "\|" + commentPat + "\|" + tripleQuotePat + "\|" + quotePat
  90 matchRE = regex.compile(matchPat)
  91
  92 idKeyPat = "[ \t]*[A-Za-z_][A-Za-z_0-9.]*"      # Ident w. leading whitespace.
  93 idRE = regex.compile(idKeyPat)
  94
  95
  96 def fontify(pytext, searchfrom = 0, searchto = None):
  97         if searchto is None:
  98                 searchto = len(pytext)
  99         # Cache a few attributes for quicker reference.
 100         search = matchRE.search
 101         group = matchRE.group
 102         idSearch = idRE.search
 103         idGroup = idRE.group
 104
 105         tags = []
 106         tags_append = tags.append
 107         commentTag = 'comment'
 108         stringTag = 'string'
 109         keywordTag = 'keyword'
 110         identifierTag = 'identifier'
 111
 112         start = 0
 113         end = searchfrom
 114         while 1:
 115                 start = search(pytext, end)
 116                 if start < 0 or start >= searchto:
 117                         break   # EXIT LOOP
 118                 match = group(0)
 119                 end = start + len(match)
 120                 c = match[0]
 121                 if c not in "#'\"":
 122                         # Must have matched a keyword.
 123                         if start <> searchfrom:
 124                                 # there's still a redundant char before and after it, strip!
 125                                 match = match[1:-1]
 126                                 start = start + 1
 127                         else:
 128                                 # this is the first keyword in the text.
 129                                 # Only a space at the end.
 130                                 match = match[:-1]
 131                         end = end - 1
 132                         tags_append((keywordTag, start, end, None))
 133                         # If this was a defining keyword, look ahead to the
 134                         # following identifier.
 135                         if match in ["def", "class"]:
 136                                 start = idSearch(pytext, end)
 137                                 if start == end:
 138                                         match = idGroup(0)
 139                                         end = start + len(match)
 140                                         tags_append((identifierTag, start, end, None))
 141                 elif c == "#":
 142                         tags_append((commentTag, start, end, None))
 143                 else:
 144                         tags_append((stringTag, start, end, None))
 145         return tags
 146
 147
 148 def test(path):
 149         f = open(path)
 150         text = f.read()
 151         f.close()
 152         tags = fontify(text)
 153         for tag, start, end, sublist in tags:
 154                 print tag, `text[start:end]`