tools/json_comment_eater/json_comment_eater.py

   1 #!/usr/bin/env python
   2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   3 # Use of this source code is governed by a BSD-style license that can be
   4 # found in the LICENSE file.
   5
   6 '''Utility to remove comments from JSON files so that they can be parsed by
   7 json.loads.
   8 '''
   9
  10 import sys
  11
  12
  13 def _Rcount(string, chars):
  14   '''Returns the number of consecutive characters from |chars| that occur at the
  15   end of |string|.
  16   '''
  17   return len(string) - len(string.rstrip(chars))
  18
  19
  20 def _FindNextToken(string, tokens, start):
  21   '''Finds the next token in |tokens| that occurs in |string| from |start|.
  22   Returns a tuple (index, token key).
  23   '''
  24   min_index, min_key = (-1, None)
  25   for k in tokens:
  26     index = string.find(k, start)
  27     if index != -1 and (min_index == -1 or index < min_index):
  28       min_index, min_key = (index, k)
  29   return (min_index, min_key)
  30
  31
  32 def _ReadString(input, start, output):
  33   output.append('"')
  34   start_range, end_range = (start, input.find('"', start))
  35   # \" escapes the ", \\" doesn't, \\\" does, etc.
  36   while (end_range != -1 and
  37          _Rcount(input[start_range:end_range], '\\') % 2 == 1):
  38     start_range, end_range = (end_range, input.find('"', end_range + 1))
  39   if end_range == -1:
  40     return start_range + 1
  41   output.append(input[start:end_range + 1])
  42   return end_range + 1
  43
  44
  45 def _ReadComment(input, start, output):
  46   eol_tokens = ('\n', '\r')
  47   eol_token_index, eol_token = _FindNextToken(input, eol_tokens, start)
  48   if eol_token is None:
  49     return len(input)
  50   output.append(eol_token)
  51   return eol_token_index + len(eol_token)
  52
  53 def _ReadMultilineComment(input, start, output):
  54   end_tokens = ('*/',)
  55   end_token_index, end_token = _FindNextToken(input, end_tokens, start)
  56   if end_token is None:
  57     raise Exception("Multiline comment end token (*/) not found")
  58   return end_token_index + len(end_token)
  59
  60 def Nom(input):
  61   token_actions = {
  62     '"': _ReadString,
  63     '//': _ReadComment,
  64     '/*': _ReadMultilineComment,
  65   }
  66   output = []
  67   pos = 0
  68   while pos < len(input):
  69     token_index, token = _FindNextToken(input, token_actions.keys(), pos)
  70     if token is None:
  71       output.append(input[pos:])
  72       break
  73     output.append(input[pos:token_index])
  74     pos = token_actions[token](input, token_index + len(token), output)
  75   return ''.join(output)
  76
  77
  78 if __name__ == '__main__':
  79     sys.stdout.write(Nom(sys.stdin.read()))