tools/json_comment_eater/json_comment_eater.py

   1 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
   2 # Use of this source code is governed by a BSD-style license that can be
   3 # found in the LICENSE file.
   4
   5 '''Utility to remove comments from JSON files so that they can be parsed by
   6 json.loads.
   7 '''
   8
   9 def _Rcount(string, chars):
  10   '''Returns the number of consecutive characters from |chars| that occur at the
  11   end of |string|.
  12   '''
  13   return len(string) - len(string.rstrip(chars))
  14
  15 def _FindNextToken(string, tokens, start):
  16   '''Finds the next token in |tokens| that occurs in |string| from |start|.
  17   Returns a tuple (index, token key).
  18   '''
  19   min_index, min_key = (-1, None)
  20   for k in tokens:
  21     index = string.find(k, start)
  22     if index != -1 and (min_index == -1 or index < min_index):
  23       min_index, min_key = (index, k)
  24   return (min_index, min_key)
  25
  26 def _ReadString(input, start, output):
  27   output.append('"')
  28   start_range, end_range = (start, input.find('"', start))
  29   # \" escapes the ", \\" doesn't, \\\" does, etc.
  30   while (end_range != -1 and
  31          _Rcount(input[start_range:end_range], '\\') % 2 == 1):
  32     start_range, end_range = (end_range, input.find('"', end_range + 1))
  33   if end_range == -1:
  34     return start_range + 1
  35   output.append(input[start:end_range + 1])
  36   return end_range + 1
  37
  38 def _ReadComment(input, start, output):
  39   eol_tokens = ('\n', '\r')
  40   eol_token_index, eol_token = _FindNextToken(input, eol_tokens, start)
  41   if eol_token is None:
  42     return len(input)
  43   output.append(eol_token)
  44   return eol_token_index + len(eol_token)
  45
  46 def Nom(input):
  47   token_actions = {
  48     '"': _ReadString,
  49     '//': _ReadComment,
  50   }
  51   output = []
  52   pos = 0
  53   while pos < len(input):
  54     token_index, token = _FindNextToken(input, token_actions.keys(), pos)
  55     if token is None:
  56       output.append(input[pos:])
  57       break
  58     output.append(input[pos:token_index])
  59     pos = token_actions[token](input, token_index + len(token), output)
  60   return ''.join(output)