Include all dupe types (even when value is zero) in scan stats.
[chromium-blink-merge.git] / tools / json_comment_eater / json_comment_eater.py
blobd61ece20e26ba8c8516bc223908a74e7b498f0c6
1 #!/usr/bin/env python
2 # Copyright (c) 2012 The Chromium Authors. All rights reserved.
3 # Use of this source code is governed by a BSD-style license that can be
4 # found in the LICENSE file.
6 '''Utility to remove comments from JSON files so that they can be parsed by
7 json.loads.
8 '''
10 import sys
13 def _Rcount(string, chars):
14 '''Returns the number of consecutive characters from |chars| that occur at the
15 end of |string|.
16 '''
17 return len(string) - len(string.rstrip(chars))
20 def _FindNextToken(string, tokens, start):
21 '''Finds the next token in |tokens| that occurs in |string| from |start|.
22 Returns a tuple (index, token key).
23 '''
24 min_index, min_key = (-1, None)
25 for k in tokens:
26 index = string.find(k, start)
27 if index != -1 and (min_index == -1 or index < min_index):
28 min_index, min_key = (index, k)
29 return (min_index, min_key)
32 def _ReadString(input, start, output):
33 output.append('"')
34 start_range, end_range = (start, input.find('"', start))
35 # \" escapes the ", \\" doesn't, \\\" does, etc.
36 while (end_range != -1 and
37 _Rcount(input[start_range:end_range], '\\') % 2 == 1):
38 start_range, end_range = (end_range, input.find('"', end_range + 1))
39 if end_range == -1:
40 return start_range + 1
41 output.append(input[start:end_range + 1])
42 return end_range + 1
def _ReadComment(input, start, output):
  '''Skips a // comment whose text begins at index |start| of |input|.

  The line terminator that ends the comment (the first '\\n' or '\\r' found) is
  kept by appending it to |output|. Returns the index just past that
  terminator, or len(input) when the comment runs to the end of |input|
  without one.
  '''
  line_end_at, line_end = _FindNextToken(input, ('\n', '\r'), start)
  if line_end is not None:
    output.append(line_end)
    return line_end_at + len(line_end)
  return len(input)
def _ReadMultilineComment(input, start, output):
  '''Skips a /* ... */ comment whose text begins at index |start| of |input|.

  Nothing is appended to |output|. Returns the index just past the closing */.
  Raises Exception when no */ is found from |start| onwards.
  '''
  close_at, closer = _FindNextToken(input, ('*/',), start)
  if closer is not None:
    return close_at + len(closer)
  raise Exception("Multiline comment end token (*/) not found")
def Nom(input):
  '''Returns |input| with its // and /* */ comments removed.

  The text is scanned for the next string opener ("), line comment opener (//)
  or block comment opener (/*). Everything in front of that token is copied as
  is, then the matching reader consumes the construct and reports where
  scanning should resume. Strings go through _ReadString so that comment
  markers inside them are not treated as comments.

  Any exception raised by a reader (for example _ReadMultilineComment on a
  block comment that is never closed) propagates to the caller.
  '''
  token_actions = {
    '"': _ReadString,
    '//': _ReadComment,
    '/*': _ReadMultilineComment,
  }
  # The set of tokens never changes, so build it once outside the loop.
  tokens = tuple(token_actions)
  output = []
  pos = 0
  while pos < len(input):
    token_index, token = _FindNextToken(input, tokens, pos)
    if token is None:
      # No more strings or comments: the rest is copied through untouched.
      output.append(input[pos:])
      break
    output.append(input[pos:token_index])
    # Each reader receives the index just past its opening token and returns
    # the index at which scanning continues.
    pos = token_actions[token](input, token_index + len(token), output)
  return ''.join(output)
if __name__ == '__main__':
  # Filter mode: read all of stdin, strip the comments, write the result to
  # stdout.
  raw_text = sys.stdin.read()
  sys.stdout.write(Nom(raw_text))