1 // Copyright 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "chrome/tools/profile_reset/jtl_parser.h"
9 #include "base/logging.h"
10 #include "third_party/re2/re2/re2.h"
14 // RegEx that matches the first line of a text. Will throw away any potential
15 // double-slash-introduced comments and the potential trailing EOL character.
16 // Note: will fail in case the first line contains an unmatched double-quote
17 // outside of comments.
// Pattern handed to the RE2 object that RemoveCommentsAndAllWhitespace() uses
// to consume the verbose text one line at a time.
// NOTE(review): only the first pattern fragment is visible below. The two
// trailing comments describe a comment-matching fragment and an EOL fragment,
// but no string literal follows either of them here, and the declaration has
// no visible terminating semicolon -- confirm against the complete file.
18 const char kSingleLineWithMaybeCommentsRE
[] =
19 // Non-greedily match and capture sequences of 1.) string literals inside
20 // correctly matched double-quotes, or 2.) any other character.
// Fragment breakdown: anchored at the start of the text; the outer group
// captures a lazily repeated run of either a double-quoted literal that
// contains no newline, or a single character that is neither a double-quote
// nor a newline.
21 "^((?:\"[^\"\\n]*\"|[^\"\\n])*?)"
22 // Greedily match and throw away the potential comment.
24 // Match and throw away EOL, or match end-of-string.
27 // RegEx to match either a double-quote-enclosed string literal or a whitespace.
28 // Applied repeatedly and without overlapping, can be used to remove whitespace
29 // outside of string literals.
30 const char kRemoveWhitespaceRE
[] = "(\"[^\"]*\")|\\s";
// The substitution pattern to use together with kRemoveWhitespaceRE when
// replacing. Only capture group 1 (the string literal alternative) is
// back-referenced, so a match on the whitespace alternative is replaced with
// nothing, i.e. the whitespace gets removed.
const char kRemoveWhitespaceRewrite[] = "\\1";
// Separator that terminates a sentence. ParseNextOperation() compares the
// separator it parsed against this value to decide whether the operation ends
// the current sentence.
const char kEndOfSentenceSeparator[] = ";";
// The 'true' Boolean keyword. A parsed Boolean argument is considered true
// exactly when its text equals this keyword.
const char kTrueKeyword[] = "true";
// RegEx that matches and captures one argument, which is either a
// double-quote-enclosed string (contents captured as group 1, without the
// quotes) or a Boolean value (captured as group 2). The argument must be
// followed by a comma, which is matched and thrown away, or by the end of the
// text.
const char kSingleArgumentRE[] = "(?:(?:\"([^\"]*)\"|(true|false))(?:,|$))";
// RegEx-es that, when concatenated in this order, will match a single
// operation and capture, respectively: the operation name, the optional
// argument text (without the enclosing parentheses), and the separator that
// follows.
const char kOperationNameRE[] = "([[:word:]]+)";
const char kMaybeArgumentListRE[] =
    "(?:\\("                    // Opening parenthesis.
    "((?:\"[^\"]*\"|[^\")])*)"  // Capture: anything inside, quote-aware.
    "\\))?";                    // Closing parenthesis + everything optional.
const char kOperationSeparatorRE[] = "(;|\\.)";
57 struct JtlParser::ParsingState
{
58 explicit ParsingState(const re2::StringPiece
& compacted_source
)
59 : single_operation_regex(std::string(kOperationNameRE
) +
60 kMaybeArgumentListRE
+
61 kOperationSeparatorRE
),
62 single_argument_regex(kSingleArgumentRE
),
63 remaining_compacted_source(compacted_source
),
64 last_line_number(0) {}
66 RE2 single_operation_regex
;
67 RE2 single_argument_regex
;
68 re2::StringPiece remaining_compacted_source
;
69 re2::StringPiece last_context
;
70 size_t last_line_number
;
73 JtlParser::JtlParser(const std::string
& compacted_source_code
,
74 const std::vector
<size_t>& newline_indices
)
75 : compacted_source_(compacted_source_code
),
76 newline_indices_(newline_indices
) {
77 state_
.reset(new ParsingState(compacted_source_
));
80 JtlParser::~JtlParser() {}
// Processes |verbose_text| one line at a time, appending each processed line
// to |compacted_text| and recording, after every line, the current length of
// |compacted_text| in |newline_indices|.
//
// |compacted_text| and |newline_indices| must be non-null (DCHECKed); both are
// cleared before processing starts. |error_line_number| may be null; when a
// line fails to match and it is non-null, it receives the number of lines
// handled so far.
//
// NOTE(review): parts of this function are not visible here. |line| is used
// but never declared, the failure branch is never closed, the argument list
// "&line, remove_whitespace_regex, kRemoveWhitespaceRewrite" has no call head,
// and no return statement or closing brace is present. The description above
// covers only the visible statements -- confirm against the complete file.
83 bool JtlParser::RemoveCommentsAndAllWhitespace(
84 const std::string
& verbose_text
,
85 std::string
* compacted_text
,
86 std::vector
<size_t>* newline_indices
,
87 size_t* error_line_number
) {
88 DCHECK(compacted_text
);
89 DCHECK(newline_indices
);
// Both patterns are compiled once, before the per-line loop.
91 RE2
single_line_regex(kSingleLineWithMaybeCommentsRE
);
92 RE2
remove_whitespace_regex(kRemoveWhitespaceRE
);
93 re2::StringPiece
verbose_text_piece(verbose_text
);
// Start from empty outputs so that stale content never leaks into the result.
94 compacted_text
->clear();
95 newline_indices
->clear();
// Each successful Consume() strips the matched line off the front of
// |verbose_text_piece|; loop until nothing is left.
96 while (!verbose_text_piece
.empty()) {
97 if (!RE2::Consume(&verbose_text_piece
, single_line_regex
, &line
)) {
// One index is pushed per processed line, so the vector's size is the count
// of lines handled before the failing one.
98 if (error_line_number
)
99 *error_line_number
= newline_indices
->size();
// NOTE(review): the head of the call that takes the following arguments is
// missing; presumably RE2::GlobalReplace(), which would drop whitespace
// outside of string literals from |line| -- TODO confirm.
103 &line
, remove_whitespace_regex
, kRemoveWhitespaceRewrite
);
// Append the processed line, then remember where it ends in the compacted
// text.
104 *compacted_text
+= line
;
105 newline_indices
->push_back(compacted_text
->size());
110 bool JtlParser::HasFinished() {
111 return state_
->remaining_compacted_source
.empty();
// Parses the next operation from the front of the remaining compacted source.
//
// Visible behaviour: records the parse position in the state (context and
// line number), consumes one operation using |single_operation_regex|, sets
// |*ends_sentence| to whether |separator| equals kEndOfSentenceSeparator, and
// then consumes the argument text one argument at a time, appending a Boolean
// or a string value to |argument_list|.
//
// |argument_list| and |ends_sentence| must be non-null (DCHECKed).
//
// NOTE(review): several parts of this function are not visible here. Both
// RE2::Consume() calls are cut off after the regex argument, so their capture
// targets and failure handling cannot be seen; |name| is not referenced by
// any visible statement; the branch structure around the two Append() calls
// is incomplete; and there is no return statement or closing brace. Confirm
// all of this against the complete file.
114 bool JtlParser::ParseNextOperation(std::string
* name
,
115 base::ListValue
* argument_list
,
116 bool* ends_sentence
) {
118 DCHECK(argument_list
);
119 DCHECK(ends_sentence
);
// Snapshot the unparsed text before consuming anything; it is trimmed further
// down so that only the consumed part remains.
121 state_
->last_context
= state_
->remaining_compacted_source
;
// Total size minus what is still unparsed gives the current offset into the
// compacted source, which is then translated by GetOriginalLineNumber().
122 state_
->last_line_number
= GetOriginalLineNumber(
123 compacted_source_
.size() - state_
->remaining_compacted_source
.length());
125 std::string arguments
, separator
;
// NOTE(review): this call is truncated after the regex argument; presumably
// it captures into |name|, |arguments| and |separator| -- TODO confirm.
126 if (!RE2::Consume(&state_
->remaining_compacted_source
,
127 state_
->single_operation_regex
,
133 *ends_sentence
= (separator
== kEndOfSentenceSeparator
);
// Dropping as many trailing characters as are still unparsed leaves exactly
// the text consumed by the Consume() call above.
134 state_
->last_context
.remove_suffix(state_
->remaining_compacted_source
.size());
// Walk the argument text until nothing is left.
136 re2::StringPiece
arguments_piece(arguments
);
137 std::string string_value
, boolean_value
;
138 while (!arguments_piece
.empty()) {
// NOTE(review): this call is truncated as well; presumably it captures into
// |string_value| and |boolean_value| -- TODO confirm.
139 if (!RE2::Consume(&arguments_piece
,
140 state_
->single_argument_regex
,
// A non-empty |boolean_value| is appended as a Boolean that is true exactly
// when the text equals kTrueKeyword.
145 if (!boolean_value
.empty()) {
146 argument_list
->Append(
147 new base::FundamentalValue(boolean_value
== kTrueKeyword
));
149 // |string_value| might be empty for an empty string
150 argument_list
->Append(new base::StringValue(string_value
));
// Computes a line number for |compacted_index| from the position that
// std::upper_bound() finds in |newline_indices_|. ParseNextOperation() stores
// the result as the last line number.
// NOTE(review): the third argument of std::upper_bound() and whatever joins
// the search result to the trailing newline_indices_.begin() are not visible
// here, and neither is the closing brace. Presumably the result is the
// iterator distance from begin() to the first stored index greater than
// |compacted_index| -- confirm against the complete file.
156 size_t JtlParser::GetOriginalLineNumber(size_t compacted_index
) const {
157 return static_cast<size_t>(std::upper_bound(newline_indices_
.begin(),
158 newline_indices_
.end(),
160 newline_indices_
.begin());
163 size_t JtlParser::GetLastLineNumber() const { return state_
->last_line_number
; }
// Returns a std::string copy of the context stored in the parsing state.
// NOTE(review): the closing brace of this function lies beyond the visible
// text.
165 std::string
JtlParser::GetLastContext() const {
166 return state_
->last_context
.ToString();