1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #include "tools/gn/string_utils.h"
7 #include "tools/gn/err.h"
8 #include "tools/gn/input_file.h"
9 #include "tools/gn/parser.h"
10 #include "tools/gn/scope.h"
11 #include "tools/gn/token.h"
12 #include "tools/gn/tokenizer.h"
13 #include "tools/gn/value.h"
17 // Constructs an Err indicating a range inside a string. We assume that the
18 // token has quotes around it that are not counted by the offset.
19 Err
ErrInsideStringToken(const Token
& token
, size_t offset
, size_t size
,
20 const std::string
& msg
,
21 const std::string
& help
= std::string()) {
22 // The "+1" is skipping over the " at the beginning of the token.
23 int int_offset
= static_cast<int>(offset
);
24 Location
begin_loc(token
.location().file(),
25 token
.location().line_number(),
26 token
.location().char_offset() + int_offset
+ 1,
27 token
.location().byte() + int_offset
+ 1);
29 token
.location().file(),
30 token
.location().line_number(),
31 token
.location().char_offset() + int_offset
+ 1 + static_cast<int>(size
),
32 token
.location().byte() + int_offset
+ 1 + static_cast<int>(size
));
33 return Err(LocationRange(begin_loc
, end_loc
), msg
, help
);
// Notes about expression interpolation. This is based loosely on Dart but is
// slightly less flexible. In Dart, seeing the ${ in a string is something
// the toplevel parser knows about, and it will recurse into the block
// treating it as a first-class {...} block. So even things like this work:
//   "hello ${"foo}"*2+"bar"}"  =>  "hello foo}foo}bar"
// (you can see it did not get confused by the nested strings or the nested
// "}").
//
// This is cool but complicates the parser for almost no benefit for this
// non-general-purpose programming language. The main reason expressions are
// supported here at all is to support "${scope.variable}" and "${list[0]}",
// neither of which has any of these edge cases.
//
// In this simplified approach, we search for the terminating '}' and execute
// the result. This means we can't support any expressions with embedded '}'
// or '"'. To keep people from getting confused about what's supported and
// what's not, only identifier and accessor expressions are allowed (neither
// of these runs into any of these edge cases).
54 bool AppendInterpolatedExpression(Scope
* scope
,
61 SourceFile empty_source_file
; // Prevent most vexing parse.
62 InputFile
input_file(empty_source_file
);
63 input_file
.SetContents(
64 std::string(&input
[begin_offset
], end_offset
- begin_offset
));
67 std::vector
<Token
> tokens
= Tokenizer::Tokenize(&input_file
, err
);
68 if (err
->has_error()) {
69 // The error will point into our temporary buffer, rewrite it to refer
70 // to the original token. This will make the location information less
71 // precise, but generally there won't be complicated things in string
73 *err
= ErrInsideStringToken(token
, begin_offset
, end_offset
- begin_offset
,
74 err
->message(), err
->help_text());
79 scoped_ptr
<ParseNode
> node
= Parser::ParseExpression(tokens
, err
);
80 if (err
->has_error()) {
81 // Rewrite error as above.
82 *err
= ErrInsideStringToken(token
, begin_offset
, end_offset
- begin_offset
,
83 err
->message(), err
->help_text());
86 if (!(node
->AsIdentifier() || node
->AsAccessor())) {
87 *err
= ErrInsideStringToken(token
, begin_offset
, end_offset
- begin_offset
,
88 "Invalid string interpolation.",
89 "The thing inside the ${} must be an identifier ${foo},\n"
90 "a scope access ${foo.bar}, or a list access ${foo[0]}.");
95 Value result
= node
->Execute(scope
, err
);
96 if (err
->has_error()) {
97 // Rewrite error as above.
98 *err
= ErrInsideStringToken(token
, begin_offset
, end_offset
- begin_offset
,
99 err
->message(), err
->help_text());
103 output
->append(result
.ToString(false));
107 bool AppendInterpolatedIdentifier(Scope
* scope
,
114 base::StringPiece
identifier(&input
[begin_offset
],
115 end_offset
- begin_offset
);
116 const Value
* value
= scope
->GetValue(identifier
, true);
118 // We assume the input points inside the token.
119 *err
= ErrInsideStringToken(
120 token
, identifier
.data() - token
.value().data() - 1, identifier
.size(),
121 "Undefined identifier in string expansion.",
122 std::string("\"") + identifier
+ "\" is not currently in scope.");
126 output
->append(value
->ToString(false));
130 // Handles string interpolations: $identifier and ${expression}
132 // |*i| is the index into |input| of the $. This will be updated to point to
133 // the last character consumed on success. The token is the original string
134 // to blame on failure.
136 // On failure, returns false and sets the error. On success, appends the
137 // result of the interpolation to |*output|.
138 bool AppendStringInterpolation(Scope
* scope
,
140 const char* input
, size_t size
,
144 size_t dollars_index
= *i
;
147 *err
= ErrInsideStringToken(token
, dollars_index
, 1, "$ at end of string.",
148 "I was expecting an identifier or {...} after the $.");
152 if (input
[*i
] == '{') {
153 // Bracketed expression.
155 size_t begin_offset
= *i
;
157 // Find the closing } and check for non-identifier chars. Don't need to
158 // bother checking for the more-restricted first character of an identifier
159 // since the {} unambiguously denotes the range, and identifiers with
160 // invalid names just won't be found later.
161 bool has_non_ident_chars
= false;
162 while (*i
< size
&& input
[*i
] != '}') {
163 has_non_ident_chars
|= Tokenizer::IsIdentifierContinuingChar(input
[*i
]);
167 *err
= ErrInsideStringToken(token
, dollars_index
, *i
- dollars_index
,
168 "Unterminated ${...");
172 // In the common case, the thing inside the {} will actually be a
173 // simple identifier. Avoid all the complicated parsing of accessors
175 if (!has_non_ident_chars
) {
176 return AppendInterpolatedIdentifier(scope
, token
, input
, begin_offset
,
179 return AppendInterpolatedExpression(scope
, token
, input
, begin_offset
, *i
,
183 // Simple identifier.
184 // The first char of an identifier is more restricted.
185 if (!Tokenizer::IsIdentifierFirstChar(input
[*i
])) {
186 *err
= ErrInsideStringToken(
187 token
, dollars_index
, *i
- dollars_index
+ 1,
188 "$ not followed by an identifier char.",
189 "It you want a literal $ use \"\\$\".");
192 size_t begin_offset
= *i
;
195 // Find the first non-identifier char following the string.
196 while (*i
< size
&& Tokenizer::IsIdentifierContinuingChar(input
[*i
]))
198 size_t end_offset
= *i
;
199 (*i
)--; // Back up to mark the last character consumed.
200 return AppendInterpolatedIdentifier(scope
, token
, input
, begin_offset
,
201 end_offset
, output
, err
);
// Expands the STRING token |literal| into the string held by |result|.
// The token's surrounding quotes are stripped, then the contents are walked
// one character at a time: a backslash is examined together with the
// character that follows it, a '$' is handed to AppendStringInterpolation,
// and every other character is copied to the output as-is.
// |result| must already be of type Value::STRING (DCHECKed below).
206 bool ExpandStringLiteral(Scope
* scope
,
207 const Token
& literal
,
210 DCHECK(literal
.type() == Token::STRING
);
211 DCHECK(literal
.value().size() > 1); // Should include quotes.
212 DCHECK(result
->type() == Value::STRING
); // Should be already set.
214 // The token includes the surrounding quotes, so strip those off.
215 const char* input
= &literal
.value().data()[1];
216 size_t size
= literal
.value().size() - 2;
// Write directly into the result's string; reserving |size| up front covers
// the case where nothing is expanded.
218 std::string
& output
= result
->string_value();
219 output
.reserve(size
);
// |i| is passed by address to AppendStringInterpolation below, so it can be
// advanced by that call as well as by the loop increment.
220 for (size_t i
= 0; i
< size
; i
++) {
221 if (input
[i
] == '\\') {
// Decide based on the character after the backslash.
223 switch (input
[i
+ 1]) {
227 output
.push_back(input
[i
+ 1]);
230 default: // Everything else has no meaning: pass the literal.
234 output
.push_back(input
[i
]);
235 } else if (input
[i
] == '$') {
236 if (!AppendStringInterpolation(scope
, literal
, input
, size
, &i
,
// Ordinary character: copy it through unchanged.
240 output
.push_back(input
[i
]);
246 std::string
RemovePrefix(const std::string
& str
, const std::string
& prefix
) {
247 CHECK(str
.size() >= prefix
.size() &&
248 str
.compare(0, prefix
.size(), prefix
) == 0);
249 return str
.substr(prefix
.size());
252 void TrimTrailingSlash(std::string
* str
) {
254 DCHECK((*str
)[str
->size() - 1] == '/');
255 str
->resize(str
->size() - 1);