tools/gn/string_utils.cc

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #include "tools/gn/string_utils.h"
   6
   7 #include "tools/gn/err.h"
   8 #include "tools/gn/input_file.h"
   9 #include "tools/gn/parser.h"
  10 #include "tools/gn/scope.h"
  11 #include "tools/gn/token.h"
  12 #include "tools/gn/tokenizer.h"
  13 #include "tools/gn/value.h"
  14
  15 namespace {
  16
  17 // Constructs an Err indicating a range inside a string. We assume that the
  18 // token has quotes around it that are not counted by the offset.
  19 Err ErrInsideStringToken(const Token& token, size_t offset, size_t size,
  20                          const std::string& msg,
  21                          const std::string& help = std::string()) {
  22   // The "+1" is skipping over the " at the beginning of the token.
  23   int int_offset = static_cast<int>(offset);
  24   Location begin_loc(token.location().file(),
  25                      token.location().line_number(),
  26                      token.location().char_offset() + int_offset + 1,
  27                      token.location().byte() + int_offset + 1);
  28   Location end_loc(
  29       token.location().file(),
  30       token.location().line_number(),
  31       token.location().char_offset() + int_offset + 1 + static_cast<int>(size),
  32       token.location().byte() + int_offset + 1 + static_cast<int>(size));
  33   return Err(LocationRange(begin_loc, end_loc), msg, help);
  34 }
  35
  36 // Notes about expression interpolation. This is based loosly on Dart but is
  37 // slightly less flexible. In Dart, seeing the ${ in a string is something
  38 // the toplevel parser knows about, and it will recurse into the block
  39 // treating it as a first-class {...} block. So even things like this work:
  40 //   "hello ${"foo}"*2+"bar"}"  =>  "hello foo}foo}bar"
  41 // (you can see it did not get confused by the nested strings or the nested "}"
  42 // inside the block).
  43 //
  44 // This is cool but complicates the parser for almost no benefit for this
  45 // non-general-purpose programming language. The main reason expressions are
  46 // supported here at all are to support "${scope.variable}" and "${list[0]}",
  47 // neither of which have any of these edge-cases.
  48 //
  49 // In this simplified approach, we search for the terminating '}' and execute
  50 // the result. This means we can't support any expressions with embedded '}'
  51 // or '"'. To keep people from getting confusing about what's supported and
  52 // what's not, only identifier and accessor expressions are allowed (neither
  53 // of these run into any of these edge-cases).
  54 bool AppendInterpolatedExpression(Scope* scope,
  55                                   const Token& token,
  56                                   const char* input,
  57                                   size_t begin_offset,
  58                                   size_t end_offset,
  59                                   std::string* output,
  60                                   Err* err) {
  61   SourceFile empty_source_file;  // Prevent most vexing parse.
  62   InputFile input_file(empty_source_file);
  63   input_file.SetContents(
  64       std::string(&input[begin_offset], end_offset - begin_offset));
  65
  66   // Tokenize.
  67   std::vector<Token> tokens = Tokenizer::Tokenize(&input_file, err);
  68   if (err->has_error()) {
  69     // The error will point into our temporary buffer, rewrite it to refer
  70     // to the original token. This will make the location information less
  71     // precise, but generally there won't be complicated things in string
  72     // interpolations.
  73     *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
  74                                 err->message(), err->help_text());
  75     return false;
  76   }
  77
  78   // Parse.
  79   scoped_ptr<ParseNode> node = Parser::ParseExpression(tokens, err);
  80   if (err->has_error()) {
  81     // Rewrite error as above.
  82     *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
  83                                 err->message(), err->help_text());
  84     return false;
  85   }
  86   if (!(node->AsIdentifier() || node->AsAccessor())) {
  87     *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
  88         "Invalid string interpolation.",
  89         "The thing inside the ${} must be an identifier ${foo},\n"
  90         "a scope access ${foo.bar}, or a list access ${foo[0]}.");
  91     return false;
  92   }
  93
  94   // Evaluate.
  95   Value result = node->Execute(scope, err);
  96   if (err->has_error()) {
  97     // Rewrite error as above.
  98     *err = ErrInsideStringToken(token, begin_offset, end_offset - begin_offset,
  99                                 err->message(), err->help_text());
 100     return false;
 101   }
 102
 103   output->append(result.ToString(false));
 104   return true;
 105 }
 106
 107 bool AppendInterpolatedIdentifier(Scope* scope,
 108                                   const Token& token,
 109                                   const char* input,
 110                                   size_t begin_offset,
 111                                   size_t end_offset,
 112                                   std::string* output,
 113                                   Err* err) {
 114   base::StringPiece identifier(&input[begin_offset],
 115                                end_offset - begin_offset);
 116   const Value* value = scope->GetValue(identifier, true);
 117   if (!value) {
 118     // We assume the input points inside the token.
 119     *err = ErrInsideStringToken(
 120         token, identifier.data() - token.value().data() - 1, identifier.size(),
 121         "Undefined identifier in string expansion.",
 122         std::string("\"") + identifier + "\" is not currently in scope.");
 123     return false;
 124   }
 125
 126   output->append(value->ToString(false));
 127   return true;
 128 }
 129
 130 // Handles string interpolations: $identifier and ${expression}
 131 //
 132 // |*i| is the index into |input| of the $. This will be updated to point to
 133 // the last character consumed on success. The token is the original string
 134 // to blame on failure.
 135 //
 136 // On failure, returns false and sets the error. On success, appends the
 137 // result of the interpolation to |*output|.
 138 bool AppendStringInterpolation(Scope* scope,
 139                                const Token& token,
 140                                const char* input, size_t size,
 141                                size_t* i,
 142                                std::string* output,
 143                                Err* err) {
 144   size_t dollars_index = *i;
 145   (*i)++;
 146   if (*i == size) {
 147     *err = ErrInsideStringToken(token, dollars_index, 1, "$ at end of string.",
 148         "I was expecting an identifier or {...} after the $.");
 149     return false;
 150   }
 151
 152   if (input[*i] == '{') {
 153     // Bracketed expression.
 154     (*i)++;
 155     size_t begin_offset = *i;
 156
 157     // Find the closing } and check for non-identifier chars. Don't need to
 158     // bother checking for the more-restricted first character of an identifier
 159     // since the {} unambiguously denotes the range, and identifiers with
 160     // invalid names just won't be found later.
 161     bool has_non_ident_chars = false;
 162     while (*i < size && input[*i] != '}') {
 163       has_non_ident_chars |= Tokenizer::IsIdentifierContinuingChar(input[*i]);
 164       (*i)++;
 165     }
 166     if (*i == size) {
 167       *err = ErrInsideStringToken(token, dollars_index, *i - dollars_index,
 168                                   "Unterminated ${...");
 169       return false;
 170     }
 171
 172     // In the common case, the thing inside the {} will actually be a
 173     // simple identifier. Avoid all the complicated parsing of accessors
 174     // in this case.
 175     if (!has_non_ident_chars) {
 176       return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
 177                                           *i, output, err);
 178     }
 179     return AppendInterpolatedExpression(scope, token, input, begin_offset, *i,
 180                                         output, err);
 181   }
 182
 183   // Simple identifier.
 184   // The first char of an identifier is more restricted.
 185   if (!Tokenizer::IsIdentifierFirstChar(input[*i])) {
 186     *err = ErrInsideStringToken(
 187         token, dollars_index, *i - dollars_index + 1,
 188         "$ not followed by an identifier char.",
 189         "It you want a literal $ use \"\\$\".");
 190     return false;
 191   }
 192   size_t begin_offset = *i;
 193   (*i)++;
 194
 195   // Find the first non-identifier char following the string.
 196   while (*i < size && Tokenizer::IsIdentifierContinuingChar(input[*i]))
 197     (*i)++;
 198   size_t end_offset = *i;
 199   (*i)--;  // Back up to mark the last character consumed.
 200   return AppendInterpolatedIdentifier(scope, token, input, begin_offset,
 201                                       end_offset, output, err);
 202 }
 203
 204 }  // namespace
 205
 206 bool ExpandStringLiteral(Scope* scope,
 207                          const Token& literal,
 208                          Value* result,
 209                          Err* err) {
 210   DCHECK(literal.type() == Token::STRING);
 211   DCHECK(literal.value().size() > 1);  // Should include quotes.
 212   DCHECK(result->type() == Value::STRING);  // Should be already set.
 213
 214   // The token includes the surrounding quotes, so strip those off.
 215   const char* input = &literal.value().data()[1];
 216   size_t size = literal.value().size() - 2;
 217
 218   std::string& output = result->string_value();
 219   output.reserve(size);
 220   for (size_t i = 0; i < size; i++) {
 221     if (input[i] == '\\') {
 222       if (i < size - 1) {
 223         switch (input[i + 1]) {
 224           case '\\':
 225           case '"':
 226           case '$':
 227             output.push_back(input[i + 1]);
 228             i++;
 229             continue;
 230           default:  // Everything else has no meaning: pass the literal.
 231             break;
 232         }
 233       }
 234       output.push_back(input[i]);
 235     } else if (input[i] == '$') {
 236       if (!AppendStringInterpolation(scope, literal, input, size, &i,
 237                                      &output, err))
 238         return false;
 239     } else {
 240       output.push_back(input[i]);
 241     }
 242   }
 243   return true;
 244 }
 245
 246 std::string RemovePrefix(const std::string& str, const std::string& prefix) {
 247   CHECK(str.size() >= prefix.size() &&
 248         str.compare(0, prefix.size(), prefix) == 0);
 249   return str.substr(prefix.size());
 250 }
 251
 252 void TrimTrailingSlash(std::string* str) {
 253   if (!str->empty()) {
 254     DCHECK((*str)[str->size() - 1] == '/');
 255     str->resize(str->size() - 1);
 256   }
 257 }