tools/gn/tokenizer.h

   1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
   2 // Use of this source code is governed by a BSD-style license that can be
   3 // found in the LICENSE file.
   4
   5 #ifndef TOOLS_GN_TOKENIZER_H_
   6 #define TOOLS_GN_TOKENIZER_H_
   7
   8 #include <vector>
   9
  10 #include "base/basictypes.h"
  11 #include "base/strings/string_piece.h"
  12 #include "base/strings/string_util.h"
  13 #include "tools/gn/err.h"
  14 #include "tools/gn/token.h"
  15
  16 class InputFile;
  17
  18 class Tokenizer {
       // Turns the contents of an InputFile into Token objects. Construction is
       // private, so outside code only reaches this class through the static
       // functions in the public section.
  19  public:
       // Produces the list of tokens for |input_file|. |err| is the error
       // out-parameter; presumably it is set when tokenizing fails -- confirm
       // against the definition, which is not part of this header.
  20   static std::vector<Token> Tokenize(const InputFile* input_file, Err* err);
  21
  22   // Returns the byte offset of the beginning of line |n| in |buf| (lines are
  23   // counted starting from 1), or (size_t)-1 if there aren't that many lines
  24   // in the buffer. Note that this will return the byte one past the end of
  25   // the input if the last character is a newline.
  26   //
  27   // This is a helper function for error output so that the tokenizer's
  28   // notion of lines can be used elsewhere.
  29   static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n);
  30
  31   // Returns true if the given offset of the string piece counts as a newline.
  32   // The offset must be in the buffer.
  33   static bool IsNewline(const base::StringPiece& buffer, size_t offset);
  34
       // Returns true if |c| can start an identifier: an ASCII letter (as decided
       // by base::IsAsciiAlpha) or an underscore.
  35   static bool IsIdentifierFirstChar(char c) {
  36     return base::IsAsciiAlpha(c) || c == '_';
  37   }
  38
       // Returns true if |c| can appear in an identifier after its first
       // character.
  39   static bool IsIdentifierContinuingChar(char c) {
  40     // Also allow digits after the first char.
  41     return IsIdentifierFirstChar(c) || base::IsAsciiDigit(c);
  42   }
  43
  44  private:
  45   // InputFile must outlive the tokenizer and all generated tokens.
  46   Tokenizer(const InputFile* input_file, Err* err);
  47   ~Tokenizer();
  48
       // NOTE(review): presumably the main loop that Tokenize() drives on a
       // temporary instance; the definition is not in this header -- confirm.
  49   std::vector<Token> Run();
  50
       // NOTE(review): the three scanning steps below are only declared here;
       // their exact contracts (for example what AdvanceToNextToken() skips) have
       // to be read from their definitions.
  51   void AdvanceToNextToken();
  52   Token::Type ClassifyCurrent() const;
  53   void AdvanceToEndOfToken(const Location& location, Token::Type type);
  54
  55   // Whether from this location back to the beginning of the line is only
  56   // whitespace. |location| should be the first character of the token to be
  57   // checked.
  58   bool AtStartOfLine(size_t location) const;
  59
       // Predicates about the input at the current position (going by their
       // names; the definitions are not in this header).
  60   bool IsCurrentWhitespace() const;
  61   bool IsCurrentNewline() const;
  62   bool IsCurrentStringTerminator(char quote_char) const;
  63
       // True while the current offset is strictly less than the input size.
       // Equivalent to !at_end() as long as cur_ never exceeds input_.size().
  64   bool CanIncrement() const { return cur_ < input_.size(); }
  65
  66   // Increments the current location by one.
  67   void Advance();
  68
  69   // Returns the current character in the file as a location.
  70   Location GetCurrentLocation() const;
  71
       // Returns an Err value for |location|. NOTE(review): going by the name
       // this describes an invalid token found there; the message contents are
       // decided in the definition, which is not shown here.
  72   Err GetErrorForInvalidToken(const Location& location) const;
  73
       // True once the end of the input has been reached or |err_| reports an
       // error.
  74   bool done() const { return at_end() || has_error(); }
  75
       // True when the current offset equals the size of the input.
  76   bool at_end() const { return cur_ == input_.size(); }
       // Character at the current offset. This accessor does no range check of
       // its own, so callers must not use it when at_end() is true.
  77   char cur_char() const { return input_[cur_]; }
  78
       // Forwards to the Err object. |err_| is dereferenced unconditionally, so
       // it has to be non-null.
  79   bool has_error() const { return err_->has_error(); }
  80
       // NOTE(review): presumably the tokens collected so far and handed back by
       // Run() -- confirm in the definition.
  81   std::vector<Token> tokens_;
  82
       // Not owned; see the lifetime requirement on the constructor.
  83   const InputFile* input_file_;
       // The input buffer that cur_ indexes into. base::StringPiece is a
       // non-owning view; presumably it refers to the contents of |input_file_|.
  84   const base::StringPiece input_;
       // Error object supplied by the caller; queried through has_error().
  85   Err* err_;
  86   size_t cur_;  // Byte offset into input buffer.
  87
       // NOTE(review): presumably the line/column position corresponding to
       // cur_, used to build Location values; whether they are 0- or 1-based has
       // to be confirmed in the definition.
  88   int line_number_;
  89   int char_in_line_;
  90
       // Chromium macro that disables copy construction and copy assignment.
  91   DISALLOW_COPY_AND_ASSIGN(Tokenizer);
  92 };
  93
  94 #endif  // TOOLS_GN_TOKENIZER_H_