Add ICU message format support
[chromium-blink-merge.git] / tools / gn / tokenizer.h
blob29d107aca9cb69488a6730d94f92ebf8b1a72e82
1 // Copyright (c) 2013 The Chromium Authors. All rights reserved.
2 // Use of this source code is governed by a BSD-style license that can be
3 // found in the LICENSE file.
5 #ifndef TOOLS_GN_TOKENIZER_H_
6 #define TOOLS_GN_TOKENIZER_H_
8 #include <vector>
10 #include "base/basictypes.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h"
13 #include "tools/gn/err.h"
14 #include "tools/gn/token.h"
16 class InputFile;
18 class Tokenizer {
19 public:
20 static std::vector<Token> Tokenize(const InputFile* input_file, Err* err);
22 // Counts lines in the given buffer (the first line is "1") and returns
23 // the byte offset of the beginning of that line, or (size_t)-1 if there
24 // aren't that many lines in the file. Note that this will return the byte
25 // one past the end of the input if the last character is a newline.
27 // This is a helper function for error output so that the tokenizer's
28 // notion of lines can be used elsewhere.
29 static size_t ByteOffsetOfNthLine(const base::StringPiece& buf, int n);
31 // Returns true if the given offset of the string piece counts as a newline.
32 // The offset must be in the buffer.
33 static bool IsNewline(const base::StringPiece& buffer, size_t offset);
35 static bool IsIdentifierFirstChar(char c) {
36 return base::IsAsciiAlpha(c) || c == '_';
39 static bool IsIdentifierContinuingChar(char c) {
40 // Also allow digits after the first char.
41 return IsIdentifierFirstChar(c) || base::IsAsciiDigit(c);
44 private:
45 // InputFile must outlive the tokenizer and all generated tokens.
46 Tokenizer(const InputFile* input_file, Err* err);
47 ~Tokenizer();
49 std::vector<Token> Run();
51 void AdvanceToNextToken();
52 Token::Type ClassifyCurrent() const;
53 void AdvanceToEndOfToken(const Location& location, Token::Type type);
55 // Whether from this location back to the beginning of the line is only
56 // whitespace. |location| should be the first character of the token to be
57 // checked.
58 bool AtStartOfLine(size_t location) const;
60 bool IsCurrentWhitespace() const;
61 bool IsCurrentNewline() const;
62 bool IsCurrentStringTerminator(char quote_char) const;
64 bool CanIncrement() const { return cur_ < input_.size(); }
66 // Increments the current location by one.
67 void Advance();
69 // Returns the current character in the file as a location.
70 Location GetCurrentLocation() const;
72 Err GetErrorForInvalidToken(const Location& location) const;
74 bool done() const { return at_end() || has_error(); }
76 bool at_end() const { return cur_ == input_.size(); }
77 char cur_char() const { return input_[cur_]; }
79 bool has_error() const { return err_->has_error(); }
81 std::vector<Token> tokens_;
83 const InputFile* input_file_;
84 const base::StringPiece input_;
85 Err* err_;
86 size_t cur_; // Byte offset into input buffer.
88 int line_number_;
89 int char_in_line_;
91 DISALLOW_COPY_AND_ASSIGN(Tokenizer);
94 #endif // TOOLS_GN_TOKENIZER_H_