// Copyright (c) 2013 The Chromium Authors. All rights reserved.
// Use of this source code is governed by a BSD-style license that can be
// found in the LICENSE file.
5 #ifndef TOOLS_GN_TOKENIZER_H_
6 #define TOOLS_GN_TOKENIZER_H_
10 #include "base/basictypes.h"
11 #include "base/strings/string_piece.h"
12 #include "base/strings/string_util.h"
13 #include "tools/gn/err.h"
14 #include "tools/gn/token.h"
20 static std::vector
<Token
> Tokenize(const InputFile
* input_file
, Err
* err
);
22 // Counts lines in the given buffer (the first line is "1") and returns
23 // the byte offset of the beginning of that line, or (size_t)-1 if there
24 // aren't that many lines in the file. Note that this will return the byte
25 // one past the end of the input if the last character is a newline.
27 // This is a helper function for error output so that the tokenizer's
28 // notion of lines can be used elsewhere.
29 static size_t ByteOffsetOfNthLine(const base::StringPiece
& buf
, int n
);
31 // Returns true if the given offset of the string piece counts as a newline.
32 // The offset must be in the buffer.
33 static bool IsNewline(const base::StringPiece
& buffer
, size_t offset
);
35 static bool IsIdentifierFirstChar(char c
) {
36 return IsAsciiAlpha(c
) || c
== '_';
39 static bool IsIdentifierContinuingChar(char c
) {
40 // Also allow digits after the first char.
41 return IsIdentifierFirstChar(c
) || IsAsciiDigit(c
);
45 // InputFile must outlive the tokenizer and all generated tokens.
46 explicit Tokenizer(const InputFile
* input_file
, Err
* err
);
49 std::vector
<Token
> Run();
51 void AdvanceToNextToken();
52 Token::Type
ClassifyCurrent() const;
53 void AdvanceToEndOfToken(const Location
& location
, Token::Type type
);
55 // Whether from this location back to the beginning of the line is only
56 // whitespace. |location| should be the first character of the token to be
58 bool AtStartOfLine(size_t location
) const;
60 bool IsCurrentWhitespace() const;
61 bool IsCurrentNewline() const;
62 bool IsCurrentStringTerminator(char quote_char
) const;
64 bool CanIncrement() const { return cur_
< input_
.size(); }
66 // Increments the current location by one.
69 // Returns the current character in the file as a location.
70 Location
GetCurrentLocation() const;
72 Err
GetErrorForInvalidToken(const Location
& location
) const;
74 bool done() const { return at_end() || has_error(); }
76 bool at_end() const { return cur_
== input_
.size(); }
77 char cur_char() const { return input_
[cur_
]; }
79 bool has_error() const { return err_
->has_error(); }
81 std::vector
<Token
> tokens_
;
83 const InputFile
* input_file_
;
84 const base::StringPiece input_
;
86 size_t cur_
; // Byte offset into input buffer.
91 DISALLOW_COPY_AND_ASSIGN(Tokenizer
);
94 #endif // TOOLS_GN_TOKENIZER_H_