GIR writer: Generate annotations on all elements
[vala-lang.git] / vapigen / valamarkupreader.vala
blob4db901397a9a2bab66dcff13a731e911dd29c1d2
/* valamarkupreader.vala
 *
 * Copyright (C) 2008-2009  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 */
23 using GLib;
24 using Gee;
/**
 * Simple reader for a subset of XML.
 *
 * The file is memory-mapped on construction and tokenized on demand by
 * {@link read_token}. Known limitations of the subset:
 *
 *  - character entities (&amp;amp; &amp;gt; &amp;lt; &amp;quot; &amp;apos;) are not decoded
 *  - processing instructions and doctype declarations are not consumed as
 *    such; only `<`, `<?` or `<!` is skipped and NONE is returned
 *  - only whitespace skipped by space () updates the line counter, so
 *    newlines inside text, comments or attribute values are not counted
 */
public class Vala.MarkupReader : Object {
	/** Path of the file being read. */
	public string filename { get; construct; }

	/** Name of the element of the most recent START_ELEMENT/END_ELEMENT token. */
	public string name { get; private set; }

	MappedFile mapped_file;

	// [begin, end) is the mapped file content, current is the read cursor
	char* begin;
	char* current;
	char* end;

	int line;
	int column;

	Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);

	// set when `<foo/>` was read: the next read_token call must synthesize
	// the matching END_ELEMENT
	bool empty_element;

	public MarkupReader (string filename) {
		this.filename = filename;
	}

	construct {
		try {
			mapped_file = new MappedFile (filename, false);
			begin = mapped_file.get_contents ();
			end = begin + mapped_file.get_length ();

			current = begin;

			line = 1;
			column = 1;
		} catch (FileError e) {
			// begin/current/end stay null, so read_token reports EOF right away
			Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
		}
	}

	/**
	 * Returns the value of an attribute of the current start element.
	 *
	 * @param attr attribute name
	 * @return the raw (entity-undecoded) value as stored in the attribute map
	 */
	public string? get_attribute (string attr) {
		return attributes[attr];
	}

	/**
	 * Advances the cursor by one byte unless it already is at the end of
	 * the input, so that error recovery never moves it past `end`.
	 */
	void skip_byte () {
		if (current < end) {
			current++;
		}
	}

	/**
	 * Advances the cursor over one UTF-8 encoded character.
	 *
	 * An invalid or truncated sequence is reported and skipped one byte at
	 * a time; this guarantees progress, the caller must ensure
	 * `current < end`.
	 */
	void skip_char () {
		unichar u = ((string) current).get_char_validated ((long) (end - current));
		// -1: invalid sequence, -2: sequence truncated by the end of the
		// buffer or containing a NUL byte
		if (u == (unichar) (-1) || u == (unichar) (-2)) {
			Report.error (null, "invalid UTF-8 character");
			current++;
		} else {
			current += u.to_utf8 (null);
		}
	}

	/**
	 * Reads an element or attribute name starting at the cursor.
	 *
	 * The name ends at whitespace, `>`, `/`, `=` or the end of the input.
	 *
	 * @return the name, empty if the cursor is not positioned on a name
	 */
	string read_name () {
		char* name_begin = current;
		while (current < end) {
			if (current[0].isspace () || current[0] == '>'
			    || current[0] == '/' || current[0] == '=') {
				break;
			}
			skip_char ();
		}
		if (current == name_begin) {
			// syntax error: invalid name
		}
		return ((string) name_begin).ndup (current - name_begin);
	}

	/**
	 * Reads one `name="value"` (or `name='value'`) pair and stores it in
	 * the attribute map.
	 *
	 * Always consumes at least one byte when `current < end`, so the
	 * attribute loop in read_token terminates even on malformed input.
	 */
	void read_attribute () {
		string attr_name = read_name ();
		if (current >= end || current[0] != '=') {
			// error
		}
		skip_byte ();

		// a double quote is assumed unless a single quote is found
		char quote = '"';
		if (current < end && current[0] == '\'') {
			quote = '\'';
		}
		if (current >= end || current[0] != quote) {
			// error
		}
		skip_byte ();

		char* attr_begin = current;
		while (current < end && current[0] != quote) {
			skip_char ();
		}
		// TODO process &amp; &gt; &lt; &quot; &apos;
		string attr_value = ((string) attr_begin).ndup (current - attr_begin);
		if (current >= end || current[0] != quote) {
			// error
		}
		skip_byte ();

		attributes.set (attr_name, attr_value);
	}

	/**
	 * Reads the next token from the input.
	 *
	 * Comments are skipped. After an empty element (`<foo/>`) the next call
	 * returns a synthesized END_ELEMENT without consuming any input.
	 *
	 * @param token_begin location of the first byte of the token
	 * @param token_end location of the last byte of the token
	 * @return the token type; NONE for constructs that are not handled
	 */
	public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		attributes.clear ();

		if (empty_element) {
			empty_element = false;
			// no input is consumed; still hand out defined locations
			token_begin.pos = current;
			token_begin.line = line;
			token_begin.column = column;
			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;
			return MarkupTokenType.END_ELEMENT;
		}

		space ();

		MarkupTokenType type = MarkupTokenType.NONE;
		char* token_start = current;
		token_begin.pos = token_start;
		token_begin.line = line;
		token_begin.column = column;

		if (current >= end) {
			type = MarkupTokenType.EOF;
		} else if (current[0] == '<') {
			current++;
			if (current >= end) {
				// error
			} else if (current[0] == '?') {
				// processing instruction
			} else if (current[0] == '!') {
				// comment or doctype
				current++;
				if (current < end - 1 && current[0] == '-' && current[1] == '-') {
					// comment
					current += 2;
					while (current < end - 2) {
						if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
							// end of comment
							current += 3;
							break;
						}
						current++;
					}

					// ignore comment, read next token
					return read_token (out token_begin, out token_end);
				}
			} else if (current[0] == '/') {
				type = MarkupTokenType.END_ELEMENT;
				current++;
				name = read_name ();
				// XML permits whitespace between the name and '>'
				space ();
				if (current >= end || current[0] != '>') {
					// error
				}
				skip_byte ();
			} else {
				type = MarkupTokenType.START_ELEMENT;
				name = read_name ();
				space ();
				while (current < end && current[0] != '>' && current[0] != '/') {
					read_attribute ();
					space ();
				}
				if (current < end && current[0] == '/') {
					empty_element = true;
					current++;
					space ();
				} else {
					empty_element = false;
				}
				if (current >= end || current[0] != '>') {
					// error
				}
				skip_byte ();
			}
		} else {
			// leading whitespace was skipped above and the cursor is neither
			// at the end nor on '<', so at least one character is consumed
			while (current < end && current[0] != '<') {
				skip_char ();
			}
			type = MarkupTokenType.TEXT;
			// TODO process &amp; &gt; &lt; &quot; &apos;
		}

		column += (int) (current - token_start);

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		return type;
	}

	/**
	 * Skips whitespace at the cursor, keeping line and column up to date.
	 */
	void space () {
		while (current < end && current[0].isspace ()) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
	}
}
/**
 * Kinds of token produced by the markup reader.
 */
public enum Vala.MarkupTokenType {
	NONE,
	START_ELEMENT,
	END_ELEMENT,
	TEXT,
	EOF;

	/**
	 * Returns a human-readable description of the token type.
	 *
	 * @return a static string; "unknown token type" for any value without
	 *         a dedicated description
	 */
	public weak string to_string () {
		// fallback for every value not matched below
		weak string description = "unknown token type";

		switch (this) {
		case START_ELEMENT:
			description = "start element";
			break;
		case END_ELEMENT:
			description = "end element";
			break;
		case TEXT:
			description = "text";
			break;
		case EOF:
			description = "end of file";
			break;
		}

		return description;
	}
}