girparser: Provide default constructor for classes.
[vala-lang.git] / vala / valamarkupreader.vala
blob9546faac1d0cf4be998d898f606fc3b0f29b10e9
/* valamarkupreader.vala
 *
 * Copyright (C) 2008-2009 Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 */
23 using GLib;
/**
 * Simple reader for a subset of XML.
 *
 * The input file is memory-mapped and scanned in place. Every call to
 * {@link read_token} yields the next token. For element tokens the element
 * name is published through {@link name} and the attributes through
 * {@link get_attribute}; for text tokens the decoded text is published
 * through {@link content}.
 *
 * Only the five predefined entities (amp, quot, apos, lt, gt) are decoded.
 * Processing instructions and declarations other than comments are not
 * parsed. Columns are counted in bytes.
 */
public class Vala.MarkupReader : Object {
	/** Path of the file being read, as passed to the constructor. */
	public string filename { get; private set; }

	/** Name of the element of the most recent start or end element token. */
	public string name { get; private set; }

	/** Decoded text of the most recent text token. */
	public string content { get; private set; }

	// keeps the mapping alive for as long as begin/current/end point into it
	MappedFile mapped_file;

	// begin and end delimit the mapped bytes, current is the read position;
	// the mapping is not NUL-terminated, so every read has to stay below end
	char* begin;
	char* current;
	char* end;

	// location of current, both 1-based
	int line;
	int column;

	// attributes of the most recent start element token
	Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);
	// set while the end token of an <empty/> element is still pending
	bool empty_element;

	/**
	 * Maps the given file and positions the reader at its first byte.
	 *
	 * A mapping failure is reported through Report.error; the reader then
	 * has no input and {@link read_token} immediately yields EOF.
	 *
	 * @param filename path of the file to read
	 */
	public MarkupReader (string filename) {
		this.filename = filename;

		// set up front so that the location is well-defined even when the
		// file cannot be mapped
		line = 1;
		column = 1;

		try {
			mapped_file = new MappedFile (filename, false);
			begin = mapped_file.get_contents ();
			end = begin + mapped_file.get_length ();
			current = begin;
		} catch (FileError e) {
			Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
		}
	}

	/**
	 * Looks up an attribute of the most recent start element token.
	 *
	 * The attribute map is cleared at the beginning of every
	 * {@link read_token} call.
	 *
	 * @param attr attribute name
	 * @return the stored value; NOTE(review): presumably null when the
	 *         element carries no such attribute - depends on Map.get
	 */
	public string? get_attribute (string attr) {
		return attributes[attr];
	}

	/**
	 * Reads an element or attribute name starting at the current position.
	 *
	 * The name ends at whitespace, '>', '/', '=' or the end of the input.
	 *
	 * @return the name, which is empty if no name character was found
	 */
	string read_name () {
		char* name_begin = current;

		while (current < end) {
			char c = current[0];
			if (c == ' ' || c == '\t' || c == '\r' || c == '\n'
			    || c == '>' || c == '/' || c == '=') {
				break;
			}

			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u == (unichar) (-1) || u == (unichar) (-2)) {
				// -1 is an invalid sequence, -2 a truncated one; skip the
				// offending byte, otherwise this loop would never terminate
				Report.error (null, "invalid UTF-8 character");
				current++;
			} else {
				current += u.to_utf8 (null);
			}
		}

		column += (int) (current - name_begin);

		// an empty name is a syntax error in the input; it is passed on
		// unchanged to the caller
		return ((string) name_begin).substring (0, (int) (current - name_begin));
	}

	/**
	 * Reads the next token.
	 *
	 * Comments are skipped. START_ELEMENT updates {@link name} and the
	 * attributes, END_ELEMENT updates {@link name}, TEXT updates
	 * {@link content}. An element written as <name/> yields START_ELEMENT
	 * followed, on the next call, by an END_ELEMENT for which {@link name}
	 * is left untouched. Constructs that are not parsed (processing
	 * instructions, doctype and similar declarations, a lone '<' at the end
	 * of the input) yield NONE.
	 *
	 * @param token_begin receives the location of the first byte of the token
	 * @param token_end receives the location of the last byte of the token
	 * @return the type of the token that was read
	 */
	public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		attributes.clear ();

		if (empty_element) {
			// end token implied by "/>": there is no text of its own, so it
			// is reported as a zero-width token at the current position
			empty_element = false;
			store_location (out token_begin, column);
			store_location (out token_end, column);
			return MarkupTokenType.END_ELEMENT;
		}

		// comments do not produce tokens
		space ();
		while (at_comment ()) {
			skip_comment ();
			space ();
		}

		MarkupTokenType type = MarkupTokenType.NONE;
		store_location (out token_begin, column);

		if (current >= end) {
			type = MarkupTokenType.EOF;
		} else if (current[0] == '<') {
			skip_byte ();
			if (current >= end) {
				// lone '<' at the very end of the input
			} else if (current[0] == '?') {
				// processing instruction: only the '<' is consumed, the rest
				// is picked up as text by the following call.
				// NOTE(review): callers skipping an XML prolog may count on
				// exactly this token sequence - check them before changing it
			} else if (current[0] == '!') {
				// doctype or similar declaration, comments were handled above
				skip_byte ();
			} else if (current[0] == '/') {
				type = MarkupTokenType.END_ELEMENT;
				skip_byte ();
				name = read_name ();
				// '>' is expected here; as malformed input is not diagnosed,
				// whatever byte is found gets skipped
				skip_byte ();
			} else {
				type = MarkupTokenType.START_ELEMENT;
				read_start_element ();
			}
		} else {
			// leading whitespace is already skipped and the current byte is
			// not '<', so there is at least one byte of text
			content = text ('<', true);
			type = MarkupTokenType.TEXT;
		}

		store_location (out token_end, column - 1);

		return type;
	}

	/**
	 * Reads name, attributes and closing "/>" or ">" of a start tag whose
	 * '<' has already been consumed.
	 */
	void read_start_element () {
		name = read_name ();
		space ();

		while (current < end && current[0] != '>' && current[0] != '/') {
			string attr_name = read_name ();

			// '=' is expected here, whatever byte is found gets skipped
			skip_byte ();

			// the value is delimited by the quote character it starts with
			char quote = '"';
			if (current < end && current[0] == '\'') {
				quote = '\'';
			}
			skip_byte ();

			string attr_value = text (quote, false);

			// closing quote
			skip_byte ();

			attributes.set (attr_name, attr_value);
			space ();
		}

		if (current < end && current[0] == '/') {
			empty_element = true;
			skip_byte ();
			space ();
		} else {
			empty_element = false;
		}

		// '>' is expected here, whatever byte is found gets skipped
		skip_byte ();
	}

	/**
	 * Reads text up to, but not including, the next end_char or the end of
	 * the input and decodes the predefined entities in it.
	 *
	 * Unknown entities are copied literally; bytes that are not valid UTF-8
	 * are reported and dropped.
	 *
	 * @param end_char byte that terminates the text
	 * @param rm_trailing_whitespace whether to strip whitespace from the end
	 *        of the result
	 * @return the decoded text
	 */
	string text (char end_char, bool rm_trailing_whitespace) {
		StringBuilder builder = new StringBuilder ();
		// start of the run of literal bytes that is not copied yet
		char* text_begin = current;
		// start of the bytes that are not accounted for in column yet
		char* last_linebreak = current;

		while (current < end && current[0] != end_char) {
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (u == (unichar) (-1) || u == (unichar) (-2)) {
				// -1 is an invalid sequence, -2 a truncated one; leave the
				// offending byte out and carry on behind it
				Report.error (null, "invalid UTF-8 character");
				if (current > text_begin) {
					builder.append_len ((string) text_begin, (ssize_t) (current - text_begin));
				}
				current++;
				text_begin = current;
			} else if (u == '&') {
				char replacement;
				int entity_length;
				if (match_entity (current, out replacement, out entity_length)) {
					if (current > text_begin) {
						builder.append_len ((string) text_begin, (ssize_t) (current - text_begin));
					}
					builder.append_c (replacement);
					current += entity_length;
					text_begin = current;
				} else {
					// not a predefined entity, keep the '&' as literal text
					current++;
				}
			} else {
				if (u == '\n') {
					line++;
					column = 0;
					last_linebreak = current;
				}
				current += u.to_utf8 (null);
			}
		}

		if (current > text_begin) {
			builder.append_len ((string) text_begin, (ssize_t) (current - text_begin));
		}

		// the column is advanced once for the whole text; after a line break
		// the distance includes the '\n', which makes the result 1-based again
		column += (int) (current - last_linebreak);

		if (rm_trailing_whitespace) {
			char* str_begin = (char*) builder.str;
			char* str_end = str_begin + builder.len;
			while (str_end > str_begin) {
				char* last = str_end - 1;
				if (!last[0].isspace ()) {
					break;
				}
				str_end = last;
			}
			builder.erase ((ssize_t) (str_end - str_begin), -1);
		}

		return builder.str;
	}

	/**
	 * Checks whether one of the five predefined entities starts at pos.
	 *
	 * @param pos position of the '&'
	 * @param replacement receives the character the entity stands for
	 * @param length receives the size of the entity in bytes, including
	 *        '&' and ';'
	 * @return whether an entity was recognized
	 */
	bool match_entity (char* pos, out char replacement, out int length) {
		replacement = '\0';
		length = 0;

		char* body = pos + 1;
		if (has_prefix_at (body, "amp;")) {
			replacement = '&';
			length = 5;
		} else if (has_prefix_at (body, "quot;")) {
			replacement = '"';
			length = 6;
		} else if (has_prefix_at (body, "apos;")) {
			replacement = '\'';
			length = 6;
		} else if (has_prefix_at (body, "lt;")) {
			replacement = '<';
			length = 4;
		} else if (has_prefix_at (body, "gt;")) {
			replacement = '>';
			length = 4;
		}

		return length > 0;
	}

	/**
	 * Compares the bytes at pos with prefix without reading beyond end.
	 */
	bool has_prefix_at (char* pos, string prefix) {
		char* expected = (char*) prefix;
		int i = 0;
		while (expected[i] != '\0') {
			if (pos + i >= end || pos[i] != expected[i]) {
				return false;
			}
			i++;
		}
		return true;
	}

	/**
	 * Whether the input continues with the comment opener "<!--".
	 */
	bool at_comment () {
		return (long) (end - current) >= 4
		       && current[0] == '<' && current[1] == '!'
		       && current[2] == '-' && current[3] == '-';
	}

	/**
	 * Skips the comment that starts at the current position.
	 *
	 * If the comment is not terminated, scanning stops two bytes before the
	 * end of the input.
	 */
	void skip_comment () {
		// "<!--"
		current += 4;
		column += 4;

		while ((long) (end - current) > 2) {
			if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
				current += 3;
				column += 3;
				return;
			}
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
	}

	/**
	 * Advances by one byte unless the end of the input has been reached.
	 */
	void skip_byte () {
		if (current < end) {
			current++;
			column++;
		}
	}

	/**
	 * Stores the current position and line together with the given column.
	 */
	void store_location (out SourceLocation location, int location_column) {
		location.pos = current;
		location.line = line;
		location.column = location_column;
	}

	/**
	 * Skips whitespace, keeping line and column up to date.
	 */
	void space () {
		while (current < end && current[0].isspace ()) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
	}
}
/**
 * Kinds of tokens produced by the markup reader.
 */
public enum Vala.MarkupTokenType {
	NONE,
	START_ELEMENT,
	END_ELEMENT,
	TEXT,
	EOF;

	/**
	 * Describes the token type in words, for use in messages.
	 *
	 * @return a constant description; values without a description of
	 *         their own, such as NONE, give "unknown token type"
	 */
	public unowned string to_string () {
		unowned string description;

		switch (this) {
		case START_ELEMENT:
			description = "start element";
			break;
		case END_ELEMENT:
			description = "end element";
			break;
		case TEXT:
			description = "text";
			break;
		case EOF:
			description = "end of file";
			break;
		default:
			description = "unknown token type";
			break;
		}

		return description;
	}
}