Release 0.41.92
[vala-gnome.git] / vala / valamarkupreader.vala
blob0197320dcafab676cd0ff6e8b4efe023f4d63c22
/* valamarkupreader.vala
 *
 * Copyright (C) 2008-2009  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 */
23 using GLib;
/**
 * Simple reader for a subset of XML.
 *
 * The input is either memory-mapped from a file or copied from a string and
 * is split into tokens on demand by read_token ().  Malformed markup is
 * tolerated on a best-effort basis; only invalid UTF-8 and an unmappable
 * file are reported through Report.error ().
 */
public class Vala.MarkupReader {
	// Entity names as they appear after the leading '&'.
	const string[] ENTITY_NAMES = { "amp;", "quot;", "apos;", "lt;", "gt;", "percnt;" };
	// Replacement characters, index-parallel to ENTITY_NAMES.
	const char[] ENTITY_CHARS = { '&', '"', '\'', '<', '>', '%' };

	/**
	 * Name of the input as passed to the creation method.
	 */
	public string filename { get; private set; }

	/**
	 * Element name of the most recent start or end element token.
	 *
	 * Reset to null at the start of every read_token () call except the one
	 * that delivers the implicit end of an empty element (`<x/>`).
	 */
	public string name { get; private set; }

	/**
	 * Text of the most recent TEXT token, with the known entities replaced.
	 */
	public string content { get; private set; }

	// Keeps the mapping alive for as long as begin/current/end point into it.
	MappedFile mapped_file;

	// Owned copy of the input given to from_string (), so that the raw
	// pointers below can never outlive the memory they point into.
	string? source_text;

	// [begin, end) is the input buffer; current is the read position.
	char* begin;
	char* current;
	char* end;

	int line;
	int column;

	Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);

	// Set after a `<x/>` start element: the next read_token () call returns
	// the matching END_ELEMENT without consuming any input.
	bool empty_element;

	/**
	 * Creates a reader over the memory-mapped contents of a file.
	 *
	 * If the file cannot be mapped the error is reported and the reader is
	 * left without input, so the first token read is EOF.
	 *
	 * @param filename path of the file to read
	 */
	public MarkupReader (string filename) {
		this.filename = filename;

		try {
			mapped_file = new MappedFile (filename, false);
			begin = mapped_file.get_contents ();
			end = begin + mapped_file.get_length ();

			current = begin;

			line = 1;
			column = 1;
		} catch (FileError e) {
			Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
		}
	}

	/**
	 * Creates a reader over markup held in memory.
	 *
	 * The markup is copied, so the caller's string does not have to stay
	 * alive for the lifetime of the reader.
	 *
	 * @param filename name recorded in the filename property
	 * @param content  markup to read
	 */
	public MarkupReader.from_string (string filename, string content) {
		this.filename = filename;

		source_text = content;
		begin = source_text;
		end = begin + source_text.length;

		current = begin;

		line = 1;
		column = 1;
	}

	/**
	 * Looks up an attribute of the current start element.
	 *
	 * @param attr attribute name
	 * @return the attribute value, or null if the element has no such attribute
	 */
	public string? get_attribute (string attr) {
		return attributes[attr];
	}

	/**
	 * Returns a copy of the current attributes.
	 *
	 * @return map of current attributes
	 */
	public Map<string,string> get_attributes () {
		var result = new HashMap<string,string> (str_hash, str_equal);
		foreach (var key in attributes.get_keys ()) {
			result.set (key, attributes.get (key));
		}
		return result;
	}

	/**
	 * Tells whether a decoded character is one of the two failure values of
	 * get_char_validated (): -1 for an invalid sequence, -2 for a sequence
	 * that is cut off by the length limit or contains a NUL byte.
	 */
	static bool is_invalid_char (unichar u) {
		return u == (unichar) (-1) || u == (unichar) (-2);
	}

	/**
	 * Tells whether the bytes at pos start with prefix, without ever reading
	 * at or beyond end.  Needed because a mapped file is not NUL-terminated.
	 */
	bool starts_with (char* pos, string prefix) {
		int length = prefix.length;
		if (pos > end || (long) (end - pos) < length) {
			return false;
		}
		char* expected = prefix;
		for (int i = 0; i < length; i++) {
			if (pos[i] != expected[i]) {
				return false;
			}
		}
		return true;
	}

	/**
	 * Tries to recognise one of the known entities.
	 *
	 * @param entity      position right after the '&'
	 * @param replacement receives the character the entity stands for
	 * @param length      receives the byte length of the entity including the '&'
	 * @return true if a known entity was found
	 */
	bool decode_entity (char* entity, out char replacement, out int length) {
		for (int i = 0; i < ENTITY_NAMES.length; i++) {
			if (starts_with (entity, ENTITY_NAMES[i])) {
				replacement = ENTITY_CHARS[i];
				// +1 accounts for the '&' in front of the name
				length = ENTITY_NAMES[i].length + 1;
				return true;
			}
		}
		replacement = '\0';
		length = 0;
		return false;
	}

	/**
	 * Skips a single byte unless the end of the input has been reached, so
	 * that current never moves beyond end on truncated input.
	 */
	void skip_byte () {
		if (current < end) {
			current++;
		}
	}

	/**
	 * Reads an element or attribute name starting at the current position.
	 *
	 * The name ends at a space, tab, newline, '>', '/' or '=' or at the end
	 * of the input.  Invalid UTF-8 is reported and skipped byte by byte.
	 *
	 * @return the name, which is empty if no name character was found
	 */
	string read_name () {
		char* name_begin = current;
		while (current < end) {
			if (current[0] == ' ' || current[0] == '\t' || current[0] == '>'
			    || current[0] == '/' || current[0] == '=' || current[0] == '\n') {
				break;
			}
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (is_invalid_char (u)) {
				Report.error (null, "invalid UTF-8 character");
				// step over the offending byte; without this the loop never ends
				current++;
			} else {
				current += u.to_utf8 (null);
			}
		}
		// an empty name is a syntax error that is tolerated here
		return ((string) name_begin).substring (0, (int) (current - name_begin));
	}

	/**
	 * Reads the next token.
	 *
	 * Comments are skipped.  For START_ELEMENT and END_ELEMENT the name
	 * property holds the element name and, for START_ELEMENT, the attributes
	 * are available through get_attribute () and get_attributes ().  For TEXT
	 * the content property holds the text.  An empty element (`<x/>`) yields
	 * START_ELEMENT followed by END_ELEMENT on the next call.  NONE is
	 * returned for constructs that are not interpreted: a processing
	 * instruction, a doctype declaration, or a lone '<' at the end of input.
	 * Those are not consumed beyond the '<' (and '!'), so the following call
	 * reads their remainder as text.
	 *
	 * @param token_begin receives the location where the token starts
	 * @param token_end   receives the location where the token ends
	 * @return the type of the token that was read
	 */
	public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		attributes.clear ();

		if (empty_element) {
			// implicit end of `<x/>`: nothing is consumed; the location is the
			// current read position (not the start of the whole buffer)
			empty_element = false;
			token_begin = SourceLocation (current, line, column);
			token_end = SourceLocation (current, line, column);
			return MarkupTokenType.END_ELEMENT;
		}

		content = null;
		name = null;

		space ();

		MarkupTokenType type = MarkupTokenType.NONE;
		token_begin = SourceLocation (current, line, column);

		if (current >= end) {
			type = MarkupTokenType.EOF;
		} else if (current[0] == '<') {
			current++;
			if (current >= end) {
				// truncated input: reported as NONE
			} else if (current[0] == '?') {
				// processing instruction: not interpreted
			} else if (current[0] == '!') {
				// comment or doctype
				current++;
				if (current < end - 1 && current[0] == '-' && current[1] == '-') {
					// comment
					current += 2;
					while (current < end - 2) {
						if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
							// end of comment
							current += 3;
							break;
						} else if (current[0] == '\n') {
							line++;
							column = 0;
						}
						current++;
					}

					// ignore comment, read next token
					return read_token (out token_begin, out token_end);
				}
			} else if (current[0] == '/') {
				type = MarkupTokenType.END_ELEMENT;
				current++;
				name = read_name ();
				// expected '>'; any other byte is skipped just the same
				skip_byte ();
			} else {
				type = MarkupTokenType.START_ELEMENT;
				name = read_name ();
				space ();
				while (current < end && current[0] != '>' && current[0] != '/') {
					string attr_name = read_name ();
					// expected '='; any other byte is skipped just the same
					skip_byte ();
					if (current >= end) {
						// truncated input: there is no quote character to read
						break;
					}
					// Expected '"' or '\''.  Malformed input is tolerated:
					// whatever byte is found here delimits the value.
					char quote = current[0];
					current++;

					string attr_value = text (quote, false);

					// closing quote
					skip_byte ();
					attributes.set (attr_name, attr_value);
					space ();
				}
				if (current < end && current[0] == '/') {
					empty_element = true;
					current++;
					space ();
				} else {
					empty_element = false;
				}
				// expected '>'; any other byte is skipped just the same
				skip_byte ();
			}
		} else {
			space ();

			if (current[0] != '<') {
				content = text ('<', true);
			} else {
				// no text
				// read next token
				return read_token (out token_begin, out token_end);
			}

			type = MarkupTokenType.TEXT;
		}

		token_end = SourceLocation (current, line, column - 1);

		return type;
	}

	/**
	 * Reads text up to, but not including, end_char or the end of the input.
	 *
	 * The entities &amp;amp; &amp;quot; &amp;apos; &amp;lt; &amp;gt; and
	 * &amp;percnt; are replaced by the character they stand for; any other
	 * '&' is kept literally.  Invalid UTF-8 is reported and its bytes are
	 * passed through unchanged.
	 *
	 * @param end_char               byte that terminates the text
	 * @param rm_trailing_whitespace whether to strip whitespace from the end
	 * @return the decoded text
	 */
	string text (char end_char, bool rm_trailing_whitespace) {
		var builder = new StringBuilder ();
		// start of the raw bytes that have not been copied to builder yet
		char* text_begin = current;
		char* last_linebreak = current;

		while (current < end && current[0] != end_char) {
			unichar u = ((string) current).get_char_validated ((long) (end - current));
			if (is_invalid_char (u)) {
				Report.error (null, "invalid UTF-8 character");
				// step over the offending byte; without this the loop never ends
				current++;
			} else if (u == '&') {
				char replacement;
				int entity_length;
				if (decode_entity (current + 1, out replacement, out entity_length)) {
					// flush the pending raw text, then emit the decoded character
					builder.append (((string) text_begin).substring (0, (int) (current - text_begin)));
					builder.append_c (replacement);
					current += entity_length;
					text_begin = current;
				} else {
					// unknown entity or bare '&': keep it as ordinary text
					current += u.to_utf8 (null);
				}
			} else {
				if (u == '\n') {
					line++;
					column = 0;
					last_linebreak = current;
				}

				current += u.to_utf8 (null);
				column++;
			}
		}

		if (text_begin != current) {
			builder.append (((string) text_begin).substring (0, (int) (current - text_begin)));
		}

		// NOTE(review): column has already been advanced once per plain
		// character above and is advanced again here by the byte distance to
		// the last line break.  Left unchanged so that reported locations stay
		// as they were; confirm the intended column semantics before touching.
		column += (int) (current - last_linebreak);

		// Removes trailing whitespace; the first byte is always kept
		if (rm_trailing_whitespace) {
			ssize_t keep = builder.len;
			while (keep > 1 && builder.str[keep - 1].isspace ()) {
				keep--;
			}
			builder.erase (keep, -1);
		}

		return builder.str;
	}

	/**
	 * Skips whitespace at the current position, keeping line and column
	 * up to date.
	 */
	void space () {
		while (current < end && current[0].isspace ()) {
			if (current[0] == '\n') {
				line++;
				column = 0;
			}
			current++;
			column++;
		}
	}
}
/**
 * Token types reported by Vala.MarkupReader.read_token ().
 */
public enum Vala.MarkupTokenType {
	NONE,
	START_ELEMENT,
	END_ELEMENT,
	TEXT,
	EOF;

	/**
	 * Describes this token type in a few words, for use in messages.
	 *
	 * @return a constant description; "unknown token type" for NONE and for
	 *         any value without a description of its own
	 */
	public unowned string to_string () {
		if (this == START_ELEMENT) {
			return "start element";
		}
		if (this == END_ELEMENT) {
			return "end element";
		}
		if (this == TEXT) {
			return "text";
		}
		if (this == EOF) {
			return "end of file";
		}
		return "unknown token type";
	}
}