Release 0.7.8
[vala-lang.git] / vala / valamarkupreader.vala
blob41ec731ffd7a9eaf9499ac6e2486379edfeb96fc
/* valamarkupreader.vala
 *
 * Copyright (C) 2008-2009  Jürg Billeter
 *
 * This library is free software; you can redistribute it and/or
 * modify it under the terms of the GNU Lesser General Public
 * License as published by the Free Software Foundation; either
 * version 2.1 of the License, or (at your option) any later version.
 *
 * This library is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
 * Lesser General Public License for more details.
 *
 * You should have received a copy of the GNU Lesser General Public
 * License along with this library; if not, write to the Free Software
 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301  USA
 *
 * Author:
 * 	Jürg Billeter <j@bitron.ch>
 */
23 using GLib;
/**
 * Simple reader for a subset of XML.
 *
 * The file is memory-mapped by the creation method and split into tokens on
 * demand by read_token (). Malformed markup is consumed on a best-effort
 * basis; invalid UTF-8 is reported through Report.error () and skipped.
 */
public class Vala.MarkupReader : Object {
	/**
	 * Path of the file being read.
	 */
	public string filename { get; private set; }

	/**
	 * Name read for the most recent start or end element tag.
	 */
	public string name { get; private set; }

	MappedFile mapped_file;

	// [begin, end) delimits the mapped buffer, current is the read position
	char* begin;
	char* current;
	char* end;

	// source position of current; columns are counted in bytes
	int line;
	int column;

	Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);

	// set after reading "<foo/>" so that the next token is the matching END_ELEMENT
	bool empty_element;

	/**
	 * Maps the specified file for reading.
	 *
	 * If the file cannot be mapped the failure is reported through
	 * Report.error () and the reader behaves like a reader for an empty file.
	 *
	 * @param filename path of the markup file
	 */
	public MarkupReader (string filename) {
		this.filename = filename;

		// the position is valid even when mapping fails
		line = 1;
		column = 1;

		// The file is mapped here rather than in a construct block: a construct
		// block runs while the object is being created, i.e. before the
		// assignment to the filename property above has taken place.
		try {
			mapped_file = new MappedFile (filename, false);
			begin = mapped_file.get_contents ();
			end = begin + mapped_file.get_length ();

			current = begin;
		} catch (FileError e) {
			Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
		}
	}

	/**
	 * Returns the value of an attribute of the start element read last.
	 *
	 * @param attr attribute name
	 * @return     the raw attribute value (entities are not decoded), or
	 *             whatever the attribute map yields for an unknown key
	 */
	public string? get_attribute (string attr) {
		return attributes[attr];
	}

	/**
	 * Consumes a single byte and updates the source position.
	 *
	 * Does nothing at the end of the buffer, so error recovery can never move
	 * the read position past the mapped data.
	 */
	void skip_byte () {
		if (current >= end) {
			return;
		}

		if (current[0] == '\n') {
			line++;
			column = 0;
		}
		current++;
		column++;
	}

	/**
	 * Consumes one UTF-8 encoded character and updates the source position.
	 *
	 * An invalid or truncated sequence is reported and skipped one byte at a
	 * time, which guarantees that the calling loop makes progress. Must only
	 * be called while current < end.
	 */
	void skip_char () {
		unichar u = ((string) current).get_char_validated ((long) (end - current));
		if (u == (unichar) (-1) || u == (unichar) (-2)) {
			// -1: invalid sequence, -2: sequence cut off by the end of the buffer
			Report.error (null, "invalid UTF-8 character");
			skip_byte ();
			return;
		}

		if (current[0] == '\n') {
			line++;
			column = 0;
		}
		int length = u.to_utf8 (null);
		current += length;
		column += length;
	}

	/**
	 * Reads an element or attribute name.
	 *
	 * The name extends up to the next whitespace character, '>', '/' or '=',
	 * or to the end of the buffer.
	 *
	 * @return the name, empty if there is none at the current position
	 */
	string read_name () {
		char* name_begin = current;
		while (current < end) {
			if (current[0].isspace () || current[0] == '>'
			    || current[0] == '/' || current[0] == '=') {
				break;
			}
			skip_char ();
		}
		if (current == name_begin) {
			// syntax error: invalid name
		}
		return ((string) name_begin).ndup (current - name_begin);
	}

	/**
	 * Skips the remainder of a comment; current points at the "--" that
	 * follows "<!".
	 */
	void skip_comment () {
		// opening "--"
		skip_byte ();
		skip_byte ();

		while (end - current >= 3) {
			if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
				// end of comment
				skip_byte ();
				skip_byte ();
				skip_byte ();
				return;
			}
			skip_byte ();
		}

		// unterminated comment: everything up to the end of the file belongs to it
		while (current < end) {
			skip_byte ();
		}
	}

	/**
	 * Reads one name="value" pair and stores it in the attribute map.
	 *
	 * Unexpected characters in place of '=' or '"' are skipped without a
	 * diagnostic.
	 */
	void read_attribute () {
		string attr_name = read_name ();
		if (current >= end || current[0] != '=') {
			// error
		}
		skip_byte ();
		// FIXME allow single quotes
		if (current >= end || current[0] != '"') {
			// error
		}
		skip_byte ();
		char* attr_begin = current;
		while (current < end && current[0] != '"') {
			skip_char ();
		}
		// TODO process &amp; &gt; &lt; &quot; &apos;
		string attr_value = ((string) attr_begin).ndup (current - attr_begin);
		if (current >= end || current[0] != '"') {
			// error
		}
		skip_byte ();
		attributes.set (attr_name, attr_value);
	}

	/**
	 * Reads the next token.
	 *
	 * Comments are skipped. An empty element ("<foo/>") yields START_ELEMENT
	 * followed by END_ELEMENT on the next call. Processing instructions and
	 * doctype declarations are not interpreted: only the leading "<" or "<!"
	 * is consumed and NONE is returned, the remainder is returned as TEXT by
	 * the following call.
	 *
	 * @param token_begin location of the first byte of the token
	 * @param token_end   location of the last byte of the token
	 * @return            the type of the token, EOF at the end of the file
	 */
	public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
		attributes.clear ();

		if (empty_element) {
			// synthesized end tag of "<foo/>", it has no extent of its own
			empty_element = false;
			token_begin.pos = current;
			token_begin.line = line;
			token_begin.column = column;
			token_end.pos = current;
			token_end.line = line;
			token_end.column = column;
			return MarkupTokenType.END_ELEMENT;
		}

		space ();

		MarkupTokenType type = MarkupTokenType.NONE;
		token_begin.pos = current;
		token_begin.line = line;
		token_begin.column = column;

		if (current >= end) {
			type = MarkupTokenType.EOF;
		} else if (current[0] == '<') {
			skip_byte ();
			if (current >= end) {
				// error
			} else if (current[0] == '?') {
				// processing instruction
			} else if (current[0] == '!') {
				// comment or doctype
				skip_byte ();
				if (current < end - 1 && current[0] == '-' && current[1] == '-') {
					skip_comment ();

					// ignore comment, read next token
					return read_token (out token_begin, out token_end);
				}
			} else if (current[0] == '/') {
				type = MarkupTokenType.END_ELEMENT;
				skip_byte ();
				name = read_name ();
				if (current >= end || current[0] != '>') {
					// error
				}
				skip_byte ();
			} else {
				type = MarkupTokenType.START_ELEMENT;
				name = read_name ();
				space ();
				while (current < end && current[0] != '>' && current[0] != '/') {
					read_attribute ();
					space ();
				}
				if (current < end && current[0] == '/') {
					empty_element = true;
					skip_byte ();
					space ();
				} else {
					empty_element = false;
				}
				if (current >= end || current[0] != '>') {
					// error
				}
				skip_byte ();
			}
		} else {
			// leading whitespace has been skipped above, so the text consists
			// of at least one byte
			while (current < end && current[0] != '<') {
				skip_char ();
			}
			type = MarkupTokenType.TEXT;
			// TODO process &amp; &gt; &lt; &quot; &apos;
		}

		token_end.pos = current;
		token_end.line = line;
		token_end.column = column - 1;

		return type;
	}

	/**
	 * Skips whitespace at the current position.
	 */
	void space () {
		while (current < end && current[0].isspace ()) {
			skip_byte ();
		}
	}
}
/**
 * Kinds of tokens produced by the markup reader.
 */
public enum Vala.MarkupTokenType {
	NONE,
	START_ELEMENT,
	END_ELEMENT,
	TEXT,
	EOF;

	/**
	 * Returns a human-readable description of the token type.
	 *
	 * @return a string constant that must not be freed; NONE and any other
	 *         value without a dedicated description yield
	 *         "unknown token type"
	 */
	public unowned string to_string () {
		switch (this) {
		case START_ELEMENT: return "start element";
		case END_ELEMENT: return "end element";
		case TEXT: return "text";
		case EOF: return "end of file";
		default: return "unknown token type";
		}
	}
}