1 /* valamarkupreader.vala
3 * Copyright (C) 2008-2009 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Jürg Billeter <j@bitron.ch>
26 * Simple reader for a subset of XML.
// Simple reader for a subset of XML. Tokens are pulled one at a time with
// read_token; the attributes collected for a start element are available
// through get_attribute and get_attributes.
28 public class Vala
.MarkupReader
{
// Name of the input as handed to either constructor; only this class may assign it.
29 public string filename
{ get; private set; }
// Readable by callers, writable only inside the class.
// NOTE(review): presumably the element name of the latest token - the assignments are not in this view.
31 public string name
{ get; private set; }
// Text captured by read_token for a TEXT token (result of text ('<', true)).
33 public string content
{ get; private set; }
// Memory mapping of the input file, created by the file-based constructor.
35 MappedFile mapped_file
;
// Attribute table keyed and valued by strings; read_token fills it while
// parsing a start element.
44 Map
<string,string> attributes
= new HashMap
<string,string> (str_hash
, str_equal
);
// Creates a reader over the file named by filename by mapping it into memory.
// Mapping failures (FileError) are reported through Report.error instead of
// being propagated to the caller.
// NOTE(review): the opening of the try block and original lines 54-58 are not
// visible in this view.
47 public MarkupReader (string filename
) {
48 this
.filename
= filename
;
// Second argument false: the mapping is requested non-writable.
51 mapped_file
= new
MappedFile (filename
, false);
// begin addresses the first mapped byte ...
52 begin
= mapped_file
.get_contents ();
// ... and end is begin advanced by the mapped length (one past the last byte).
53 end
= begin
+ mapped_file
.get_length ();
59 } catch (FileError e
) {
// The message names the file and carries the underlying error text.
60 Report
.error (null, "Unable to map file `%s': %s".printf (filename
, e
.message
));
// Creates a reader over an in-memory string instead of a mapped file.
// filename is stored as given; content supplies the bytes to parse.
// NOTE(review): the statement that points begin at content (original lines
// 66-67) is not visible in this view.
64 public MarkupReader
.from_string (string filename
, string content
) {
65 this
.filename
= filename
;
// end is begin advanced by content.length, which is a byte count.
68 end
= begin
+ content
.length
;
// Looks up attr in the current attribute table and returns the stored value.
// The return type is nullable, so callers must be prepared for null.
76 public string?
get_attribute (string attr
) {
77 return attributes
[attr
];
81 * Returns a copy of the current attributes.
83 * @return map of current attributes
// Builds a fresh HashMap and copies every key/value pair of the current
// attribute table into it, so the result is independent of the internal map.
// NOTE(review): the return statement is not visible in this view.
85 public Map
<string,string> get_attributes () {
86 var result
= new HashMap
<string,string> (str_hash
, str_equal
);
// One lookup per key; the values themselves are not transformed.
87 foreach (var key
in attributes
.get_keys ()) {
88 result
.set (key
, attributes
.get (key
));
// Body of the name scanner.
// NOTE(review): the declaration line is not visible here; presumably this is
// read_name, which read_token calls for attribute names - confirm.
// Scans forward from current and returns the bytes consumed as a string.
94 char* begin
= current
;
95 while (current
< end
) {
// Space, tab, '>', '/', '=' and newline delimit a name.
// NOTE(review): the branch body is not visible; presumably it leaves the loop.
96 if (current
[0] == ' ' || current
[0] == '\t' || current
[0] == '>'
97 || current
[0] == '/' || current
[0] == '=' || current
[0] == '\n') {
// Decode one character, looking at no more than the bytes left before end.
100 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
// (unichar) (-1) is the value compared against to detect a failed decode.
101 if (u
!= (unichar
) (-1)) {
// Advance by the encoded byte length of the character just decoded.
102 current
+= u
.to_utf8 (null);
104 Report
.error (null, "invalid UTF-8 character");
// Nothing was consumed: the input held no name at this position.
107 if (current
== begin
) {
108 // syntax error: invalid name
// The result is the byte range [begin, current).
110 return ((string) begin
).substring (0, (int) (current
- begin
));
// Reads the next token from the input.
//
// token_begin receives the location where the token starts and token_end the
// location where it ends. The return value is the token kind: EOF,
// START_ELEMENT, END_ELEMENT or TEXT, with NONE as the initial value.
// Comments are skipped by recursing to read the following token.
//
// Fix: the check for the attribute value's opening quote used
// `!= '"' || != '\''`, which holds for every character. It now uses `&&`,
// so the condition is true only when the character is neither quote.
// NOTE(review): this view omits several original lines (e.g. 114-116,
// 176-178, 180-181); they are left exactly as they were.
113 public MarkupTokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
// Synthetic END_ELEMENT path: the flag is cleared and both locations are set
// to the same begin/line/column. NOTE(review): the guarding condition is not
// visible in this view.
117 empty_element
= false;
118 token_begin
= SourceLocation (begin
, line
, column
);
119 token_end
= SourceLocation (begin
, line
, column
);
120 return MarkupTokenType
.END_ELEMENT
;
128 MarkupTokenType type
= MarkupTokenType
.NONE
;
// Local start-of-token pointer, taken from the current read position.
129 char* begin
= current
;
130 token_begin
= SourceLocation (begin
, line
, column
);
132 if (current
>= end
) {
133 type
= MarkupTokenType
.EOF
;
134 } else if (current
[0] == '<') {
136 if (current
>= end
) {
138 } else if (current
[0] == '?') {
139 // processing instruction
140 } else if (current
[0] == '!') {
141 // comment or doctype
// Two dashes must follow for this to be a comment; `end - 1` keeps the
// current[1] read inside the buffer.
143 if (current
< end
- 1 && current
[0] == '-' && current
[1] == '-') {
// Search for the closing "-->"; `end - 2` keeps current[2] inside the buffer.
146 while (current
< end
- 2) {
147 if (current
[0] == '-' && current
[1] == '-' && current
[2] == '>') {
151 } else if (current
[0] == '\n') {
158 // ignore comment, read next token
159 return read_token (out token_begin
, out token_end
);
161 } else if (current
[0] == '/') {
162 type
= MarkupTokenType
.END_ELEMENT
;
// An end tag has to be closed by '>'.
165 if (current
>= end
|| current
[0] != '>') {
170 type
= MarkupTokenType
.START_ELEMENT
;
// Attribute loop: runs until the tag is closed by '>' or a '/' is reached.
173 while (current
< end
&& current
[0] != '>' && current
[0] != '/') {
174 string attr_name
= read_name ();
// The attribute name must be followed by '='.
175 if (current
>= end
|| current
[0] != '=') {
// The value must open with a double or a single quote; the condition is true
// when the input is exhausted or the character is neither of the two.
179 if (current
>= end
|| (current
[0] != '"' && current
[0] != '\'')) {
// Remember which quote opened the value so the same one must close it.
182 char quote
= current
[0];
// Read the value up to the matching quote; trailing whitespace is kept.
185 string attr_value
= text (quote
, false);
187 if (current
>= end
|| current
[0] != quote
) {
191 attributes
.set (attr_name
, attr_value
);
// A '/' at this point sets empty_element; the other branch clears it.
194 if (current
[0] == '/') {
195 empty_element
= true;
199 empty_element
= false;
201 if (current
>= end
|| current
[0] != '>') {
// Text path: anything that is not '<' is read as text up to the next '<',
// with trailing whitespace removed.
209 if (current
[0] != '<') {
210 content
= text ('<', true);
// Otherwise there is no text here, so the following token is read instead.
214 return read_token (out token_begin
, out token_end
);
217 type
= MarkupTokenType
.TEXT
;
// The end location uses column - 1.
220 token_end
= SourceLocation (current
, line
, column
- 1);
// Collects text from the current position until end_char or the end of the
// input is reached, replacing the entities &amp; &quot; &apos; &lt; &gt; and
// &percnt; by the characters they stand for. Invalid UTF-8 is reported via
// Report.error. When rm_trailing_whitespace is true, whitespace at the end of
// the collected text is erased.
// NOTE(review): the return statement and the pointer advances that follow
// each entity (original lines 239, 244, ...) are not visible in this view.
225 string text (char end_char
, bool rm_trailing_whitespace
) {
226 StringBuilder content
= new
StringBuilder ();
// Start of the run of raw bytes that has not been copied into content yet.
227 char* text_begin
= current
;
// Reference point for the column computation after the loop.
228 char* last_linebreak
= current
;
230 while (current
< end
&& current
[0] != end_char
) {
// Decode one character, looking at no more than the bytes left before end.
231 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
232 if (u
== (unichar
) (-1)) {
233 Report
.error (null, "invalid UTF-8 character");
234 } else if (u
== '&') {
// next_pos addresses the byte right after the '&'.
// NOTE(review): the has_prefix calls below scan from next_pos without any
// bound against end - confirm the buffer is NUL-terminated.
235 char* next_pos
= current
+ u
.to_utf8 (null);
// Each entity branch first flushes the pending raw run [text_begin, current),
// then appends the decoded character and restarts the run at current.
236 if (((string) next_pos
).has_prefix ("amp;")) {
237 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
238 content
.append_c ('&');
240 text_begin
= current
;
241 } else if (((string) next_pos
).has_prefix ("quot;")) {
242 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
243 content
.append_c ('"');
245 text_begin
= current
;
246 } else if (((string) next_pos
).has_prefix ("apos;")) {
247 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
248 content
.append_c ('\'');
250 text_begin
= current
;
251 } else if (((string) next_pos
).has_prefix ("lt;")) {
252 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
253 content
.append_c ('<');
255 text_begin
= current
;
256 } else if (((string) next_pos
).has_prefix ("gt;")) {
257 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
258 content
.append_c ('>');
260 text_begin
= current
;
261 } else if (((string) next_pos
).has_prefix ("percnt;")) {
262 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
263 content
.append_c ('%');
265 text_begin
= current
;
// Advance by the encoded byte length of the character just decoded.
267 current
+= u
.to_utf8 (null);
273 last_linebreak
= current
;
276 current
+= u
.to_utf8 (null);
// Flush whatever raw bytes remain after the last entity.
281 if (text_begin
!= current
) {
282 content
.append (((string) text_begin
).substring (0, (int) (current
- text_begin
)));
// The column grows by the number of bytes between last_linebreak and current.
285 column
+= (int) (current
- last_linebreak
);
287 // Removes trailing whitespace
288 if (rm_trailing_whitespace
) {
// str_pos starts one past the last byte of the collected text ...
289 char* str_pos
= ((char*)content
.str
) + content
.len
;
// ... and walks backwards while it addresses whitespace. The loop stops at the
// first byte without testing it, so that byte is always kept.
290 for (str_pos
--; str_pos
> ((char*)content
.str
) && str_pos
[0].isspace(); str_pos
--);
// Erase from the byte after str_pos through the end (-1).
291 content
.erase ((ssize_t
) (str_pos
-((char*) content
.str
) + 1), -1);
298 while (current
< end
&& current
[0].isspace ()) {
299 if (current
[0] == '\n') {
// Kinds of token produced by the markup reader.
// NOTE(review): the value list and the switch statement line are not visible
// in this view; the cases below plus NONE (used by read_token) are the values
// referenced here.
309 public enum Vala
.MarkupTokenType
{
// Returns a fixed, human-readable description of the token type. The result is
// unowned: every branch returns a string literal.
316 public unowned
string to_string () {
318 case START_ELEMENT
: return "start element";
319 case END_ELEMENT
: return "end element";
320 case TEXT
: return "text";
321 case EOF
: return "end of file";
// Any value without its own case above is described generically.
322 default: return "unknown token type";