1 /* valamarkupreader.vala
3 * Copyright (C) 2008-2009 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Jürg Billeter <j@bitron.ch>
27 * Simple reader for a subset of XML.
29 public class Vala
.MarkupReader
: Object
{
/**
 * Path of the file this reader works on (construct-only).
 *
 * The same value is passed to `new MappedFile (...)` and is quoted in the
 * "Unable to map file" error message.
 */
30 public string filename
{ get; construct; }
/**
 * Name exposed by the reader; publicly readable, writable only from inside
 * this class.
 *
 * NOTE(review): presumably the element name of the token most recently
 * returned by read_token -- the assignment is not visible here; confirm.
 */
32 public string name
{ get; private set; }
// Mapping of the file named by filename, created with `false` as the second
// constructor argument. Its get_contents () and get_length () supply the
// begin and end pointers of the buffer being read.
34 MappedFile mapped_file
;
// Attribute name -> attribute value, both strings, hashed and compared with
// str_hash / str_equal. Entries are added with attributes.set (attr_name,
// attr_value) while name="value" pairs are parsed, and are looked up by
// get_attribute.
43 Map
<string,string> attributes
= new HashMap
<string,string> (str_hash
, str_equal
);
46 public MarkupReader (string filename
) {
47 this
.filename
= filename
;
52 mapped_file
= new
MappedFile (filename
, false);
53 begin
= mapped_file
.get_contents ();
54 end
= begin
+ mapped_file
.get_length ();
60 } catch (FileError e
) {
61 Report
.error (null, "Unable to map file `%s': %s".printf (filename
, e
.message
));
/**
 * Looks up attr in the attributes map and returns the stored value.
 *
 * @param attr attribute name used as the map key
 * @return the map's entry for attr; the return type is nullable
 */
65 public string?
get_attribute (string attr
) {
66 return attributes
[attr
];
70 char* begin
= current
;
71 while (current
< end
) {
72 if (current
[0] == ' ' || current
[0] == '>'
73 || current
[0] == '/' || current
[0] == '=') {
76 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
77 if (u
!= (unichar
) (-1)) {
78 current
+= u
.to_utf8 (null);
80 Report
.error (null, "invalid UTF-8 character");
83 if (current
== begin
) {
84 // syntax error: invalid name
86 return ((string) begin
).ndup (current
- begin
);
89 public MarkupTokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
93 empty_element
= false;
94 return MarkupTokenType
.END_ELEMENT
;
99 MarkupTokenType type
= MarkupTokenType
.NONE
;
100 char* begin
= current
;
101 token_begin
.pos
= begin
;
102 token_begin
.line
= line
;
103 token_begin
.column
= column
;
105 if (current
>= end
) {
106 type
= MarkupTokenType
.EOF
;
107 } else if (current
[0] == '<') {
109 if (current
>= end
) {
111 } else if (current
[0] == '?') {
112 // processing instruction
113 } else if (current
[0] == '!') {
114 // comment or doctype
116 if (current
< end
- 1 && current
[0] == '-' && current
[1] == '-') {
119 while (current
< end
- 2) {
120 if (current
[0] == '-' && current
[1] == '-' && current
[2] == '>') {
128 // ignore comment, read next token
129 return read_token (out token_begin
, out token_end
);
131 } else if (current
[0] == '/') {
132 type
= MarkupTokenType
.END_ELEMENT
;
135 if (current
>= end
|| current
[0] != '>') {
140 type
= MarkupTokenType
.START_ELEMENT
;
143 while (current
< end
&& current
[0] != '>' && current
[0] != '/') {
144 string attr_name
= read_name ();
145 if (current
>= end
|| current
[0] != '=') {
149 // FIXME allow single quotes
150 if (current
>= end
|| current
[0] != '"') {
154 char* attr_begin
= current
;
155 while (current
< end
&& current
[0] != '"') {
156 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
157 if (u
!= (unichar
) (-1)) {
158 current
+= u
.to_utf8 (null);
160 Report
.error (null, "invalid UTF-8 character");
163 // TODO process & > < " '
164 string attr_value
= ((string) attr_begin
).ndup (current
- attr_begin
);
165 if (current
>= end
|| current
[0] != '"') {
169 attributes
.set (attr_name
, attr_value
);
172 if (current
[0] == '/') {
173 empty_element
= true;
177 empty_element
= false;
179 if (current
>= end
|| current
[0] != '>') {
186 char* text_begin
= current
;
187 while (current
< end
&& current
[0] != '<') {
188 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
189 if (u
!= (unichar
) (-1)) {
190 current
+= u
.to_utf8 (null);
192 Report
.error (null, "invalid UTF-8 character");
195 if (text_begin
== current
) {
198 return read_token (out token_begin
, out token_end
);
200 type
= MarkupTokenType
.TEXT
;
201 // TODO process & > < " '
202 // string text = ((string) text_begin).ndup (current - text_begin);
205 column
+= (int) (current
- begin
);
207 token_end
.pos
= current
;
208 token_end
.line
= line
;
209 token_end
.column
= column
- 1;
215 while (current
< end
&& current
[0].isspace ()) {
216 if (current
[0] == '\n') {
226 public enum Vala
.MarkupTokenType
{
233 public weak string to_string () {
235 case START_ELEMENT
: return "start element";
236 case END_ELEMENT
: return "end element";
237 case TEXT
: return "text";
238 case EOF
: return "end of file";
239 default: return "unknown token type";