1 /* valamarkupreader.vala
3 * Copyright (C) 2008-2009 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Jürg Billeter <j@bitron.ch>
26 * Simple reader for a subset of XML.
28 public class Vala
.MarkupReader
: Object
{
// Path of the file being read. It is assigned from the constructor argument
// and can only be set from inside this class.
29 public string filename
{ get; private set; }
// Readable by anyone, settable only from inside this class.
// NOTE(review): no assignment to name is visible in this part of the file;
// presumably it holds the name of the element read last - confirm.
31 public string name
{ get; private set; }
// Memory mapping of the input file; created in the constructor.
33 MappedFile mapped_file
;
// Attribute name -> attribute value. Filled by read_token() through
// attributes.set() and queried by get_attribute().
42 Map
<string,string> attributes
= new HashMap
<string,string> (str_hash
, str_equal
);
// Creates a reader for the file at the given path.
// The path is stored in the filename property and the file is memory-mapped.
45 public MarkupReader (string filename
) {
46 this
.filename
= filename
;
// NOTE(review): the second MappedFile argument (false) is presumably the
// "writable" flag, i.e. a read-only mapping - confirm against the GLib binding.
51 mapped_file
= new
MappedFile (filename
, false);
// begin and end delimit the mapped bytes: end is begin plus the mapping length.
52 begin
= mapped_file
.get_contents ();
53 end
= begin
+ mapped_file
.get_length ();
// A FileError is caught here and handed to Report.error() together with the
// file name and the error message.
59 } catch (FileError e
) {
60 Report
.error (null, "Unable to map file `%s': %s".printf (filename
, e
.message
));
// Returns the value stored under attr in the attributes map, which
// read_token() fills through attributes.set().
// NOTE(review): the nullable return type suggests null for an attribute that
// is not present - confirm against the Map implementation.
64 public string?
get_attribute (string attr
) {
65 return attributes
[attr
];
// NOTE(review): the header of this method is not visible here. read_token()
// assigns the result of read_name() to a string, so this is presumably the
// body of read_name() - confirm.
// Remember where the name starts.
69 char* begin
= current
;
70 while (current
< end
) {
// Test for a delimiter: space, '>', '/' or '='. The branch body is not
// visible here; presumably it ends the loop - confirm.
71 if (current
[0] == ' ' || current
[0] == '>'
72 || current
[0] == '/' || current
[0] == '=') {
// Decode one UTF-8 character, reading at most the bytes left before end.
75 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
// (unichar) -1 is treated as the failure value.
// NOTE(review): GLib's g_utf8_get_char_validated() can also return
// (gunichar) -2 for a truncated sequence, which this comparison would let
// through - confirm whether that case needs handling.
76 if (u
!= (unichar
) (-1)) {
// Advance by the number of bytes the character occupies in UTF-8.
77 current
+= u
.to_utf8 (null);
79 Report
.error (null, "invalid UTF-8 character");
// current did not move: the name is empty.
82 if (current
== begin
) {
83 // syntax error: invalid name
// Copy the bytes between begin and current into a new string.
85 return ((string) begin
).ndup (current
- begin
);
// Reads the next token and returns its type.
// token_begin receives pos/line/column as they are when the token starts;
// token_end receives current and line after the token, with column - 1.
// Attributes parsed from a tag are stored in the attributes map.
88 public MarkupTokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
// Clear the empty_element flag and report END_ELEMENT.
// NOTE(review): the condition guarding this early return is not visible
// here; presumably it applies when empty_element was set to true by the
// previous call (tag ending in '/') - confirm.
92 empty_element
= false;
93 return MarkupTokenType
.END_ELEMENT
;
98 MarkupTokenType type
= MarkupTokenType
.NONE
;
// Record where the token starts.
99 char* begin
= current
;
100 token_begin
.pos
= begin
;
101 token_begin
.line
= line
;
102 token_begin
.column
= column
;
// Nothing left to read: EOF.
104 if (current
>= end
) {
105 type
= MarkupTokenType
.EOF
;
// '<' introduces markup; the following character selects the kind.
106 } else if (current
[0] == '<') {
108 if (current
>= end
) {
110 } else if (current
[0] == '?') {
111 // processing instruction
112 } else if (current
[0] == '!') {
113 // comment or doctype
// Two dashes follow: this is a comment.
115 if (current
< end
- 1 && current
[0] == '-' && current
[1] == '-') {
// Look for the "-->" terminator; the bound keeps current[2] inside the data.
118 while (current
< end
- 2) {
119 if (current
[0] == '-' && current
[1] == '-' && current
[2] == '>') {
127 // ignore comment, read next token
128 return read_token (out token_begin
, out token_end
);
// '/' after '<': closing tag.
130 } else if (current
[0] == '/') {
131 type
= MarkupTokenType
.END_ELEMENT
;
// The closing tag has to end with '>'.
134 if (current
>= end
|| current
[0] != '>') {
// NOTE(review): the branch header is not visible here; presumably this is
// the remaining case, an opening tag - confirm.
139 type
= MarkupTokenType
.START_ELEMENT
;
// Parse attributes until the tag ends with '>' or '/'.
142 while (current
< end
&& current
[0] != '>' && current
[0] != '/') {
143 string attr_name
= read_name ();
// The attribute name has to be followed by '='.
144 if (current
>= end
|| current
[0] != '=') {
148 // FIXME allow single quotes
// The value has to start with a double quote.
149 if (current
>= end
|| current
[0] != '"') {
// Scan the value up to the next double quote, one UTF-8 character at a time.
153 char* attr_begin
= current
;
154 while (current
< end
&& current
[0] != '"') {
155 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
156 if (u
!= (unichar
) (-1)) {
157 current
+= u
.to_utf8 (null);
159 Report
.error (null, "invalid UTF-8 character");
162 // TODO process entity references (&amp; &gt; &lt; &quot; &apos;)
// The raw bytes between attr_begin and current become the attribute value.
163 string attr_value
= ((string) attr_begin
).ndup (current
- attr_begin
);
// The value has to be terminated by a double quote.
164 if (current
>= end
|| current
[0] != '"') {
168 attributes
.set (attr_name
, attr_value
);
// '/' at the end of the tag sets empty_element; the flag is cleared and
// END_ELEMENT returned at the top of this method.
171 if (current
[0] == '/') {
172 empty_element
= true;
176 empty_element
= false;
// The tag has to end with '>'.
178 if (current
>= end
|| current
[0] != '>') {
// Scan forward to the next '<', one UTF-8 character at a time.
185 char* text_begin
= current
;
186 while (current
< end
&& current
[0] != '<') {
187 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
188 if (u
!= (unichar
) (-1)) {
189 current
+= u
.to_utf8 (null);
191 Report
.error (null, "invalid UTF-8 character");
// Nothing was consumed: read the next token instead.
194 if (text_begin
== current
) {
197 return read_token (out token_begin
, out token_end
);
199 type
= MarkupTokenType
.TEXT
;
200 // TODO process entity references (&amp; &gt; &lt; &quot; &apos;)
201 // string text = ((string) text_begin).ndup (current - text_begin);
// Advance column by the number of bytes consumed (pointer difference), so a
// multi-byte UTF-8 character counts as more than one column.
204 column
+= (int) (current
- begin
);
// Record where the token ends.
206 token_end
.pos
= current
;
207 token_end
.line
= line
;
208 token_end
.column
= column
- 1;
// NOTE(review): the header of this method is not visible here.
// Loop while current is before end and points at a whitespace character.
214 while (current
< end
&& current
[0].isspace ()) {
// Newlines are tested for separately; the branch body is not visible here
// (presumably line/column bookkeeping - confirm).
215 if (current
[0] == '\n') {
// Token types; this is the return type of MarkupReader.read_token().
225 public enum Vala
.MarkupTokenType
{
// Returns a human-readable description of the token type. Values other than
// the four cases listed below yield "unknown token type".
232 public weak string to_string () {
234 case START_ELEMENT
: return "start element";
235 case END_ELEMENT
: return "end element";
236 case TEXT
: return "text";
237 case EOF
: return "end of file";
238 default: return "unknown token type";