1 /* valamarkupreader.vala
3 * Copyright (C) 2008 Jürg Billeter
5 * This library is free software; you can redistribute it and/or
6 * modify it under the terms of the GNU Lesser General Public
7 * License as published by the Free Software Foundation; either
8 * version 2.1 of the License, or (at your option) any later version.
10 * This library is distributed in the hope that it will be useful,
11 * but WITHOUT ANY WARRANTY; without even the implied warranty of
12 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
13 * Lesser General Public License for more details.
15 * You should have received a copy of the GNU Lesser General Public
16 * License along with this library; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
20 * Jürg Billeter <j@bitron.ch>
27 * Simple reader for a subset of XML.
// Reader class declared as Vala.MarkupReader, deriving from Object.
29 public class Vala
.MarkupReader
: Object
{
// Path of the input file; declared get/construct, so it is assigned at construction time.
30 public string filename
{ get; construct; }
// Publicly readable, privately settable string. No assignment to it is visible in this view.
32 public string name
{ get; private set; }
// Handle of the mapped input file; created from `filename` when the reader is set up.
34 MappedFile mapped_file
;
// String-to-string map keyed with str_hash / str_equal. read_token () stores attribute
// name/value pairs in it and get_attribute () reads them back.
43 Map
<string,string> attributes
= new HashMap
<string,string> (str_hash
, str_equal
);
// Creation method: stores the given path in the `filename` property.
46 public MarkupReader (string filename
) {
47 this
.filename
= filename
;
// NOTE(review): original lines 48-51 are not present in this view. The catch clause below
// implies a `try {` opener among them, and the statements that follow may belong to a
// separate block rather than to the creation method — confirm against the full file.
// Map the file (second argument false) and remember where its contents start.
52 mapped_file
= new
MappedFile (filename
, false);
53 begin
= mapped_file
.get_contents ();
// `end` is `begin` advanced by the mapped length, i.e. one past the last mapped byte.
54 end
= begin
+ mapped_file
.get_length ();
// A FileError raised while mapping is reported through Report.error, formatted with the
// file name and the error message.
60 } catch (FileError e
) {
61 Report
.error (null, "Unable to map file `%s': %s".printf (filename
, e
.message
));
// Looks up `attr` in the `attributes` map and returns the stored value.
// The return type is nullable (string?).
65 public string?
get_attribute (string attr
) {
66 return attributes
[attr
];
// NOTE(review): the method header (original line 69) is not present in this view. Presumably
// this is the body of read_name (), which read_token () calls for attribute names — confirm.
// Remember where the scan starts so the scanned span can be copied out at the end.
// This local `begin` is declared here; it is not the `begin` assigned when the file is mapped.
70 char* begin
= current
;
// Scan forward while the cursor is before the end of the input.
71 while (current
< end
) {
// Delimiter test: space, greater-than sign, slash or equals sign. The statement inside this
// branch is not visible here (presumably it leaves the loop — confirm).
72 if (current
[0] == ' ' || current
[0] == '>'
73 || current
[0] == '/' || current
[0] == '=') {
// Validate one UTF-8 character, looking at no more than the bytes left before `end`.
76 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
// (unichar) (-1) is treated as the failure value; on success the cursor advances by the
// value returned from to_utf8 (null).
77 if (u
!= (unichar
) (-1)) {
78 current
+= u
.to_utf8 (null);
// Reported when validation failed (the else line itself is not visible in this view).
80 Report
.error (null, "invalid UTF-8 character");
// An empty span is noted as a syntax error below, but no handling statement is visible.
83 if (current
== begin
) {
84 // syntax error: invalid name
// Return a copy of the bytes between the start position and the cursor.
86 return ((string) begin
).ndup (current
- begin
);
// Produces the next token of the input. Fills token_begin and token_end with the position,
// line and column of the token and returns its MarkupTokenType.
// NOTE(review): several original lines (for example 90-92, 108, 110, 117-118) are not present
// in this view, so the notes below describe only the statements that are visible.
89 public MarkupTokenType
read_token (out SourceLocation token_begin
, out SourceLocation token_end
) {
// Clears the empty_element flag and reports END_ELEMENT. The condition guarding these two
// statements is not visible here — presumably a pending self-closing element; confirm.
93 empty_element
= false;
94 return MarkupTokenType
.END_ELEMENT
;
// The token type starts out as NONE; the start location is captured from the current cursor,
// line and column.
99 MarkupTokenType type
= MarkupTokenType
.NONE
;
100 char* begin
= current
;
101 token_begin
.pos
= begin
;
102 token_begin
.line
= line
;
103 token_begin
.column
= column
;
// Cursor at or beyond the end of the input: end of file.
105 if (current
>= end
) {
106 type
= MarkupTokenType
.EOF
;
// A less-than sign opens markup.
107 } else if (current
[0] == '<') {
// End-of-input check inside the markup branch; the statement of this branch is not visible.
109 if (current
>= end
) {
111 } else if (current
[0] == '?') {
112 // processing instruction
113 } else if (current
[0] == '!') {
114 // comment or doctype
// A slash marks a closing tag.
115 } else if (current
[0] == '/') {
116 type
= MarkupTokenType
.END_ELEMENT
;
// True when the input ends or the next character is not a greater-than sign; the branch
// body is not visible.
119 if (current
>= end
|| current
[0] != '>') {
// Marks the token as a start tag (presumably the fallback branch; the enclosing else line is
// not visible).
124 type
= MarkupTokenType
.START_ELEMENT
;
// Attribute loop: runs until the input ends or a greater-than sign or slash is reached.
127 while (current
< end
&& current
[0] != '>' && current
[0] != '/') {
128 string attr_name
= read_name ();
// True when no equals sign follows the attribute name; branch body not visible.
129 if (current
>= end
|| current
[0] != '=') {
133 // FIXME allow single quotes
// True when the value does not open with a double quote; branch body not visible.
134 if (current
>= end
|| current
[0] != '"') {
// Start of the attribute value; scan up to the closing double quote or the end of input.
138 char* attr_begin
= current
;
139 while (current
< end
&& current
[0] != '"') {
// Entity references are detected here, but only a comment is visible in the branch.
140 if (current
[0] == '&') {
141 // process & > < " '
// Validate and step over one UTF-8 character, bounded by the bytes left before `end`.
143 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
144 if (u
!= (unichar
) (-1)) {
145 current
+= u
.to_utf8 (null);
// Reported when validation failed (the else line is not visible in this view).
147 Report
.error (null, "invalid UTF-8 character");
// Copy the raw bytes of the value; no entity decoding is applied in the visible code.
151 string attr_value
= ((string) attr_begin
).ndup (current
- attr_begin
);
// True when the closing double quote is missing; branch body not visible.
152 if (current
>= end
|| current
[0] != '"') {
// Store the name/value pair so get_attribute () can return it.
156 attributes
.set (attr_name
, attr_value
);
// A slash at this point sets empty_element (presumably a self-closing element — confirm).
159 if (current
[0] == '/') {
160 empty_element
= true;
// Otherwise the flag is cleared (the else line is not visible in this view).
164 empty_element
= false;
// True when the tag is not terminated by a greater-than sign; branch body not visible.
166 if (current
>= end
|| current
[0] != '>') {
// Character data: scan until the next less-than sign or the end of input.
173 char* text_begin
= current
;
174 while (current
< end
&& current
[0] != '<') {
175 if (current
[0] == '&') {
176 // process & > < " '
178 unichar u
= ((string) current
).get_char_validated ((long) (end
- current
));
179 if (u
!= (unichar
) (-1)) {
180 current
+= u
.to_utf8 (null);
182 Report
.error (null, "invalid UTF-8 character");
// When no text was consumed the method calls itself and returns that result.
186 if (text_begin
== current
) {
189 return read_token (out token_begin
, out token_end
);
// Otherwise the token is TEXT; the text itself is not extracted (see the commented-out line).
191 type
= MarkupTokenType
.TEXT
;
192 // string text = ((string) text_begin).ndup (current - text_begin);
// Advance the column counter by the number of bytes consumed since `begin`.
195 column
+= (int) (current
- begin
);
// End location: current cursor and line, with the column one less than the updated counter.
197 token_end
.pos
= current
;
198 token_end
.line
= line
;
199 token_end
.column
= column
- 1;
205 while (current
< end
&& current
[0].isspace ()) {
206 if (current
[0] == '\n') {
216 public enum Vala
.MarkupTokenType
{
223 public weak string to_string () {
225 case START_ELEMENT
: return "start element";
226 case END_ELEMENT
: return "end element";
227 case TEXT
: return "text";
228 case EOF
: return "end of file";
229 default: return "unknown token type";