vapigen/valamarkupreader.vala

   1 /* valamarkupreader.vala
   2  *
   3  * Copyright (C) 2008-2009  Jürg Billeter
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  18  *
  19  * Author:
  20  *      Jürg Billeter <j@bitron.ch>
  21  */
  22
  23 using GLib;
  24 using Gee;
  25
  26 /**
  27  * Simple reader for a subset of XML.
  28  */
  29 public class Vala.MarkupReader : Object {
  30         public string filename { get; construct; }
  31
  32         public string name { get; private set; }
  33
  34         MappedFile mapped_file;
  35
  36         char* begin;
  37         char* current;
  38         char* end;
  39
  40         int line;
  41         int column;
  42
  43         Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);
  44         bool empty_element;
  45
  46         public MarkupReader (string filename) {
  47                 this.filename = filename;
  48         }
  49
  50         construct {
  51                 try {
  52                         mapped_file = new MappedFile (filename, false);
  53                         begin = mapped_file.get_contents ();
  54                         end = begin + mapped_file.get_length ();
  55
  56                         current = begin;
  57
  58                         line = 1;
  59                         column = 1;
  60                 } catch (FileError e) {
  61                         Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
  62                 }
  63         }
  64
  65         public string? get_attribute (string attr) {
  66                 return attributes[attr];
  67         }
  68
  69         string read_name () {
  70                 char* begin = current;
  71                 while (current < end) {
  72                         if (current[0] == ' ' || current[0] == '>'
  73                             || current[0] == '/' || current[0] == '=') {
  74                                 break;
  75                         }
  76                         unichar u = ((string) current).get_char_validated ((long) (end - current));
  77                         if (u != (unichar) (-1)) {
  78                                 current += u.to_utf8 (null);
  79                         } else {
  80                                 Report.error (null, "invalid UTF-8 character");
  81                         }
  82                 }
  83                 if (current == begin) {
  84                         // syntax error: invalid name
  85                 }
  86                 return ((string) begin).ndup (current - begin);
  87         }
  88
  89         public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
  90                 attributes.clear ();
  91
  92                 if (empty_element) {
  93                         empty_element = false;
  94                         return MarkupTokenType.END_ELEMENT;
  95                 }
  96
  97                 space ();
  98
  99                 MarkupTokenType type = MarkupTokenType.NONE;
 100                 char* begin = current;
 101                 token_begin.pos = begin;
 102                 token_begin.line = line;
 103                 token_begin.column = column;
 104
 105                 if (current >= end) {
 106                         type = MarkupTokenType.EOF;
 107                 } else if (current[0] == '<') {
 108                         current++;
 109                         if (current >= end) {
 110                                 // error
 111                         } else if (current[0] == '?') {
 112                                 // processing instruction
 113                         } else if (current[0] == '!') {
 114                                 // comment or doctype
 115                                 current++;
 116                                 if (current < end - 1 && current[0] == '-' && current[1] == '-') {
 117                                         // comment
 118                                         current += 2;
 119                                         while (current < end - 2) {
 120                                                 if (current[0] == '-' && current[1] == '-' && current[2] == '>') {
 121                                                         // end of comment
 122                                                         current += 3;
 123                                                         break;
 124                                                 }
 125                                                 current++;
 126                                         }
 127
 128                                         // ignore comment, read next token
 129                                         return read_token (out token_begin, out token_end);
 130                                 }
 131                         } else if (current[0] == '/') {
 132                                 type = MarkupTokenType.END_ELEMENT;
 133                                 current++;
 134                                 name = read_name ();
 135                                 if (current >= end || current[0] != '>') {
 136                                         // error
 137                                 }
 138                                 current++;
 139                         } else {
 140                                 type = MarkupTokenType.START_ELEMENT;
 141                                 name = read_name ();
 142                                 space ();
 143                                 while (current < end && current[0] != '>' && current[0] != '/') {
 144                                         string attr_name = read_name ();
 145                                         if (current >= end || current[0] != '=') {
 146                                                 // error
 147                                         }
 148                                         current++;
 149                                         // FIXME allow single quotes
 150                                         if (current >= end || current[0] != '"') {
 151                                                 // error
 152                                         }
 153                                         current++;
 154                                         char* attr_begin = current;
 155                                         while (current < end && current[0] != '"') {
 156                                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 157                                                 if (u != (unichar) (-1)) {
 158                                                         current += u.to_utf8 (null);
 159                                                 } else {
 160                                                         Report.error (null, "invalid UTF-8 character");
 161                                                 }
 162                                         }
 163                                         // TODO process &amp; &gt; &lt; &quot; &apos;
 164                                         string attr_value = ((string) attr_begin).ndup (current - attr_begin);
 165                                         if (current >= end || current[0] != '"') {
 166                                                 // error
 167                                         }
 168                                         current++;
 169                                         attributes.set (attr_name, attr_value);
 170                                         space ();
 171                                 }
 172                                 if (current[0] == '/') {
 173                                         empty_element = true;
 174                                         current++;
 175                                         space ();
 176                                 } else {
 177                                         empty_element = false;
 178                                 }
 179                                 if (current >= end || current[0] != '>') {
 180                                         // error
 181                                 }
 182                                 current++;
 183                         }
 184                 } else {
 185                         space ();
 186                         char* text_begin = current;
 187                         while (current < end && current[0] != '<') {
 188                                 unichar u = ((string) current).get_char_validated ((long) (end - current));
 189                                 if (u != (unichar) (-1)) {
 190                                         current += u.to_utf8 (null);
 191                                 } else {
 192                                         Report.error (null, "invalid UTF-8 character");
 193                                 }
 194                         }
 195                         if (text_begin == current) {
 196                                 // no text
 197                                 // read next token
 198                                 return read_token (out token_begin, out token_end);
 199                         }
 200                         type = MarkupTokenType.TEXT;
 201                         // TODO process &amp; &gt; &lt; &quot; &apos;
 202                         // string text = ((string) text_begin).ndup (current - text_begin);
 203                 }
 204
 205                 column += (int) (current - begin);
 206
 207                 token_end.pos = current;
 208                 token_end.line = line;
 209                 token_end.column = column - 1;
 210
 211                 return type;
 212         }
 213
 214         void space () {
 215                 while (current < end && current[0].isspace ()) {
 216                         if (current[0] == '\n') {
 217                                 line++;
 218                                 column = 0;
 219                         }
 220                         current++;
 221                         column++;
 222                 }
 223         }
 224 }
 225
 226 public enum Vala.MarkupTokenType {
 227         NONE,
 228         START_ELEMENT,
 229         END_ELEMENT,
 230         TEXT,
 231         EOF;
 232
 233         public weak string to_string () {
 234                 switch (this) {
 235                 case START_ELEMENT: return "start element";
 236                 case END_ELEMENT: return "end element";
 237                 case TEXT: return "text";
 238                 case EOF: return "end of file";
 239                 default: return "unknown token type";
 240                 }
 241         }
 242 }
 243