vapigen/valamarkupreader.vala

   1 /* valamarkupreader.vala
   2  *
   3  * Copyright (C) 2008  Jürg Billeter
   4  *
   5  * This library is free software; you can redistribute it and/or
   6  * modify it under the terms of the GNU Lesser General Public
   7  * License as published by the Free Software Foundation; either
   8  * version 2.1 of the License, or (at your option) any later version.
   9
  10  * This library is distributed in the hope that it will be useful,
  11  * but WITHOUT ANY WARRANTY; without even the implied warranty of
  12  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
  13  * Lesser General Public License for more details.
  14
  15  * You should have received a copy of the GNU Lesser General Public
  16  * License along with this library; if not, write to the Free Software
  17  * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301  USA
  18  *
  19  * Author:
  20  *      Jürg Billeter <j@bitron.ch>
  21  */
  22
  23 using GLib;
  24 using Gee;
  25
  26 /**
  27  * Simple reader for a subset of XML.
  28  */
  29 public class Vala.MarkupReader : Object {
  30         public string filename { get; construct; }
  31
  32         public string name { get; private set; }
  33
  34         MappedFile mapped_file;
  35
  36         char* begin;
  37         char* current;
  38         char* end;
  39
  40         int line;
  41         int column;
  42
  43         Map<string,string> attributes = new HashMap<string,string> (str_hash, str_equal);
  44         bool empty_element;
  45
  46         public MarkupReader (string filename) {
  47                 this.filename = filename;
  48         }
  49
  50         construct {
  51                 try {
  52                         mapped_file = new MappedFile (filename, false);
  53                         begin = mapped_file.get_contents ();
  54                         end = begin + mapped_file.get_length ();
  55
  56                         current = begin;
  57
  58                         line = 1;
  59                         column = 1;
  60                 } catch (FileError e) {
  61                         Report.error (null, "Unable to map file `%s': %s".printf (filename, e.message));
  62                 }
  63         }
  64
  65         public string? get_attribute (string attr) {
  66                 return attributes[attr];
  67         }
  68
  69         string read_name () {
  70                 char* begin = current;
  71                 while (current < end) {
  72                         if (current[0] == ' ' || current[0] == '>'
  73                             || current[0] == '/' || current[0] == '=') {
  74                                 break;
  75                         }
  76                         unichar u = ((string) current).get_char_validated ((long) (end - current));
  77                         if (u != (unichar) (-1)) {
  78                                 current += u.to_utf8 (null);
  79                         } else {
  80                                 Report.error (null, "invalid UTF-8 character");
  81                         }
  82                 }
  83                 if (current == begin) {
  84                         // syntax error: invalid name
  85                 }
  86                 return ((string) begin).ndup (current - begin);
  87         }
  88
  89         public MarkupTokenType read_token (out SourceLocation token_begin, out SourceLocation token_end) {
  90                 attributes.clear ();
  91
  92                 if (empty_element) {
  93                         empty_element = false;
  94                         return MarkupTokenType.END_ELEMENT;
  95                 }
  96
  97                 space ();
  98
  99                 MarkupTokenType type = MarkupTokenType.NONE;
 100                 char* begin = current;
 101                 token_begin.pos = begin;
 102                 token_begin.line = line;
 103                 token_begin.column = column;
 104
 105                 if (current >= end) {
 106                         type = MarkupTokenType.EOF;
 107                 } else if (current[0] == '<') {
 108                         current++;
 109                         if (current >= end) {
 110                                 // error
 111                         } else if (current[0] == '?') {
 112                                 // processing instruction
 113                         } else if (current[0] == '!') {
 114                                 // comment or doctype
 115                         } else if (current[0] == '/') {
 116                                 type = MarkupTokenType.END_ELEMENT;
 117                                 current++;
 118                                 name = read_name ();
 119                                 if (current >= end || current[0] != '>') {
 120                                         // error
 121                                 }
 122                                 current++;
 123                         } else {
 124                                 type = MarkupTokenType.START_ELEMENT;
 125                                 name = read_name ();
 126                                 space ();
 127                                 while (current < end && current[0] != '>' && current[0] != '/') {
 128                                         string attr_name = read_name ();
 129                                         if (current >= end || current[0] != '=') {
 130                                                 // error
 131                                         }
 132                                         current++;
 133                                         // FIXME allow single quotes
 134                                         if (current >= end || current[0] != '"') {
 135                                                 // error
 136                                         }
 137                                         current++;
 138                                         char* attr_begin = current;
 139                                         while (current < end && current[0] != '"') {
 140                                                 if (current[0] == '&') {
 141                                                         // process &amp; &gt; &lt; &quot; &apos;
 142                                                 } else {
 143                                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
 144                                                         if (u != (unichar) (-1)) {
 145                                                                 current += u.to_utf8 (null);
 146                                                         } else {
 147                                                                 Report.error (null, "invalid UTF-8 character");
 148                                                         }
 149                                                 }
 150                                         }
 151                                         string attr_value = ((string) attr_begin).ndup (current - attr_begin);
 152                                         if (current >= end || current[0] != '"') {
 153                                                 // error
 154                                         }
 155                                         current++;
 156                                         attributes.set (attr_name, attr_value);
 157                                         space ();
 158                                 }
 159                                 if (current[0] == '/') {
 160                                         empty_element = true;
 161                                         current++;
 162                                         space ();
 163                                 } else {
 164                                         empty_element = false;
 165                                 }
 166                                 if (current >= end || current[0] != '>') {
 167                                         // error
 168                                 }
 169                                 current++;
 170                         }
 171                 } else {
 172                         space ();
 173                         char* text_begin = current;
 174                         while (current < end && current[0] != '<') {
 175                                 if (current[0] == '&') {
 176                                         // process &amp; &gt; &lt; &quot; &apos;
 177                                 } else {
 178                                         unichar u = ((string) current).get_char_validated ((long) (end - current));
 179                                         if (u != (unichar) (-1)) {
 180                                                 current += u.to_utf8 (null);
 181                                         } else {
 182                                                 Report.error (null, "invalid UTF-8 character");
 183                                         }
 184                                 }
 185                         }
 186                         if (text_begin == current) {
 187                                 // no text
 188                                 // read next token
 189                                 return read_token (out token_begin, out token_end);
 190                         }
 191                         type = MarkupTokenType.TEXT;
 192                         // string text = ((string) text_begin).ndup (current - text_begin);
 193                 }
 194
 195                 column += (int) (current - begin);
 196
 197                 token_end.pos = current;
 198                 token_end.line = line;
 199                 token_end.column = column - 1;
 200
 201                 return type;
 202         }
 203
 204         void space () {
 205                 while (current < end && current[0].isspace ()) {
 206                         if (current[0] == '\n') {
 207                                 line++;
 208                                 column = 0;
 209                         }
 210                         current++;
 211                         column++;
 212                 }
 213         }
 214 }
 215
 216 public enum Vala.MarkupTokenType {
 217         NONE,
 218         START_ELEMENT,
 219         END_ELEMENT,
 220         TEXT,
 221         EOF;
 222
 223         public weak string to_string () {
 224                 switch (this) {
 225                 case START_ELEMENT: return "start element";
 226                 case END_ELEMENT: return "end element";
 227                 case TEXT: return "text";
 228                 case EOF: return "end of file";
 229                 default: return "unknown token type";
 230                 }
 231         }
 232 }
 233