Merge remote-tracking branch 'redux/master' into sh4-pool
[tamarin-stm.git] / eval / eval-lex-xml.cpp
blob6a1e5636da736770b3eb9e379e3fb2d7a2abaea3
1 /* -*- Mode: C++; c-basic-offset: 4; indent-tabs-mode: nil; tab-width: 4 -*- */
2 /* vi: set ts=4 sw=4 expandtab: (add to ~/.vimrc: set modeline modelines=5) */
3 /* ***** BEGIN LICENSE BLOCK *****
4 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
6 * The contents of this file are subject to the Mozilla Public License Version
7 * 1.1 (the "License"); you may not use this file except in compliance with
8 * the License. You may obtain a copy of the License at
9 * http://www.mozilla.org/MPL/
11 * Software distributed under the License is distributed on an "AS IS" basis,
12 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
13 * for the specific language governing rights and limitations under the
14 * License.
16 * The Original Code is [Open Source Virtual Machine.].
18 * The Initial Developer of the Original Code is
19 * Adobe System Incorporated.
20 * Portions created by the Initial Developer are Copyright (C) 2008
21 * the Initial Developer. All Rights Reserved.
23 * Contributor(s):
24 * Adobe AS3 Team
26 * Alternatively, the contents of this file may be used under the terms of
27 * either the GNU General Public License Version 2 or later (the "GPL"), or
28 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
29 * in which case the provisions of the GPL or the LGPL are applicable instead
30 * of those above. If you wish to allow use of your version of this file only
31 * under the terms of either the GPL or the LGPL, and not to allow others to
32 * use your version of this file under the terms of the MPL, indicate your
33 * decision by deleting the provisions above and replace them with the notice
34 * and other provisions required by the GPL or the LGPL. If you do not delete
35 * the provisions above, a recipient may use your version of this file under
36 * the terms of any one of the MPL, the GPL or the LGPL.
38 * ***** END LICENSE BLOCK ***** */
40 #include "avmplus.h"
42 #ifdef VMCFG_EVAL
44 #include "eval.h"
46 namespace avmplus
48 namespace RTC
50 Token Lexer::xmlAtomImpl()
52 mark=idx;
53 switch (idx[0]) {
54 case 0:
55 compiler->syntaxError(lineno, SYNTAXERR_XML_EOI_IN_MARKUP);
56 case '<':
57 switch (idx[1]) {
58 case '!':
59 if (idx[2] == '[' &&
60 idx[3] == 'C' &&
61 idx[4] == 'D' &&
62 idx[5] == 'A' &&
63 idx[6] == 'T' &&
64 idx[7] == 'A' &&
65 idx[8] == '[') {
66 idx += 9;
67 return xmlMarkup(T_XmlCDATA);
69 if (idx[2] == '-' && idx[3] == '-') {
70 idx += 4;
71 return xmlMarkup(T_XmlComment);
73 compiler->syntaxError(lineno, SYNTAXERR_XML_INVALID_LEFTBANG);
75 case '?':
76 idx += 2;
77 return xmlMarkup(T_XmlProcessingInstruction);
79 case '/':
80 idx += 2;
81 return T_XmlLeftAngleSlash;
83 default:
84 idx += 1;
85 return T_XmlLeftAngle;
88 case '/':
89 if (idx[1] == '>') {
90 idx += 2;
91 return T_XmlSlashRightAngle;
93 compiler->syntaxError(lineno, SYNTAXERR_XML_INVALID_SLASH);
95 case '>':
96 idx += 1;
97 return T_XmlRightAngle;
99 case '{':
100 idx += 1;
101 return T_XmlLeftBrace;
103 case '}':
104 idx += 1;
105 return T_XmlRightBrace;
107 case '=':
108 idx += 1;
109 return T_XmlEquals;
111 case ' ':
112 case '\t':
113 case '\r':
114 case '\n':
115 return xmlWhitespace();
117 case '"':
118 case '\'':
119 return xmlString();
121 default:
122 if (isXmlNameStart(idx[0]))
123 return xmlName();
124 else
125 return xmlText();
129 // Capture everything from the starting through the ending punctuation.
131 Token Lexer::xmlMarkup(Token token)
133 uint32_t l = lineno;
134 switch (token) {
135 case T_XmlComment:
136 mark = idx-4; // "<!--"
137 break;
138 case T_XmlCDATA:
139 mark = idx-9; // "<![CDATA["
140 break;
141 case T_XmlProcessingInstruction:
142 mark = idx-2; // "<?"
143 break;
144 default:
145 AvmAssert(!"Inconsistent internal state");
148 while (idx < limit) {
149 if (idx[0] == '-' || idx[0] == '?' || idx[0] == ']') {
150 switch (token) {
151 case T_XmlComment:
152 if (idx[0] == '-' && idx[1] == '-') {
153 // Done; we require > to follow but it's not part of the stop condition.
154 if (idx[2] != '>')
155 compiler->syntaxError(lineno, SYNTAXERR_XML_ILLEGAL_CHARS);
156 idx += 3;
157 goto endloop;
159 break;
160 case T_XmlCDATA:
161 if (idx[0] == ']' && idx[1] == ']' && idx[2] == '>') {
162 // Done.
163 idx += 3;
164 goto endloop;
166 break;
167 case T_XmlProcessingInstruction:
168 if (idx[0] == '?' && idx[1] == '>') {
169 // Done.
170 idx += 2;
171 goto endloop;
173 break;
177 switch (idx[0]) {
178 case '\n':
179 lineno++;
180 break;
181 case '\r':
182 lineno++;
183 if (idx[1] == '\n')
184 idx++;
185 break;
186 default:
187 idx++;
188 break;
192 endloop:
193 if (idx == limit)
194 compiler->syntaxError(l, SYNTAXERR_XML_UNTERMINATED);
195 val.s = compiler->intern(mark, uint32_t(idx-mark));
196 return token;
199 Token Lexer::xmlWhitespace()
201 mark = idx;
202 while (idx < limit) {
203 switch (*idx) {
204 case ' ':
205 case '\t':
206 break;
207 case '\r':
208 lineno++;
209 if (idx[1] == '\n')
210 idx++;
211 break;
212 case '\n':
213 lineno++;
214 break;
215 default:
216 goto end_loop;
218 idx++;
220 end_loop:
221 val.s = compiler->intern(mark, uint32_t(idx-mark));
222 return T_XmlWhitespace;
225 Token Lexer::xmlName()
227 AvmAssert( isXmlNameStart(*idx) );
228 mark = idx;
229 while (isXmlNameSubsequent(*idx))
230 idx++;
231 val.s = compiler->intern(mark, uint32_t(idx-mark));
232 return T_XmlName;
235 // mark has been set at the beginning of the starting punctuation,
236 // we wish to capture the ending punctuation as well.
238 Token Lexer::xmlString()
240 wchar terminator = *idx;
241 uint32_t l = lineno;
243 idx++;
244 while (idx < limit && *idx != terminator) {
245 if (*idx == '\r') {
246 idx++;
247 if (*idx == '\n')
248 idx++;
249 lineno++;
251 else if (*idx == '\n') {
252 idx++;
253 lineno++;
255 else
256 idx++;
259 if (idx == limit)
260 compiler->syntaxError(l, SYNTAXERR_XML_UNTERMINATED);
262 idx++;
263 val.s = compiler->intern(mark, uint32_t(idx-mark));
264 return T_XmlString;
267 // FIXME: E4X says to stop only at "{" and "<".
269 Token Lexer::xmlText()
271 mark = idx;
272 while (idx < limit) {
273 switch (*idx) {
274 case ' ':
275 case '\t':
276 case '\r':
277 case '\n':
278 case '{':
279 case '}':
280 case '<':
281 case '>':
282 case '/':
283 case '=':
284 goto end_loop;
285 default:
286 if (isXmlNameStart(*idx))
287 goto end_loop;
289 idx++;
291 end_loop:
292 val.s = compiler->intern(mark, uint32_t(idx-mark));
293 return T_XmlText;
296 bool Lexer::isXmlNameStart(wchar c)
298 return isUnicodeLetter(c) || c == ':' || c == '_';
301 bool Lexer::isXmlNameSubsequent(wchar c)
303 return isUnicodeLetter(c) || isUnicodeDigit(c) || c == '_' || c == ':' || c == '.' || c == '-';
309 #endif // VMCFG_EVAL