2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by input problem.
55 Reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 static const WCHAR usasciiW
[] = {'U','S','-','A','S','C','I','I',0};
87 static const WCHAR utf16W
[] = {'U','T','F','-','1','6',0};
88 static const WCHAR utf8W
[] = {'U','T','F','-','8',0};
90 static const WCHAR dblquoteW
[] = {'\"',0};
91 static const WCHAR quoteW
[] = {'\'',0};
92 static const WCHAR ltW
[] = {'<',0};
93 static const WCHAR gtW
[] = {'>',0};
94 static const WCHAR commentW
[] = {'<','!','-','-',0};
95 static const WCHAR piW
[] = {'<','?',0};
97 BOOL
is_namestartchar(WCHAR ch
);
99 static const char *debugstr_nodetype(XmlNodeType nodetype
)
101 static const char * const type_names
[] =
110 "ProcessingInstruction",
123 if (nodetype
> _XmlNodeType_Last
)
124 return wine_dbg_sprintf("unknown type=%d", nodetype
);
126 return type_names
[nodetype
];
129 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
131 static const char * const prop_names
[] =
143 if (prop
> _XmlReaderProperty_Last
)
144 return wine_dbg_sprintf("unknown property=%d", prop
);
146 return prop_names
[prop
];
149 struct xml_encoding_data
156 static const struct xml_encoding_data xml_encoding_map
[] = {
157 { usasciiW
, XmlEncoding_USASCII
, 20127 },
158 { utf16W
, XmlEncoding_UTF16
, 1200 },
159 { utf8W
, XmlEncoding_UTF8
, CP_UTF8
},
162 const WCHAR
*get_encoding_name(xml_encoding encoding
)
164 return xml_encoding_map
[encoding
].name
;
167 xml_encoding
get_encoding_from_codepage(UINT codepage
)
170 for (i
= 0; i
< ARRAY_SIZE(xml_encoding_map
); i
++)
172 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
174 return XmlEncoding_Unknown
;
181 unsigned int allocated
;
182 unsigned int written
;
186 typedef struct input_buffer input_buffer
;
190 IXmlReaderInput IXmlReaderInput_iface
;
192 /* reference passed on IXmlReaderInput creation, is kept when input is created */
195 xml_encoding encoding
;
198 /* stream reference set after SetInput() call from reader,
199 stored as a sequential stream, because the optimizations
200 possible with IStream aren't currently implemented */
201 ISequentialStream
*stream
;
202 input_buffer
*buffer
;
203 unsigned int pending
: 1;
206 static const struct IUnknownVtbl xmlreaderinputvtbl
;
208 /* Structure to hold parsed string of specific length.
210 Reader stores node value as 'start' pointer, on request
211 a null-terminated version of it is allocated.
213 To init a strval variable use reader_init_strval(),
214 to set strval as a reader value use reader_set_strval().
218 WCHAR
*str
; /* allocated null-terminated string */
219 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
220 UINT start
; /* input position where value starts */
223 static WCHAR emptyW
[] = {0};
224 static WCHAR xmlW
[] = {'x','m','l',0};
225 static WCHAR xmlnsW
[] = {'x','m','l','n','s',0};
226 static const strval strval_empty
= { emptyW
};
227 static const strval strval_xml
= { xmlW
, 3 };
228 static const strval strval_xmlns
= { xmlnsW
, 5 };
230 struct reader_position
238 ATTRIBUTE_NS_DEFINITION
= 0x1,
239 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
249 struct reader_position position
;
259 struct reader_position position
;
267 struct element
*element
;
272 IXmlReader IXmlReader_iface
;
274 xmlreaderinput
*input
;
277 HRESULT error
; /* error set on XmlReadState_Error */
278 XmlReaderInternalState instate
;
279 XmlReaderResumeState resumestate
;
280 XmlNodeType nodetype
;
281 DtdProcessing dtdmode
;
282 IXmlResolver
*resolver
;
284 struct reader_position position
;
285 struct list attrs
; /* attributes list for current node */
286 struct attribute
*attr
; /* current attribute */
290 struct list elements
;
292 strval strvalues
[StringValue_Last
];
295 BOOL is_empty_element
;
296 struct element empty_element
; /* used for empty elements without end tag <a />,
297 and to keep <?xml reader position */
298 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
303 encoded_buffer utf16
;
304 encoded_buffer encoded
;
306 xmlreaderinput
*input
;
309 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
311 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
314 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
316 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
319 /* reader memory allocation functions */
320 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
322 return m_alloc(reader
->imalloc
, len
);
325 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
327 void *ret
= reader_alloc(reader
, len
);
333 static inline void reader_free(xmlreader
*reader
, void *mem
)
335 m_free(reader
->imalloc
, mem
);
338 /* Just return pointer from offset, no attempt to read more. */
339 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
341 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
342 return (WCHAR
*)buffer
->data
+ offset
;
345 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
347 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
350 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
354 if (src
->str
!= strval_empty
.str
)
356 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
357 if (!dest
->str
) return E_OUTOFMEMORY
;
358 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
359 dest
->str
[dest
->len
] = 0;
366 /* reader input memory allocation functions */
367 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
369 return m_alloc(input
->imalloc
, len
);
372 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
374 return m_realloc(input
->imalloc
, mem
, len
);
377 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
379 m_free(input
->imalloc
, mem
);
382 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
389 size
= (lstrlenW(str
)+1)*sizeof(WCHAR
);
390 ret
= readerinput_alloc(input
, size
);
391 if (ret
) memcpy(ret
, str
, size
);
397 /* This one frees stored string value if needed */
398 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
400 if (v
->str
!= strval_empty
.str
)
402 reader_free(reader
, v
->str
);
407 static void reader_clear_attrs(xmlreader
*reader
)
409 struct attribute
*attr
, *attr2
;
410 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
412 reader_free_strvalued(reader
, &attr
->localname
);
413 reader_free_strvalued(reader
, &attr
->value
);
414 reader_free(reader
, attr
);
416 list_init(&reader
->attrs
);
417 reader
->attr_count
= 0;
421 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
422 while we are on a node with attributes */
423 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
424 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
426 struct attribute
*attr
;
429 attr
= reader_alloc(reader
, sizeof(*attr
));
430 if (!attr
) return E_OUTOFMEMORY
;
432 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
435 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
437 reader_free_strvalued(reader
, &attr
->value
);
441 reader_free(reader
, attr
);
446 attr
->prefix
= *prefix
;
448 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
449 attr
->qname
= qname
? *qname
: *localname
;
450 attr
->position
= *position
;
452 list_add_tail(&reader
->attrs
, &attr
->entry
);
453 reader
->attr_count
++;
458 /* Returns current element, doesn't check if reader is actually positioned on it. */
459 static struct element
*reader_get_element(xmlreader
*reader
)
461 if (reader
->is_empty_element
)
462 return &reader
->empty_element
;
464 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
467 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
474 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
476 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
479 /* used to initialize from constant string */
480 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
487 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
489 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
492 static void reader_free_strvalues(xmlreader
*reader
)
495 for (type
= 0; type
< StringValue_Last
; type
++)
496 reader_free_strvalue(reader
, type
);
499 /* This helper should only be used to test if strings are the same,
500 it doesn't try to sort. */
501 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
503 if (str1
->len
!= str2
->len
) return 0;
504 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
507 static void reader_clear_elements(xmlreader
*reader
)
509 struct element
*elem
, *elem2
;
510 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
512 reader_free_strvalued(reader
, &elem
->prefix
);
513 reader_free_strvalued(reader
, &elem
->localname
);
514 reader_free_strvalued(reader
, &elem
->qname
);
515 reader_free(reader
, elem
);
517 list_init(&reader
->elements
);
518 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
519 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
520 reader
->is_empty_element
= FALSE
;
523 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
525 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
528 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
529 if (strval_eq(reader
, prefix
, &ns
->prefix
))
536 static HRESULT
reader_inc_depth(xmlreader
*reader
)
538 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
541 static void reader_dec_depth(xmlreader
*reader
)
547 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
552 ns
= reader_alloc(reader
, sizeof(*ns
));
553 if (!ns
) return E_OUTOFMEMORY
;
556 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
558 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
560 reader_free(reader
, ns
);
565 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
567 reader_free_strvalued(reader
, &ns
->prefix
);
568 reader_free(reader
, ns
);
573 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
577 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
579 reader_free_strvalued(reader
, &element
->prefix
);
580 reader_free_strvalued(reader
, &element
->localname
);
581 reader_free_strvalued(reader
, &element
->qname
);
582 reader_free(reader
, element
);
585 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
589 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
592 ns
->element
= element
;
595 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
598 ns
->element
= element
;
602 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
603 strval
*qname
, const struct reader_position
*position
)
605 struct element
*element
;
608 element
= reader_alloc_zero(reader
, sizeof(*element
));
610 return E_OUTOFMEMORY
;
612 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
613 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
614 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
616 list_add_head(&reader
->elements
, &element
->entry
);
617 reader_mark_ns_nodes(reader
, element
);
618 reader
->is_empty_element
= FALSE
;
619 element
->position
= *position
;
622 reader_free_element(reader
, element
);
627 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
631 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
632 if (ns
->element
!= element
)
635 list_remove(&ns
->entry
);
636 reader_free_strvalued(reader
, &ns
->prefix
);
637 reader_free_strvalued(reader
, &ns
->uri
);
638 reader_free(reader
, ns
);
641 if (!list_empty(&reader
->nsdef
)) {
642 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
643 if (ns
->element
== element
) {
644 list_remove(&ns
->entry
);
645 reader_free_strvalued(reader
, &ns
->prefix
);
646 reader_free_strvalued(reader
, &ns
->uri
);
647 reader_free(reader
, ns
);
652 static void reader_pop_element(xmlreader
*reader
)
654 struct element
*element
;
656 if (list_empty(&reader
->elements
))
659 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
660 list_remove(&element
->entry
);
662 reader_pop_ns_nodes(reader
, element
);
663 reader_free_element(reader
, element
);
665 /* It was a root element, the rest is expected as Misc */
666 if (list_empty(&reader
->elements
))
667 reader
->instate
= XmlReadInState_MiscEnd
;
670 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
671 means node value is to be determined. */
672 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
674 strval
*v
= &reader
->strvalues
[type
];
676 reader_free_strvalue(reader
, type
);
685 if (value
->str
== strval_empty
.str
)
689 if (type
== StringValue_Value
)
691 /* defer allocation for value string */
693 v
->start
= value
->start
;
698 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
699 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
700 v
->str
[value
->len
] = 0;
706 static inline int is_reader_pending(xmlreader
*reader
)
708 return reader
->input
->pending
;
711 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
713 const int initial_len
= 0x2000;
714 buffer
->data
= readerinput_alloc(input
, initial_len
);
715 if (!buffer
->data
) return E_OUTOFMEMORY
;
717 memset(buffer
->data
, 0, 4);
719 buffer
->allocated
= initial_len
;
721 buffer
->prev_cr
= FALSE
;
726 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
728 readerinput_free(input
, buffer
->data
);
731 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
733 if (encoding
== XmlEncoding_Unknown
)
735 FIXME("unsupported encoding %d\n", encoding
);
739 *cp
= xml_encoding_map
[encoding
].cp
;
744 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
748 if (!name
) return XmlEncoding_Unknown
;
751 max
= ARRAY_SIZE(xml_encoding_map
) - 1;
758 c
= wcsnicmp(xml_encoding_map
[n
].name
, name
, len
);
760 c
= wcsicmp(xml_encoding_map
[n
].name
, name
);
762 return xml_encoding_map
[n
].enc
;
770 return XmlEncoding_Unknown
;
773 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
775 input_buffer
*buffer
;
778 input
->buffer
= NULL
;
780 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
781 if (!buffer
) return E_OUTOFMEMORY
;
783 buffer
->input
= input
;
784 buffer
->code_page
= ~0; /* code page is unknown at this point */
785 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
787 readerinput_free(input
, buffer
);
791 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
793 free_encoded_buffer(input
, &buffer
->utf16
);
794 readerinput_free(input
, buffer
);
798 input
->buffer
= buffer
;
802 static void free_input_buffer(input_buffer
*buffer
)
804 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
805 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
806 readerinput_free(buffer
->input
, buffer
);
809 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
811 if (readerinput
->stream
) {
812 ISequentialStream_Release(readerinput
->stream
);
813 readerinput
->stream
= NULL
;
817 /* Queries already stored interface for IStream/ISequentialStream.
818 Interface supplied on creation will be overwritten */
819 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
823 readerinput_release_stream(readerinput
);
824 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
826 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
831 /* reads a chunk to raw buffer */
832 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
834 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
835 /* to make sure aligned length won't exceed allocated length */
836 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
840 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
841 variable width encodings like UTF-8 */
842 len
= (len
+ 3) & ~3;
843 /* try to use allocated space or grow */
844 if (buffer
->allocated
- buffer
->written
< len
)
846 buffer
->allocated
*= 2;
847 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
848 len
= buffer
->allocated
- buffer
->written
;
852 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
853 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
854 readerinput
->pending
= hr
== E_PENDING
;
855 if (FAILED(hr
)) return hr
;
856 buffer
->written
+= read
;
861 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
862 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
864 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
866 length
*= sizeof(WCHAR
);
867 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
868 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
870 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
871 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
872 buffer
->allocated
= grown_size
;
876 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
878 static const char startA
[] = {'<','?'};
879 static const char commentA
[] = {'<','!'};
880 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
881 unsigned char *ptr
= (unsigned char*)buffer
->data
;
883 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
884 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
885 /* test start byte */
888 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
889 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
890 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
891 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
895 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
897 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
898 static const char utf8bom
[] = {0xef,0xbb,0xbf};
899 static const char utf16lebom
[] = {0xff,0xfe};
902 *enc
= XmlEncoding_Unknown
;
904 if (buffer
->written
<= 3)
906 HRESULT hr
= readerinput_growraw(readerinput
);
907 if (FAILED(hr
)) return hr
;
908 if (buffer
->written
< 3) return MX_E_INPUTEND
;
911 ptrW
= (WCHAR
*)buffer
->data
;
912 /* try start symbols if we have enough data to do that, input buffer should contain
913 first chunk already */
914 if (readerinput_is_utf8(readerinput
))
915 *enc
= XmlEncoding_UTF8
;
916 else if (*ptrW
== '<')
919 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
920 *enc
= XmlEncoding_UTF16
;
922 /* try with BOM now */
923 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
925 buffer
->cur
+= sizeof(utf8bom
);
926 *enc
= XmlEncoding_UTF8
;
928 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
930 buffer
->cur
+= sizeof(utf16lebom
);
931 *enc
= XmlEncoding_UTF16
;
937 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
939 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
940 int len
= buffer
->written
;
942 /* complete single byte char */
943 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
945 /* find start byte of multibyte char */
946 while (--len
&& !(buffer
->data
[len
] & 0xc0))
952 /* Returns byte length of complete char sequence for buffer code page,
953 it's relative to current buffer position which is currently used for BOM handling
955 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
957 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
960 if (readerinput
->buffer
->code_page
== CP_UTF8
)
961 len
= readerinput_get_utf8_convlen(readerinput
);
963 len
= buffer
->written
;
965 TRACE("%d\n", len
- buffer
->cur
);
966 return len
- buffer
->cur
;
969 /* It's possible that raw buffer has some leftovers from last conversion - some char
970 sequence that doesn't represent a full code point. Length argument should be calculated with
971 readerinput_get_convlen(), if it's -1 it will be calculated here. */
972 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
974 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
977 len
= readerinput_get_convlen(readerinput
);
979 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
980 /* everything below cur is lost too */
981 buffer
->written
-= len
+ buffer
->cur
;
982 /* after this point we don't need cur offset really,
983 it's used only to mark where actual data begins when first chunk is read */
987 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
989 BOOL prev_cr
= buffer
->prev_cr
;
993 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
994 while ((const char*)src
< buffer
->data
+ buffer
->written
)
1003 if(prev_cr
&& *src
== '\n')
1010 buffer
->written
= (char*)dest
- buffer
->data
;
1011 buffer
->prev_cr
= prev_cr
;
1015 /* note that raw buffer content is kept */
1016 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1018 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1019 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1025 hr
= get_code_page(enc
, &cp
);
1026 if (FAILED(hr
)) return;
1028 readerinput
->buffer
->code_page
= cp
;
1029 len
= readerinput_get_convlen(readerinput
);
1031 TRACE("switching to cp %d\n", cp
);
1033 /* just copy in this case */
1034 if (enc
== XmlEncoding_UTF16
)
1036 readerinput_grow(readerinput
, len
);
1037 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1038 dest
->written
+= len
*sizeof(WCHAR
);
1042 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1043 readerinput_grow(readerinput
, dest_len
);
1044 ptr
= (WCHAR
*)dest
->data
;
1045 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1047 dest
->written
+= dest_len
*sizeof(WCHAR
);
1050 fixup_buffer_cr(dest
, 0);
1053 /* shrinks parsed data a buffer begins with */
1054 static void reader_shrink(xmlreader
*reader
)
1056 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1058 /* avoid moving data too often: shrink only once a threshold is exceeded */
1059 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1061 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1062 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1064 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1068 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1069 It won't attempt to shrink but will grow destination buffer if needed */
1070 static HRESULT
reader_more(xmlreader
*reader
)
1072 xmlreaderinput
*readerinput
= reader
->input
;
1073 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1074 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1075 UINT cp
= readerinput
->buffer
->code_page
;
1076 int len
, dest_len
, prev_len
;
1080 /* get some raw data from stream first */
1081 hr
= readerinput_growraw(readerinput
);
1082 len
= readerinput_get_convlen(readerinput
);
1083 prev_len
= dest
->written
/ sizeof(WCHAR
);
1085 /* just copy for UTF-16 case */
1088 readerinput_grow(readerinput
, len
);
1089 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1090 dest
->written
+= len
*sizeof(WCHAR
);
1094 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1095 readerinput_grow(readerinput
, dest_len
);
1096 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1097 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1099 dest
->written
+= dest_len
*sizeof(WCHAR
);
1100 /* get rid of processed data */
1101 readerinput_shrinkraw(readerinput
, len
);
1104 fixup_buffer_cr(dest
, prev_len
);
1108 static inline UINT
reader_get_cur(xmlreader
*reader
)
1110 return reader
->input
->buffer
->utf16
.cur
;
1113 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1115 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1116 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1117 if (!*ptr
) reader_more(reader
);
1118 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1121 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1124 const WCHAR
*ptr
= reader_get_ptr(reader
);
1129 reader_more(reader
);
1130 ptr
= reader_get_ptr(reader
);
1132 if (str
[i
] != ptr
[i
])
1133 return ptr
[i
] - str
[i
];
1139 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1142 reader
->position
.line_position
= 1;
1143 else if (ch
== '\n')
1145 reader
->position
.line_number
++;
1146 reader
->position
.line_position
= 1;
1149 reader
->position
.line_position
++;
1152 /* moves cursor n WCHARs forward */
1153 static void reader_skipn(xmlreader
*reader
, int n
)
1155 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1158 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1160 reader_update_position(reader
, *ptr
);
1165 static inline BOOL
is_wchar_space(WCHAR ch
)
1167 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1170 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1171 static int reader_skipspaces(xmlreader
*reader
)
1173 const WCHAR
*ptr
= reader_get_ptr(reader
);
1174 UINT start
= reader_get_cur(reader
);
1176 while (is_wchar_space(*ptr
))
1178 reader_skipn(reader
, 1);
1179 ptr
= reader_get_ptr(reader
);
1182 return reader_get_cur(reader
) - start
;
1185 /* [26] VersionNum ::= '1.' [0-9]+ */
1186 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1188 static const WCHAR onedotW
[] = {'1','.',0};
1192 if (reader_cmp(reader
, onedotW
)) return WC_E_XMLDECL
;
1194 start
= reader_get_cur(reader
);
1196 reader_skipn(reader
, 2);
1198 ptr2
= ptr
= reader_get_ptr(reader
);
1199 while (*ptr
>= '0' && *ptr
<= '9')
1201 reader_skipn(reader
, 1);
1202 ptr
= reader_get_ptr(reader
);
1205 if (ptr2
== ptr
) return WC_E_DIGIT
;
1206 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1207 TRACE("version=%s\n", debug_strval(reader
, val
));
1211 /* [25] Eq ::= S? '=' S? */
1212 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1214 static const WCHAR eqW
[] = {'=',0};
1215 reader_skipspaces(reader
);
1216 if (reader_cmp(reader
, eqW
)) return WC_E_EQUAL
;
1218 reader_skipn(reader
, 1);
1219 reader_skipspaces(reader
);
1223 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1224 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1226 static const WCHAR versionW
[] = {'v','e','r','s','i','o','n',0};
1227 struct reader_position position
;
1231 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1233 position
= reader
->position
;
1234 if (reader_cmp(reader
, versionW
)) return WC_E_XMLDECL
;
1235 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1236 /* skip 'version' */
1237 reader_skipn(reader
, 7);
1239 hr
= reader_parse_eq(reader
);
1240 if (FAILED(hr
)) return hr
;
1242 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1245 reader_skipn(reader
, 1);
1247 hr
= reader_parse_versionnum(reader
, &val
);
1248 if (FAILED(hr
)) return hr
;
1250 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1254 reader_skipn(reader
, 1);
1256 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1259 /* ([A-Za-z0-9._] | '-') */
1260 static inline BOOL
is_wchar_encname(WCHAR ch
)
1262 return ((ch
>= 'A' && ch
<= 'Z') ||
1263 (ch
>= 'a' && ch
<= 'z') ||
1264 (ch
>= '0' && ch
<= '9') ||
1265 (ch
== '.') || (ch
== '_') ||
1269 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1270 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1272 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1276 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1277 return WC_E_ENCNAME
;
1279 val
->start
= reader_get_cur(reader
);
1282 while (is_wchar_encname(*++ptr
))
1286 enc
= parse_encoding_name(start
, len
);
1287 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1291 if (enc
== XmlEncoding_Unknown
)
1292 return WC_E_ENCNAME
;
1294 /* skip encoding name */
1295 reader_skipn(reader
, len
);
1299 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1300 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1302 static const WCHAR encodingW
[] = {'e','n','c','o','d','i','n','g',0};
1303 struct reader_position position
;
1307 if (!reader_skipspaces(reader
)) return S_FALSE
;
1309 position
= reader
->position
;
1310 if (reader_cmp(reader
, encodingW
)) return S_FALSE
;
1311 name
.str
= reader_get_ptr(reader
);
1312 name
.start
= reader_get_cur(reader
);
1314 /* skip 'encoding' */
1315 reader_skipn(reader
, 8);
1317 hr
= reader_parse_eq(reader
);
1318 if (FAILED(hr
)) return hr
;
1320 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1323 reader_skipn(reader
, 1);
1325 hr
= reader_parse_encname(reader
, &val
);
1326 if (FAILED(hr
)) return hr
;
1328 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1332 reader_skipn(reader
, 1);
1334 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1337 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1338 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1340 static const WCHAR standaloneW
[] = {'s','t','a','n','d','a','l','o','n','e',0};
1341 static const WCHAR yesW
[] = {'y','e','s',0};
1342 static const WCHAR noW
[] = {'n','o',0};
1343 struct reader_position position
;
1348 if (!reader_skipspaces(reader
)) return S_FALSE
;
1350 position
= reader
->position
;
1351 if (reader_cmp(reader
, standaloneW
)) return S_FALSE
;
1352 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1353 /* skip 'standalone' */
1354 reader_skipn(reader
, 10);
1356 hr
= reader_parse_eq(reader
);
1357 if (FAILED(hr
)) return hr
;
1359 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1362 reader_skipn(reader
, 1);
1364 if (reader_cmp(reader
, yesW
) && reader_cmp(reader
, noW
))
1365 return WC_E_XMLDECL
;
1367 start
= reader_get_cur(reader
);
1368 /* skip 'yes'|'no' */
1369 reader_skipn(reader
, reader_cmp(reader
, yesW
) ? 2 : 3);
1370 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1371 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1373 if (reader_cmp(reader
, quoteW
) && reader_cmp(reader
, dblquoteW
))
1376 reader_skipn(reader
, 1);
1378 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1381 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1382 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1384 static const WCHAR xmldeclW
[] = {'<','?','x','m','l',' ',0};
1385 static const WCHAR declcloseW
[] = {'?','>',0};
1386 struct reader_position position
;
1389 /* check if we have "<?xml " */
1390 if (reader_cmp(reader
, xmldeclW
))
1393 reader_skipn(reader
, 2);
1394 position
= reader
->position
;
1395 reader_skipn(reader
, 3);
1396 hr
= reader_parse_versioninfo(reader
);
1400 hr
= reader_parse_encdecl(reader
);
1404 hr
= reader_parse_sddecl(reader
);
1408 reader_skipspaces(reader
);
1409 if (reader_cmp(reader
, declcloseW
))
1410 return WC_E_XMLDECL
;
1413 reader_skipn(reader
, 2);
1415 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1416 reader
->empty_element
.position
= position
;
1417 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1418 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1423 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1424 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1429 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1431 start
= reader
->resume
[XmlReadResume_Body
];
1432 ptr
= reader_get_ptr(reader
);
1437 reader_skipn(reader
, 4);
1438 reader_shrink(reader
);
1439 ptr
= reader_get_ptr(reader
);
1440 start
= reader_get_cur(reader
);
1441 reader
->nodetype
= XmlNodeType_Comment
;
1442 reader
->resume
[XmlReadResume_Body
] = start
;
1443 reader
->resumestate
= XmlReadResumeState_Comment
;
1444 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1447 /* will exit when there's no more data, it won't attempt to
1448 read more from stream */
1459 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1460 TRACE("%s\n", debug_strval(reader
, &value
));
1462 /* skip rest of markup '->' */
1463 reader_skipn(reader
, 3);
1465 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1466 reader
->resume
[XmlReadResume_Body
] = 0;
1467 reader
->resumestate
= XmlReadResumeState_Initial
;
1471 return WC_E_COMMENT
;
1475 reader_skipn(reader
, 1);
/* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
/*
 * Tells whether a single code unit is acceptable as XML character data.
 *
 * Besides the BMP ranges of production [2], both surrogate halves are
 * accepted on their own; no pairing check is done here. Presumably this is
 * how the [#x10000-#x10FFFF] range is admitted when WCHAR holds UTF-16 code
 * units - confirm against the callers.
 *
 * Returns non-zero when ch is accepted, zero otherwise (this includes NUL,
 * the other C0 controls, 0xFFFE and 0xFFFF).
 */
static inline BOOL is_char(WCHAR ch)
{
    return (ch == '\t') || (ch == '\r') || (ch == '\n') ||
           (ch >= 0x20 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xe000 && ch <= 0xfffd);
}
/* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
/*
 * Tells whether ch may appear inside a public identifier literal.
 *
 * Returns non-zero for every character listed in production [13], zero
 * otherwise. Note that both quote-like characters are treated differently:
 * the apostrophe is a PubidChar, the double quote is not.
 */
BOOL is_pubchar(WCHAR ch)
{
    return (ch == ' ') ||
           (ch >= 'a' && ch <= 'z') ||
           (ch >= 'A' && ch <= 'Z') ||
           (ch >= '0' && ch <= '9') ||
           /* The contiguous run 0x27..0x3b. It has to start at the apostrophe,
              not at '-', otherwise ' ( ) * + , are wrongly rejected. The
              digits inside the run are harmless duplicates of the test above. */
           (ch >= '\'' && ch <= ';') || /* '()*+,-./0-9:; */
           (ch == '=') || (ch == '?') ||
           (ch == '@') || (ch == '!') ||
           (ch >= '#' && ch <= '%') || /* #$% */
           (ch == '_') || (ch == '\r') || (ch == '\n');
}
/* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] |
                         [#x370-#x37D] | [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] |
                         [#x2C00-#x2FEF] | [#x3001-#xD7FF] | [#xF900-#xFDCF] | [#xFDF0-#xFFFD] |
                         [#x10000-#xEFFFF] */
/*
 * Tells whether ch may begin an XML Name.
 *
 * The BMP ranges are tested literally. Either surrogate half is accepted by
 * itself, with no pairing or range check; presumably that stands in for the
 * [#x10000-#xEFFFF] part of the production - confirm against the callers.
 *
 * Returns non-zero when ch is accepted, zero otherwise.
 */
BOOL is_namestartchar(WCHAR ch)
{
    return (ch == ':') || (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/* [4 NS] NCName ::= Name - (Char* ':' Char*) */
/*
 * Tells whether ch may appear in an NCName, i.e. it is a NameChar
 * (production [4a]: NameStartChar | "-" | "." | [0-9] | #xB7 |
 * [#x0300-#x036F] | [#x203F-#x2040]) other than the colon.
 *
 * Either surrogate half is accepted by itself, with no pairing check.
 *
 * Returns non-zero when ch is accepted, zero otherwise.
 */
BOOL is_ncnamechar(WCHAR ch)
{
    return (ch >= 'A' && ch <= 'Z') ||
           (ch == '_') || (ch >= 'a' && ch <= 'z') ||
           (ch == '-') || (ch == '.') ||
           (ch >= '0' && ch <= '9') ||
           (ch == 0xb7) ||                   /* middle dot, NameChar only */
           (ch >= 0xc0   && ch <= 0xd6)   ||
           (ch >= 0xd8   && ch <= 0xf6)   ||
           (ch >= 0xf8   && ch <= 0x2ff)  ||
           (ch >= 0x300  && ch <= 0x36f)  ||
           (ch >= 0x370  && ch <= 0x37d)  ||
           (ch >= 0x37f  && ch <= 0x1fff) ||
           (ch >= 0x200c && ch <= 0x200d) ||
           (ch >= 0x203f && ch <= 0x2040) ||
           (ch >= 0x2070 && ch <= 0x218f) ||
           (ch >= 0x2c00 && ch <= 0x2fef) ||
           (ch >= 0x3001 && ch <= 0xd7ff) ||
           (ch >= 0xd800 && ch <= 0xdbff) || /* high surrogate */
           (ch >= 0xdc00 && ch <= 0xdfff) || /* low surrogate */
           (ch >= 0xf900 && ch <= 0xfdcf) ||
           (ch >= 0xfdf0 && ch <= 0xfffd);
}
/*
 * Tells whether ch may appear in an XML Name after its first character:
 * the colon, or anything is_ncnamechar() accepts.
 *
 * Returns non-zero when ch is accepted, zero otherwise.
 */
BOOL is_namechar(WCHAR ch)
{
    return (ch == ':') || is_ncnamechar(ch);
}
1555 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1557 /* When we're on attribute always return attribute type, container node type is kept.
1558 Note that container is not necessarily an element, and attribute doesn't mean it's
1559 an attribute in XML spec terms. */
1560 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1563 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1564 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1565 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1566 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1567 [5] Name ::= NameStartChar (NameChar)* */
1568 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1573 if (reader
->resume
[XmlReadResume_Name
])
1575 start
= reader
->resume
[XmlReadResume_Name
];
1576 ptr
= reader_get_ptr(reader
);
1580 ptr
= reader_get_ptr(reader
);
1581 start
= reader_get_cur(reader
);
1582 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1585 while (is_namechar(*ptr
))
1587 reader_skipn(reader
, 1);
1588 ptr
= reader_get_ptr(reader
);
1591 if (is_reader_pending(reader
))
1593 reader
->resume
[XmlReadResume_Name
] = start
;
1597 reader
->resume
[XmlReadResume_Name
] = 0;
1599 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1600 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1605 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1606 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1608 static const WCHAR xmlW
[] = {'x','m','l'};
1609 static const strval xmlval
= { (WCHAR
*)xmlW
, 3 };
1615 hr
= reader_parse_name(reader
, &name
);
1616 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1618 /* now that we got name check for illegal content */
1619 if (strval_eq(reader
, &name
, &xmlval
))
1620 return WC_E_LEADINGXML
;
1622 /* PITarget can't be a qualified name */
1623 ptr
= reader_get_strptr(reader
, &name
);
1624 for (i
= 0; i
< name
.len
; i
++)
1626 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1628 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1633 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1634 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1641 switch (reader
->resumestate
)
1643 case XmlReadResumeState_Initial
:
1645 reader_skipn(reader
, 2);
1646 reader_shrink(reader
);
1647 reader
->resumestate
= XmlReadResumeState_PITarget
;
1648 case XmlReadResumeState_PITarget
:
1649 hr
= reader_parse_pitarget(reader
, &target
);
1650 if (FAILED(hr
)) return hr
;
1651 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1652 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1653 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1654 reader
->resumestate
= XmlReadResumeState_PIBody
;
1655 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1660 start
= reader
->resume
[XmlReadResume_Body
];
1661 ptr
= reader_get_ptr(reader
);
1668 UINT cur
= reader_get_cur(reader
);
1671 /* strip all leading whitespace chars */
1674 ptr
= reader_get_ptr2(reader
, start
);
1675 if (!is_wchar_space(*ptr
)) break;
1679 reader_init_strvalue(start
, cur
-start
, &value
);
1682 reader_skipn(reader
, 2);
1683 TRACE("%s\n", debug_strval(reader
, &value
));
1684 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1685 reader
->resumestate
= XmlReadResumeState_Initial
;
1686 reader
->resume
[XmlReadResume_Body
] = 0;
1687 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1692 reader_skipn(reader
, 1);
1693 ptr
= reader_get_ptr(reader
);
1699 /* This one is used to parse significant whitespace nodes, like in Misc production */
1700 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1702 switch (reader
->resumestate
)
1704 case XmlReadResumeState_Initial
:
1705 reader_shrink(reader
);
1706 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1707 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1708 reader
->nodetype
= XmlNodeType_Whitespace
;
1709 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1710 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1711 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1713 case XmlReadResumeState_Whitespace
:
1718 reader_skipspaces(reader
);
1719 if (is_reader_pending(reader
)) return S_OK
;
1721 start
= reader
->resume
[XmlReadResume_Body
];
1722 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1723 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1724 TRACE("%s\n", debug_strval(reader
, &value
));
1725 reader
->resumestate
= XmlReadResumeState_Initial
;
1734 /* [27] Misc ::= Comment | PI | S */
1735 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1737 HRESULT hr
= S_FALSE
;
1739 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1741 hr
= reader_more(reader
);
1742 if (FAILED(hr
)) return hr
;
1744 /* finish current node */
1745 switch (reader
->resumestate
)
1747 case XmlReadResumeState_PITarget
:
1748 case XmlReadResumeState_PIBody
:
1749 return reader_parse_pi(reader
);
1750 case XmlReadResumeState_Comment
:
1751 return reader_parse_comment(reader
);
1752 case XmlReadResumeState_Whitespace
:
1753 return reader_parse_whitespace(reader
);
1755 ERR("unknown resume state %d\n", reader
->resumestate
);
1761 const WCHAR
*cur
= reader_get_ptr(reader
);
1763 if (is_wchar_space(*cur
))
1764 hr
= reader_parse_whitespace(reader
);
1765 else if (!reader_cmp(reader
, commentW
))
1766 hr
= reader_parse_comment(reader
);
1767 else if (!reader_cmp(reader
, piW
))
1768 hr
= reader_parse_pi(reader
);
1772 if (hr
!= S_FALSE
) return hr
;
1778 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1779 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1781 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1784 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1787 reader_skipn(reader
, 1);
1789 cur
= reader_get_ptr(reader
);
1790 start
= reader_get_cur(reader
);
1791 while (is_char(*cur
) && *cur
!= quote
)
1793 reader_skipn(reader
, 1);
1794 cur
= reader_get_ptr(reader
);
1796 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1797 if (*cur
== quote
) reader_skipn(reader
, 1);
1799 TRACE("%s\n", debug_strval(reader
, literal
));
1803 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1804 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1805 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1807 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1810 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1813 reader_skipn(reader
, 1);
1815 start
= reader_get_cur(reader
);
1816 cur
= reader_get_ptr(reader
);
1817 while (is_pubchar(*cur
) && *cur
!= quote
)
1819 reader_skipn(reader
, 1);
1820 cur
= reader_get_ptr(reader
);
1822 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1823 if (*cur
== quote
) reader_skipn(reader
, 1);
1825 TRACE("%s\n", debug_strval(reader
, literal
));
1829 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1830 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1832 static WCHAR systemW
[] = {'S','Y','S','T','E','M',0};
1833 static WCHAR publicW
[] = {'P','U','B','L','I','C',0};
1834 struct reader_position position
= reader
->position
;
1839 if (!reader_cmp(reader
, publicW
)) {
1843 reader_skipn(reader
, 6);
1844 cnt
= reader_skipspaces(reader
);
1845 if (!cnt
) return WC_E_WHITESPACE
;
1847 hr
= reader_parse_pub_literal(reader
, &pub
);
1848 if (FAILED(hr
)) return hr
;
1850 reader_init_cstrvalue(publicW
, lstrlenW(publicW
), &name
);
1851 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1852 if (FAILED(hr
)) return hr
;
1854 cnt
= reader_skipspaces(reader
);
1855 if (!cnt
) return S_OK
;
1857 /* optional system id */
1858 hr
= reader_parse_sys_literal(reader
, &sys
);
1859 if (FAILED(hr
)) return S_OK
;
1861 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1862 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1863 if (FAILED(hr
)) return hr
;
1866 } else if (!reader_cmp(reader
, systemW
)) {
1868 reader_skipn(reader
, 6);
1869 cnt
= reader_skipspaces(reader
);
1870 if (!cnt
) return WC_E_WHITESPACE
;
1872 hr
= reader_parse_sys_literal(reader
, &sys
);
1873 if (FAILED(hr
)) return hr
;
1875 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1876 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1882 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1883 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1885 static const WCHAR doctypeW
[] = {'<','!','D','O','C','T','Y','P','E',0};
1890 /* check if we have "<!DOCTYPE" */
1891 if (reader_cmp(reader
, doctypeW
)) return S_FALSE
;
1892 reader_shrink(reader
);
1894 /* DTD processing is not allowed by default */
1895 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1897 reader_skipn(reader
, 9);
1898 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1901 hr
= reader_parse_name(reader
, &name
);
1902 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1904 reader_skipspaces(reader
);
1906 hr
= reader_parse_externalid(reader
);
1907 if (FAILED(hr
)) return hr
;
1909 reader_skipspaces(reader
);
1911 cur
= reader_get_ptr(reader
);
1914 FIXME("internal subset parsing not implemented\n");
1919 reader_skipn(reader
, 1);
1921 reader
->nodetype
= XmlNodeType_DocumentType
;
1922 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1923 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1928 /* [11 NS] LocalPart ::= NCName */
1929 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1934 if (reader
->resume
[XmlReadResume_Local
])
1936 start
= reader
->resume
[XmlReadResume_Local
];
1937 ptr
= reader_get_ptr(reader
);
1941 ptr
= reader_get_ptr(reader
);
1942 start
= reader_get_cur(reader
);
1945 while (is_ncnamechar(*ptr
))
1947 reader_skipn(reader
, 1);
1948 ptr
= reader_get_ptr(reader
);
1951 if (check_for_separator
&& *ptr
== ':')
1952 return NC_E_QNAMECOLON
;
1954 if (is_reader_pending(reader
))
1956 reader
->resume
[XmlReadResume_Local
] = start
;
1960 reader
->resume
[XmlReadResume_Local
] = 0;
1962 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1967 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1968 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1969 [9 NS] UnprefixedName ::= LocalPart
1970 [10 NS] Prefix ::= NCName */
1971 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1977 if (reader
->resume
[XmlReadResume_Name
])
1979 start
= reader
->resume
[XmlReadResume_Name
];
1980 ptr
= reader_get_ptr(reader
);
1984 ptr
= reader_get_ptr(reader
);
1985 start
= reader_get_cur(reader
);
1986 reader
->resume
[XmlReadResume_Name
] = start
;
1987 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1990 if (reader
->resume
[XmlReadResume_Local
])
1992 hr
= reader_parse_local(reader
, local
, FALSE
);
1993 if (FAILED(hr
)) return hr
;
1995 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1996 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
2001 /* skip prefix part */
2002 while (is_ncnamechar(*ptr
))
2004 reader_skipn(reader
, 1);
2005 ptr
= reader_get_ptr(reader
);
2008 if (is_reader_pending(reader
)) return E_PENDING
;
2010 /* got a qualified name */
2013 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
2016 reader_skipn(reader
, 1);
2017 hr
= reader_parse_local(reader
, local
, TRUE
);
2018 if (FAILED(hr
)) return hr
;
2022 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2023 reader_init_strvalue(0, 0, prefix
);
2028 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2030 TRACE("ncname %s\n", debug_strval(reader
, local
));
2032 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2034 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2037 reader
->resume
[XmlReadResume_Name
] = 0;
2038 reader
->resume
[XmlReadResume_Local
] = 0;
2043 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2045 static const WCHAR entltW
[] = {'l','t'};
2046 static const WCHAR entgtW
[] = {'g','t'};
2047 static const WCHAR entampW
[] = {'a','m','p'};
2048 static const WCHAR entaposW
[] = {'a','p','o','s'};
2049 static const WCHAR entquotW
[] = {'q','u','o','t'};
2050 static const strval lt
= { (WCHAR
*)entltW
, 2 };
2051 static const strval gt
= { (WCHAR
*)entgtW
, 2 };
2052 static const strval amp
= { (WCHAR
*)entampW
, 3 };
2053 static const strval apos
= { (WCHAR
*)entaposW
, 4 };
2054 static const strval quot
= { (WCHAR
*)entquotW
, 4 };
2055 WCHAR
*str
= reader_get_strptr(reader
, name
);
2060 if (strval_eq(reader
, name
, <
)) return '<';
2063 if (strval_eq(reader
, name
, >
)) return '>';
2066 if (strval_eq(reader
, name
, &
))
2068 else if (strval_eq(reader
, name
, &apos
))
2072 if (strval_eq(reader
, name
, "
)) return '\"';
2081 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2082 [67] Reference ::= EntityRef | CharRef
2083 [68] EntityRef ::= '&' Name ';' */
2084 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2086 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2087 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2088 UINT cur
= reader_get_cur(reader
);
2093 reader_skipn(reader
, 1);
2094 ptr
= reader_get_ptr(reader
);
2098 reader_skipn(reader
, 1);
2099 ptr
= reader_get_ptr(reader
);
2101 /* hex char or decimal */
2104 reader_skipn(reader
, 1);
2105 ptr
= reader_get_ptr(reader
);
2109 if ((*ptr
>= '0' && *ptr
<= '9'))
2110 ch
= ch
*16 + *ptr
- '0';
2111 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2112 ch
= ch
*16 + *ptr
- 'a' + 10;
2113 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2114 ch
= ch
*16 + *ptr
- 'A' + 10;
2116 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2117 reader_skipn(reader
, 1);
2118 ptr
= reader_get_ptr(reader
);
2125 if ((*ptr
>= '0' && *ptr
<= '9'))
2127 ch
= ch
*10 + *ptr
- '0';
2128 reader_skipn(reader
, 1);
2129 ptr
= reader_get_ptr(reader
);
2132 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2136 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2139 if (is_wchar_space(ch
)) ch
= ' ';
2141 ptr
= reader_get_ptr(reader
);
2142 start
= reader_get_ptr2(reader
, cur
);
2143 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2144 memmove(start
+ 1, ptr
+ 1, len
);
2146 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2147 buffer
->cur
= cur
+ 1;
2156 hr
= reader_parse_name(reader
, &name
);
2157 if (FAILED(hr
)) return hr
;
2159 ptr
= reader_get_ptr(reader
);
2160 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2162 /* predefined entities resolve to a single character */
2163 ch
= get_predefined_entity(reader
, &name
);
2166 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2167 memmove(start
+1, ptr
+1, len
);
2168 buffer
->cur
= cur
+ 1;
2169 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2175 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2176 return WC_E_UNDECLAREDENTITY
;
2184 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2185 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2190 ptr
= reader_get_ptr(reader
);
2192 /* skip opening quote */
2194 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2195 reader_skipn(reader
, 1);
2197 ptr
= reader_get_ptr(reader
);
2198 start
= reader_get_cur(reader
);
2201 if (*ptr
== '<') return WC_E_LESSTHAN
;
2205 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2206 /* skip closing quote */
2207 reader_skipn(reader
, 1);
2213 HRESULT hr
= reader_parse_reference(reader
);
2214 if (FAILED(hr
)) return hr
;
2218 /* replace all whitespace chars with ' ' */
2219 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2220 reader_skipn(reader
, 1);
2222 ptr
= reader_get_ptr(reader
);
2228 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2229 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2230 [3 NS] DefaultAttName ::= 'xmlns'
2231 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2232 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2234 struct reader_position position
= reader
->position
;
2235 strval prefix
, local
, qname
, value
;
2236 enum attribute_flags flags
= 0;
2239 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2240 if (FAILED(hr
)) return hr
;
2242 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2243 flags
|= ATTRIBUTE_NS_DEFINITION
;
2245 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2246 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2248 hr
= reader_parse_eq(reader
);
2249 if (FAILED(hr
)) return hr
;
2251 hr
= reader_parse_attvalue(reader
, &value
);
2252 if (FAILED(hr
)) return hr
;
2254 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2255 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2257 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2258 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2261 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2262 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2263 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2265 struct reader_position position
= reader
->position
;
2268 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2269 if (FAILED(hr
)) return hr
;
2273 static const WCHAR endW
[] = {'/','>',0};
2275 reader_skipspaces(reader
);
2278 if ((reader
->is_empty_element
= !reader_cmp(reader
, endW
)))
2280 struct element
*element
= &reader
->empty_element
;
2283 reader_skipn(reader
, 2);
2285 reader_free_strvalued(reader
, &element
->qname
);
2286 reader_free_strvalued(reader
, &element
->localname
);
2288 element
->prefix
= *prefix
;
2289 reader_strvaldup(reader
, qname
, &element
->qname
);
2290 reader_strvaldup(reader
, local
, &element
->localname
);
2291 element
->position
= position
;
2292 reader_mark_ns_nodes(reader
, element
);
2296 /* got a start tag */
2297 if (!reader_cmp(reader
, gtW
))
2300 reader_skipn(reader
, 1);
2301 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2304 hr
= reader_parse_attribute(reader
);
2305 if (FAILED(hr
)) return hr
;
2311 /* [39] element ::= EmptyElemTag | STag content ETag */
2312 static HRESULT
reader_parse_element(xmlreader
*reader
)
2316 switch (reader
->resumestate
)
2318 case XmlReadResumeState_Initial
:
2319 /* check if we are really on element */
2320 if (reader_cmp(reader
, ltW
)) return S_FALSE
;
2323 reader_skipn(reader
, 1);
2325 reader_shrink(reader
);
2326 reader
->resumestate
= XmlReadResumeState_STag
;
2327 case XmlReadResumeState_STag
:
2329 strval qname
, prefix
, local
;
2331 /* this handles empty elements too */
2332 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2333 if (FAILED(hr
)) return hr
;
2335 /* FIXME: need to check for defined namespace to reject invalid prefix */
2337 /* if we got empty element and stack is empty go straight to Misc */
2338 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2339 reader
->instate
= XmlReadInState_MiscEnd
;
2341 reader
->instate
= XmlReadInState_Content
;
2343 reader
->nodetype
= XmlNodeType_Element
;
2344 reader
->resumestate
= XmlReadResumeState_Initial
;
2345 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2346 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2347 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2357 /* [13 NS] ETag ::= '</' QName S? '>' */
2358 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2360 struct reader_position position
;
2361 strval prefix
, local
, qname
;
2362 struct element
*element
;
2366 reader_skipn(reader
, 2);
2368 position
= reader
->position
;
2369 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2370 if (FAILED(hr
)) return hr
;
2372 reader_skipspaces(reader
);
2374 if (reader_cmp(reader
, gtW
)) return WC_E_GREATERTHAN
;
2377 reader_skipn(reader
, 1);
2379 /* Element stack should never be empty at this point, cause we shouldn't get to
2380 content parsing if it's empty. */
2381 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2382 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2384 /* update position stored for start tag, we won't be using it */
2385 element
->position
= position
;
2387 reader
->nodetype
= XmlNodeType_EndElement
;
2388 reader
->is_empty_element
= FALSE
;
2389 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2394 /* [18] CDSect ::= CDStart CData CDEnd
2395 [19] CDStart ::= '<![CDATA['
2396 [20] CData ::= (Char* - (Char* ']]>' Char*))
2397 [21] CDEnd ::= ']]>' */
2398 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2403 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2405 start
= reader
->resume
[XmlReadResume_Body
];
2406 ptr
= reader_get_ptr(reader
);
2410 /* skip markup '<![CDATA[' */
2411 reader_skipn(reader
, 9);
2412 reader_shrink(reader
);
2413 ptr
= reader_get_ptr(reader
);
2414 start
= reader_get_cur(reader
);
2415 reader
->nodetype
= XmlNodeType_CDATA
;
2416 reader
->resume
[XmlReadResume_Body
] = start
;
2417 reader
->resumestate
= XmlReadResumeState_CDATA
;
2418 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2423 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2427 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2430 reader_skipn(reader
, 3);
2431 TRACE("%s\n", debug_strval(reader
, &value
));
2433 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2434 reader
->resume
[XmlReadResume_Body
] = 0;
2435 reader
->resumestate
= XmlReadResumeState_Initial
;
2440 reader_skipn(reader
, 1);
2441 ptr
= reader_get_ptr(reader
);
2448 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2449 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2451 struct reader_position position
;
2455 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2457 start
= reader
->resume
[XmlReadResume_Body
];
2458 ptr
= reader_get_ptr(reader
);
2462 reader_shrink(reader
);
2463 ptr
= reader_get_ptr(reader
);
2464 start
= reader_get_cur(reader
);
2465 /* There's no text */
2466 if (!*ptr
|| *ptr
== '<') return S_OK
;
2467 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2468 reader
->resume
[XmlReadResume_Body
] = start
;
2469 reader
->resumestate
= XmlReadResumeState_CharData
;
2470 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2473 position
= reader
->position
;
2476 static const WCHAR ampW
[] = {'&',0};
2478 /* CDATA closing sequence ']]>' is not allowed */
2479 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2480 return WC_E_CDSECTEND
;
2482 /* Found next markup part */
2487 reader
->empty_element
.position
= position
;
2488 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2489 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2490 reader
->resume
[XmlReadResume_Body
] = 0;
2491 reader
->resumestate
= XmlReadResumeState_Initial
;
2495 /* this covers a case when text has leading whitespace chars */
2496 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2498 if (!reader_cmp(reader
, ampW
))
2499 reader_parse_reference(reader
);
2501 reader_skipn(reader
, 1);
2503 ptr
= reader_get_ptr(reader
);
2509 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2510 static HRESULT
reader_parse_content(xmlreader
*reader
)
2512 static const WCHAR cdstartW
[] = {'<','!','[','C','D','A','T','A','[',0};
2513 static const WCHAR etagW
[] = {'<','/',0};
2515 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2517 switch (reader
->resumestate
)
2519 case XmlReadResumeState_CDATA
:
2520 return reader_parse_cdata(reader
);
2521 case XmlReadResumeState_Comment
:
2522 return reader_parse_comment(reader
);
2523 case XmlReadResumeState_PIBody
:
2524 case XmlReadResumeState_PITarget
:
2525 return reader_parse_pi(reader
);
2526 case XmlReadResumeState_CharData
:
2527 return reader_parse_chardata(reader
);
2529 ERR("unknown resume state %d\n", reader
->resumestate
);
2533 reader_shrink(reader
);
2535 /* handle end tag here, it indicates end of content as well */
2536 if (!reader_cmp(reader
, etagW
))
2537 return reader_parse_endtag(reader
);
2539 if (!reader_cmp(reader
, commentW
))
2540 return reader_parse_comment(reader
);
2542 if (!reader_cmp(reader
, piW
))
2543 return reader_parse_pi(reader
);
2545 if (!reader_cmp(reader
, cdstartW
))
2546 return reader_parse_cdata(reader
);
2548 if (!reader_cmp(reader
, ltW
))
2549 return reader_parse_element(reader
);
2551 /* what's left must be CharData */
2552 return reader_parse_chardata(reader
);
2555 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2557 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2560 if (!is_reader_pending(reader
))
2562 reader
->chunk_read_off
= 0;
2563 reader_clear_attrs(reader
);
2566 /* When moving from EndElement or empty element, pop its own namespace definitions */
2569 case XmlNodeType_Attribute
:
2570 reader_dec_depth(reader
);
2572 case XmlNodeType_Element
:
2573 if (reader
->is_empty_element
)
2574 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2575 else if (FAILED(hr
= reader_inc_depth(reader
)))
2578 case XmlNodeType_EndElement
:
2579 reader_pop_element(reader
);
2580 reader_dec_depth(reader
);
2588 switch (reader
->instate
)
2590 /* if it's a first call for a new input we need to detect stream encoding */
2591 case XmlReadInState_Initial
:
2595 hr
= readerinput_growraw(reader
->input
);
2596 if (FAILED(hr
)) return hr
;
2598 reader
->position
.line_number
= 1;
2599 reader
->position
.line_position
= 1;
2601 /* try to detect encoding by BOM or data and set input code page */
2602 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2603 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2604 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2605 if (FAILED(hr
)) return hr
;
2607 /* always switch first time cause we have to put something in */
2608 readerinput_switchencoding(reader
->input
, enc
);
2610 /* parse xml declaration */
2611 hr
= reader_parse_xmldecl(reader
);
2612 if (FAILED(hr
)) return hr
;
2614 readerinput_shrinkraw(reader
->input
, -1);
2615 reader
->instate
= XmlReadInState_Misc_DTD
;
2616 if (hr
== S_OK
) return hr
;
2619 case XmlReadInState_Misc_DTD
:
2620 hr
= reader_parse_misc(reader
);
2621 if (FAILED(hr
)) return hr
;
2624 reader
->instate
= XmlReadInState_DTD
;
2628 case XmlReadInState_DTD
:
2629 hr
= reader_parse_dtd(reader
);
2630 if (FAILED(hr
)) return hr
;
2634 reader
->instate
= XmlReadInState_DTD_Misc
;
2638 reader
->instate
= XmlReadInState_Element
;
2640 case XmlReadInState_DTD_Misc
:
2641 hr
= reader_parse_misc(reader
);
2642 if (FAILED(hr
)) return hr
;
2645 reader
->instate
= XmlReadInState_Element
;
2649 case XmlReadInState_Element
:
2650 return reader_parse_element(reader
);
2651 case XmlReadInState_Content
:
2652 return reader_parse_content(reader
);
2653 case XmlReadInState_MiscEnd
:
2654 hr
= reader_parse_misc(reader
);
2655 if (hr
!= S_FALSE
) return hr
;
2657 if (*reader_get_ptr(reader
))
2659 WARN("found garbage in the end of XML\n");
2663 reader
->instate
= XmlReadInState_Eof
;
2664 reader
->state
= XmlReadState_EndOfFile
;
2665 reader
->nodetype
= XmlNodeType_None
;
2667 case XmlReadInState_Eof
:
2670 FIXME("internal state %d not handled\n", reader
->instate
);
2678 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2680 xmlreader
*This
= impl_from_IXmlReader(iface
);
2682 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2684 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2685 IsEqualGUID(riid
, &IID_IXmlReader
))
2691 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2693 return E_NOINTERFACE
;
2696 IXmlReader_AddRef(iface
);
2701 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2703 xmlreader
*This
= impl_from_IXmlReader(iface
);
2704 ULONG ref
= InterlockedIncrement(&This
->ref
);
2705 TRACE("(%p)->(%d)\n", This
, ref
);
2709 static void reader_clear_ns(xmlreader
*reader
)
2711 struct ns
*ns
, *ns2
;
2713 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2714 list_remove(&ns
->entry
);
2715 reader_free_strvalued(reader
, &ns
->prefix
);
2716 reader_free_strvalued(reader
, &ns
->uri
);
2717 reader_free(reader
, ns
);
2720 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2721 list_remove(&ns
->entry
);
2722 reader_free_strvalued(reader
, &ns
->uri
);
2723 reader_free(reader
, ns
);
2727 static void reader_reset_parser(xmlreader
*reader
)
2729 reader
->position
.line_number
= 0;
2730 reader
->position
.line_position
= 0;
2732 reader_clear_elements(reader
);
2733 reader_clear_attrs(reader
);
2734 reader_clear_ns(reader
);
2735 reader_free_strvalues(reader
);
2738 reader
->nodetype
= XmlNodeType_None
;
2739 reader
->resumestate
= XmlReadResumeState_Initial
;
2740 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2741 reader
->is_empty_element
= FALSE
;
2744 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2746 xmlreader
*This
= impl_from_IXmlReader(iface
);
2747 LONG ref
= InterlockedDecrement(&This
->ref
);
2749 TRACE("(%p)->(%d)\n", This
, ref
);
2753 IMalloc
*imalloc
= This
->imalloc
;
2754 reader_reset_parser(This
);
2755 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2756 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2757 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2758 reader_free(This
, This
);
2759 if (imalloc
) IMalloc_Release(imalloc
);
2765 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2767 xmlreader
*This
= impl_from_IXmlReader(iface
);
2768 IXmlReaderInput
*readerinput
;
2771 TRACE("(%p)->(%p)\n", This
, input
);
2775 readerinput_release_stream(This
->input
);
2776 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2780 reader_reset_parser(This
);
2782 /* just reset current input */
2785 This
->state
= XmlReadState_Initial
;
2789 /* now try IXmlReaderInput, ISequentialStream, IStream */
2790 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2793 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2794 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2797 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2798 readerinput
, readerinput
->lpVtbl
);
2799 IUnknown_Release(readerinput
);
2805 if (hr
!= S_OK
|| !readerinput
)
2807 /* create IXmlReaderInput basing on supplied interface */
2808 hr
= CreateXmlReaderInputWithEncodingName(input
,
2809 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2810 if (hr
!= S_OK
) return hr
;
2811 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2814 /* set stream for supplied IXmlReaderInput */
2815 hr
= readerinput_query_for_stream(This
->input
);
2818 This
->state
= XmlReadState_Initial
;
2819 This
->instate
= XmlReadInState_Initial
;
2824 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2826 xmlreader
*This
= impl_from_IXmlReader(iface
);
2828 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2830 if (!value
) return E_INVALIDARG
;
2834 case XmlReaderProperty_MultiLanguage
:
2835 *value
= (LONG_PTR
)This
->mlang
;
2837 IUnknown_AddRef(This
->mlang
);
2839 case XmlReaderProperty_XmlResolver
:
2840 *value
= (LONG_PTR
)This
->resolver
;
2842 IXmlResolver_AddRef(This
->resolver
);
2844 case XmlReaderProperty_DtdProcessing
:
2845 *value
= This
->dtdmode
;
2847 case XmlReaderProperty_ReadState
:
2848 *value
= This
->state
;
2850 case XmlReaderProperty_MaxElementDepth
:
2851 *value
= This
->max_depth
;
2854 FIXME("Unimplemented property (%u)\n", property
);
2861 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2863 xmlreader
*This
= impl_from_IXmlReader(iface
);
2865 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2869 case XmlReaderProperty_MultiLanguage
:
2871 IUnknown_Release(This
->mlang
);
2872 This
->mlang
= (IUnknown
*)value
;
2874 IUnknown_AddRef(This
->mlang
);
2876 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2878 case XmlReaderProperty_XmlResolver
:
2880 IXmlResolver_Release(This
->resolver
);
2881 This
->resolver
= (IXmlResolver
*)value
;
2883 IXmlResolver_AddRef(This
->resolver
);
2885 case XmlReaderProperty_DtdProcessing
:
2886 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2887 This
->dtdmode
= value
;
2889 case XmlReaderProperty_MaxElementDepth
:
2890 This
->max_depth
= value
;
2893 FIXME("Unimplemented property (%u)\n", property
);
2900 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2902 xmlreader
*This
= impl_from_IXmlReader(iface
);
2903 XmlNodeType oldtype
= This
->nodetype
;
2907 TRACE("(%p)->(%p)\n", This
, nodetype
);
2912 switch (This
->state
)
2914 case XmlReadState_Closed
:
2917 case XmlReadState_Error
:
2921 hr
= reader_parse_nextnode(This
);
2922 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2923 This
->state
= XmlReadState_Interactive
;
2927 This
->state
= XmlReadState_Error
;
2928 This
->nodetype
= XmlNodeType_None
;
2934 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2935 *nodetype
= This
->nodetype
;
2940 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2942 xmlreader
*This
= impl_from_IXmlReader(iface
);
2944 TRACE("(%p)->(%p)\n", This
, node_type
);
2947 return E_INVALIDARG
;
2949 *node_type
= reader_get_nodetype(This
);
2950 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2953 static void reader_set_current_attribute(xmlreader
*reader
, struct attribute
*attr
)
2955 reader
->attr
= attr
;
2956 reader
->chunk_read_off
= 0;
2957 reader_set_strvalue(reader
, StringValue_Prefix
, &attr
->prefix
);
2958 reader_set_strvalue(reader
, StringValue_QualifiedName
, &attr
->qname
);
2959 reader_set_strvalue(reader
, StringValue_Value
, &attr
->value
);
2962 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2964 if (!reader
->attr_count
)
2968 reader_inc_depth(reader
);
2970 reader_set_current_attribute(reader
, LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
));
2975 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2977 xmlreader
*This
= impl_from_IXmlReader(iface
);
2979 TRACE("(%p)\n", This
);
2981 return reader_move_to_first_attribute(This
);
2984 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2986 xmlreader
*This
= impl_from_IXmlReader(iface
);
2987 const struct list
*next
;
2989 TRACE("(%p)\n", This
);
2991 if (!This
->attr_count
) return S_FALSE
;
2994 return reader_move_to_first_attribute(This
);
2996 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2998 reader_set_current_attribute(This
, LIST_ENTRY(next
, struct attribute
, entry
));
3000 return next
? S_OK
: S_FALSE
;
3003 static void reader_get_attribute_ns_uri(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**uri
, UINT
*len
)
3005 static const WCHAR xmlns_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3006 '2','0','0','0','/','x','m','l','n','s','/',0};
3007 static const WCHAR xml_uriW
[] = {'h','t','t','p',':','/','/','w','w','w','.','w','3','.','o','r','g','/',
3008 'X','M','L','/','1','9','9','8','/','n','a','m','e','s','p','a','c','e',0};
3010 /* Check for reserved prefixes first */
3011 if ((strval_eq(reader
, &attr
->prefix
, &strval_empty
) && strval_eq(reader
, &attr
->localname
, &strval_xmlns
)) ||
3012 strval_eq(reader
, &attr
->prefix
, &strval_xmlns
))
3015 *len
= ARRAY_SIZE(xmlns_uriW
) - 1;
3017 else if (strval_eq(reader
, &attr
->prefix
, &strval_xml
))
3020 *len
= ARRAY_SIZE(xml_uriW
) - 1;
3032 if ((ns
= reader_lookup_ns(reader
, &attr
->prefix
)))
3045 static void reader_get_attribute_local_name(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**name
, UINT
*len
)
3047 if (attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3052 else if (attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
3054 const struct ns
*ns
= reader_lookup_ns(reader
, &attr
->localname
);
3055 *name
= ns
->prefix
.str
;
3056 *len
= ns
->prefix
.len
;
3060 *name
= attr
->localname
.str
;
3061 *len
= attr
->localname
.len
;
3065 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
3066 const WCHAR
*local_name
, const WCHAR
*namespace_uri
)
3068 xmlreader
*This
= impl_from_IXmlReader(iface
);
3069 UINT target_name_len
, target_uri_len
;
3070 struct attribute
*attr
;
3072 TRACE("(%p)->(%s %s)\n", This
, debugstr_w(local_name
), debugstr_w(namespace_uri
));
3075 return E_INVALIDARG
;
3077 if (!This
->attr_count
)
3081 namespace_uri
= emptyW
;
3083 target_name_len
= lstrlenW(local_name
);
3084 target_uri_len
= lstrlenW(namespace_uri
);
3086 LIST_FOR_EACH_ENTRY(attr
, &This
->attrs
, struct attribute
, entry
)
3088 UINT name_len
, uri_len
;
3089 const WCHAR
*name
, *uri
;
3091 reader_get_attribute_local_name(This
, attr
, &name
, &name_len
);
3092 reader_get_attribute_ns_uri(This
, attr
, &uri
, &uri_len
);
3094 if (name_len
== target_name_len
&& uri_len
== target_uri_len
&&
3095 !wcscmp(name
, local_name
) && !wcscmp(uri
, namespace_uri
))
3097 reader_set_current_attribute(This
, attr
);
3105 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3107 xmlreader
*This
= impl_from_IXmlReader(iface
);
3109 TRACE("(%p)\n", This
);
3111 if (!This
->attr_count
) return S_FALSE
;
3114 reader_dec_depth(This
);
3118 /* FIXME: support other node types with 'attributes' like DTD */
3119 if (This
->is_empty_element
) {
3120 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3121 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
3124 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3126 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3127 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3130 This
->chunk_read_off
= 0;
3131 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
3136 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3138 xmlreader
*This
= impl_from_IXmlReader(iface
);
3139 struct attribute
*attribute
= This
->attr
;
3140 struct element
*element
;
3143 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3148 switch (reader_get_nodetype(This
))
3150 case XmlNodeType_Text
:
3151 case XmlNodeType_CDATA
:
3152 case XmlNodeType_Comment
:
3153 case XmlNodeType_Whitespace
:
3157 case XmlNodeType_Element
:
3158 case XmlNodeType_EndElement
:
3159 element
= reader_get_element(This
);
3160 if (element
->prefix
.len
)
3162 *name
= element
->qname
.str
;
3163 *len
= element
->qname
.len
;
3167 *name
= element
->localname
.str
;
3168 *len
= element
->localname
.len
;
3171 case XmlNodeType_Attribute
:
3172 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3176 } else if (attribute
->prefix
.len
)
3178 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3179 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3183 *name
= attribute
->localname
.str
;
3184 *len
= attribute
->localname
.len
;
3188 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3189 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3196 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3198 if (list_empty(&reader
->nsdef
))
3201 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
3204 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3206 xmlreader
*This
= impl_from_IXmlReader(iface
);
3207 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3208 XmlNodeType nodetype
;
3212 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3217 switch ((nodetype
= reader_get_nodetype(This
)))
3219 case XmlNodeType_Attribute
:
3220 reader_get_attribute_ns_uri(This
, This
->attr
, uri
, len
);
3222 case XmlNodeType_Element
:
3223 case XmlNodeType_EndElement
:
3225 ns
= reader_lookup_ns(This
, prefix
);
3227 /* pick top default ns if any */
3229 ns
= reader_lookup_nsdef(This
);
3241 case XmlNodeType_Text
:
3242 case XmlNodeType_CDATA
:
3243 case XmlNodeType_ProcessingInstruction
:
3244 case XmlNodeType_Comment
:
3245 case XmlNodeType_Whitespace
:
3246 case XmlNodeType_XmlDeclaration
:
3251 FIXME("Unhandled node type %d\n", nodetype
);
3260 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3262 xmlreader
*This
= impl_from_IXmlReader(iface
);
3263 struct element
*element
;
3266 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3271 switch (reader_get_nodetype(This
))
3273 case XmlNodeType_Text
:
3274 case XmlNodeType_CDATA
:
3275 case XmlNodeType_Comment
:
3276 case XmlNodeType_Whitespace
:
3280 case XmlNodeType_Element
:
3281 case XmlNodeType_EndElement
:
3282 element
= reader_get_element(This
);
3283 *name
= element
->localname
.str
;
3284 *len
= element
->localname
.len
;
3286 case XmlNodeType_Attribute
:
3287 reader_get_attribute_local_name(This
, This
->attr
, name
, len
);
3290 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3291 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3298 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3300 xmlreader
*This
= impl_from_IXmlReader(iface
);
3301 XmlNodeType nodetype
;
3304 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3312 switch ((nodetype
= reader_get_nodetype(This
)))
3314 case XmlNodeType_Element
:
3315 case XmlNodeType_EndElement
:
3316 case XmlNodeType_Attribute
:
3318 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3321 if (strval_eq(This
, prefix
, &strval_xml
))
3326 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3331 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3333 *ret
= ns
->prefix
.str
;
3334 *len
= ns
->prefix
.len
;
3346 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3350 switch (reader_get_nodetype(reader
))
3352 case XmlNodeType_XmlDeclaration
:
3353 case XmlNodeType_EndElement
:
3354 case XmlNodeType_None
:
3355 return &strval_empty
;
3356 case XmlNodeType_Attribute
:
3357 /* For namespace definition attributes return values from namespace list */
3358 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3362 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3363 ns
= reader_lookup_nsdef(reader
);
3367 return &reader
->attr
->value
;
3372 val
= &reader
->strvalues
[StringValue_Value
];
3373 if (!val
->str
&& ensure_allocated
)
3375 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3376 if (!ptr
) return NULL
;
3377 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3385 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3387 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3388 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3391 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3395 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3400 hr
= IXmlReader_Read(iface
, &type
);
3401 if (FAILED(hr
)) return hr
;
3403 /* return if still pending, partially read values are not reported */
3404 if (is_reader_pending(reader
)) return E_PENDING
;
3407 val
= reader_get_value(reader
, TRUE
);
3409 return E_OUTOFMEMORY
;
3411 off
= abs(reader
->chunk_read_off
);
3412 assert(off
<= val
->len
);
3413 *value
= val
->str
+ off
;
3414 if (len
) *len
= val
->len
- off
;
3415 reader
->chunk_read_off
= -off
;
3419 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3421 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3425 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3427 val
= reader_get_value(reader
, FALSE
);
3429 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3430 if (reader
->chunk_read_off
>= 0)
3432 assert(reader
->chunk_read_off
<= val
->len
);
3433 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3435 if (read
) *read
= len
;
3439 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3440 reader
->chunk_read_off
+= len
;
3443 return len
|| !chunk_size
? S_OK
: S_FALSE
;
3446 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3448 UINT
*baseUri_length
)
3450 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3454 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3456 FIXME("(%p): stub\n", iface
);
3460 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3462 xmlreader
*This
= impl_from_IXmlReader(iface
);
3463 TRACE("(%p)\n", This
);
3464 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3465 when current node is start tag of an element */
3466 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3469 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3471 xmlreader
*This
= impl_from_IXmlReader(iface
);
3472 const struct element
*element
;
3474 TRACE("(%p %p)\n", This
, line_number
);
3477 return E_INVALIDARG
;
3479 switch (reader_get_nodetype(This
))
3481 case XmlNodeType_Element
:
3482 case XmlNodeType_EndElement
:
3483 element
= reader_get_element(This
);
3484 *line_number
= element
->position
.line_number
;
3486 case XmlNodeType_Attribute
:
3487 *line_number
= This
->attr
->position
.line_number
;
3489 case XmlNodeType_Whitespace
:
3490 case XmlNodeType_XmlDeclaration
:
3491 *line_number
= This
->empty_element
.position
.line_number
;
3494 *line_number
= This
->position
.line_number
;
3498 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3501 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3503 xmlreader
*This
= impl_from_IXmlReader(iface
);
3504 const struct element
*element
;
3506 TRACE("(%p %p)\n", This
, line_position
);
3509 return E_INVALIDARG
;
3511 switch (reader_get_nodetype(This
))
3513 case XmlNodeType_Element
:
3514 case XmlNodeType_EndElement
:
3515 element
= reader_get_element(This
);
3516 *line_position
= element
->position
.line_position
;
3518 case XmlNodeType_Attribute
:
3519 *line_position
= This
->attr
->position
.line_position
;
3521 case XmlNodeType_Whitespace
:
3522 case XmlNodeType_XmlDeclaration
:
3523 *line_position
= This
->empty_element
.position
.line_position
;
3526 *line_position
= This
->position
.line_position
;
3530 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3533 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3535 xmlreader
*This
= impl_from_IXmlReader(iface
);
3537 TRACE("(%p)->(%p)\n", This
, count
);
3539 if (!count
) return E_INVALIDARG
;
3541 *count
= This
->attr_count
;
3545 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3547 xmlreader
*This
= impl_from_IXmlReader(iface
);
3548 TRACE("(%p)->(%p)\n", This
, depth
);
3549 *depth
= This
->depth
;
3553 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3555 xmlreader
*This
= impl_from_IXmlReader(iface
);
3556 TRACE("(%p)\n", iface
);
3557 return This
->state
== XmlReadState_EndOfFile
;
3560 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3562 xmlreader_QueryInterface
,
3566 xmlreader_GetProperty
,
3567 xmlreader_SetProperty
,
3569 xmlreader_GetNodeType
,
3570 xmlreader_MoveToFirstAttribute
,
3571 xmlreader_MoveToNextAttribute
,
3572 xmlreader_MoveToAttributeByName
,
3573 xmlreader_MoveToElement
,
3574 xmlreader_GetQualifiedName
,
3575 xmlreader_GetNamespaceUri
,
3576 xmlreader_GetLocalName
,
3577 xmlreader_GetPrefix
,
3579 xmlreader_ReadValueChunk
,
3580 xmlreader_GetBaseUri
,
3581 xmlreader_IsDefault
,
3582 xmlreader_IsEmptyElement
,
3583 xmlreader_GetLineNumber
,
3584 xmlreader_GetLinePosition
,
3585 xmlreader_GetAttributeCount
,
/** IXmlReaderInput **/
3591 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3593 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3595 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3597 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3598 IsEqualGUID(riid
, &IID_IUnknown
))
3604 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3606 return E_NOINTERFACE
;
3609 IUnknown_AddRef(iface
);
3614 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3616 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3617 ULONG ref
= InterlockedIncrement(&This
->ref
);
3618 TRACE("(%p)->(%d)\n", This
, ref
);
3622 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3624 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3625 LONG ref
= InterlockedDecrement(&This
->ref
);
3627 TRACE("(%p)->(%d)\n", This
, ref
);
3631 IMalloc
*imalloc
= This
->imalloc
;
3632 if (This
->input
) IUnknown_Release(This
->input
);
3633 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3634 if (This
->buffer
) free_input_buffer(This
->buffer
);
3635 readerinput_free(This
, This
->baseuri
);
3636 readerinput_free(This
, This
);
3637 if (imalloc
) IMalloc_Release(imalloc
);
3643 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3645 xmlreaderinput_QueryInterface
,
3646 xmlreaderinput_AddRef
,
3647 xmlreaderinput_Release
3650 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3656 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3659 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3661 reader
= heap_alloc(sizeof(*reader
));
3663 return E_OUTOFMEMORY
;
3665 memset(reader
, 0, sizeof(*reader
));
3666 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3668 reader
->state
= XmlReadState_Closed
;
3669 reader
->instate
= XmlReadInState_Initial
;
3670 reader
->resumestate
= XmlReadResumeState_Initial
;
3671 reader
->dtdmode
= DtdProcessing_Prohibit
;
3672 reader
->imalloc
= imalloc
;
3673 if (imalloc
) IMalloc_AddRef(imalloc
);
3674 reader
->nodetype
= XmlNodeType_None
;
3675 list_init(&reader
->attrs
);
3676 list_init(&reader
->nsdef
);
3677 list_init(&reader
->ns
);
3678 list_init(&reader
->elements
);
3679 reader
->max_depth
= 256;
3681 reader
->chunk_read_off
= 0;
3682 for (i
= 0; i
< StringValue_Last
; i
++)
3683 reader
->strvalues
[i
] = strval_empty
;
3685 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3686 IXmlReader_Release(&reader
->IXmlReader_iface
);
3688 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
3693 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3698 IXmlReaderInput
**ppInput
)
3700 xmlreaderinput
*readerinput
;
3703 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3704 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3706 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3709 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3711 readerinput
= heap_alloc(sizeof(*readerinput
));
3712 if(!readerinput
) return E_OUTOFMEMORY
;
3714 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3715 readerinput
->ref
= 1;
3716 readerinput
->imalloc
= imalloc
;
3717 readerinput
->stream
= NULL
;
3718 if (imalloc
) IMalloc_AddRef(imalloc
);
3719 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3720 readerinput
->hint
= hint
;
3721 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3722 readerinput
->pending
= 0;
3724 hr
= alloc_input_buffer(readerinput
);
3727 readerinput_free(readerinput
, readerinput
->baseuri
);
3728 readerinput_free(readerinput
, readerinput
);
3729 if (imalloc
) IMalloc_Release(imalloc
);
3732 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3734 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3736 TRACE("returning iface %p\n", *ppInput
);