2 * IXmlReader implementation
4 * Copyright 2010, 2012-2013, 2016-2017 Nikolay Sivov
6 * This library is free software; you can redistribute it and/or
7 * modify it under the terms of the GNU Lesser General Public
8 * License as published by the Free Software Foundation; either
9 * version 2.1 of the License, or (at your option) any later version.
11 * This library is distributed in the hope that it will be useful,
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
14 * Lesser General Public License for more details.
16 * You should have received a copy of the GNU Lesser General Public
17 * License along with this library; if not, write to the Free Software
18 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301, USA
31 #include "xmllite_private.h"
33 #include "wine/debug.h"
34 #include "wine/list.h"
36 WINE_DEFAULT_DEBUG_CHANNEL(xmllite
);
38 /* not defined in public headers */
39 DEFINE_GUID(IID_IXmlReaderInput
, 0x0b3ccc9b, 0x9214, 0x428b, 0xa2, 0xae, 0xef, 0x3a, 0xa8, 0x71, 0xaf, 0xda);
43 XmlReadInState_Initial
,
44 XmlReadInState_XmlDecl
,
45 XmlReadInState_Misc_DTD
,
47 XmlReadInState_DTD_Misc
,
48 XmlReadInState_Element
,
49 XmlReadInState_Content
,
50 XmlReadInState_MiscEnd
, /* optional Misc at the end of a document */
52 } XmlReaderInternalState
;
54 /* This state denotes where parsing was interrupted by an input problem.
55 The reader resumes parsing using this information. */
58 XmlReadResumeState_Initial
,
59 XmlReadResumeState_PITarget
,
60 XmlReadResumeState_PIBody
,
61 XmlReadResumeState_CDATA
,
62 XmlReadResumeState_Comment
,
63 XmlReadResumeState_STag
,
64 XmlReadResumeState_CharData
,
65 XmlReadResumeState_Whitespace
66 } XmlReaderResumeState
;
68 /* saved pointer index to resume from particular input position */
71 XmlReadResume_Name
, /* PITarget, name for NCName, prefix for QName */
72 XmlReadResume_Local
, /* local for QName */
73 XmlReadResume_Body
, /* PI body, comment text, CDATA text, CharData text */
79 StringValue_LocalName
,
81 StringValue_QualifiedName
,
84 } XmlReaderStringValue
;
86 BOOL
is_namestartchar(WCHAR ch
);
88 static const char *debugstr_nodetype(XmlNodeType nodetype
)
90 static const char * const type_names
[] =
99 "ProcessingInstruction",
112 if (nodetype
> _XmlNodeType_Last
)
113 return wine_dbg_sprintf("unknown type=%d", nodetype
);
115 return type_names
[nodetype
];
118 static const char *debugstr_reader_prop(XmlReaderProperty prop
)
120 static const char * const prop_names
[] =
132 if (prop
> _XmlReaderProperty_Last
)
133 return wine_dbg_sprintf("unknown property=%d", prop
);
135 return prop_names
[prop
];
138 struct xml_encoding_data
145 static const struct xml_encoding_data xml_encoding_map
[] =
147 { L
"US-ASCII", XmlEncoding_USASCII
, 20127 },
148 { L
"UTF-16", XmlEncoding_UTF16
, 1200 },
149 { L
"UTF-8", XmlEncoding_UTF8
, CP_UTF8
},
152 const WCHAR
*get_encoding_name(xml_encoding encoding
)
154 return xml_encoding_map
[encoding
].name
;
157 xml_encoding
get_encoding_from_codepage(UINT codepage
)
160 for (i
= 0; i
< ARRAY_SIZE(xml_encoding_map
); i
++)
162 if (xml_encoding_map
[i
].cp
== codepage
) return xml_encoding_map
[i
].enc
;
164 return XmlEncoding_Unknown
;
171 unsigned int allocated
;
172 unsigned int written
;
176 typedef struct input_buffer input_buffer
;
180 IXmlReaderInput IXmlReaderInput_iface
;
182 /* reference passed on IXmlReaderInput creation, is kept when input is created */
185 xml_encoding encoding
;
188 /* stream reference set after a SetInput() call from the reader,
189 stored as a sequential stream, because the optimizations
190 possible with IStream aren't currently implemented */
191 ISequentialStream
*stream
;
192 input_buffer
*buffer
;
193 unsigned int pending
: 1;
196 static const struct IUnknownVtbl xmlreaderinputvtbl
;
198 /* Structure to hold parsed string of specific length.
200 Reader stores node value as 'start' pointer, on request
201 a null-terminated version of it is allocated.
203 To init a strval variable use reader_init_strval(),
204 to set strval as a reader value use reader_set_strval().
208 WCHAR
*str
; /* allocated null-terminated string */
209 UINT len
; /* length in WCHARs, altered after ReadValueChunk */
210 UINT start
; /* input position where value starts */
213 static WCHAR emptyW
[] = L
"";
214 static WCHAR xmlW
[] = L
"xml";
215 static WCHAR xmlnsW
[] = L
"xmlns";
216 static const strval strval_empty
= { emptyW
, 0 };
217 static const strval strval_xml
= { xmlW
, 3 };
218 static const strval strval_xmlns
= { xmlnsW
, 5 };
220 struct reader_position
228 ATTRIBUTE_NS_DEFINITION
= 0x1,
229 ATTRIBUTE_DEFAULT_NS_DEFINITION
= 0x2,
239 struct reader_position position
;
249 struct reader_position position
;
257 struct element
*element
;
262 IXmlReader IXmlReader_iface
;
264 xmlreaderinput
*input
;
267 HRESULT error
; /* error set on XmlReadState_Error */
268 XmlReaderInternalState instate
;
269 XmlReaderResumeState resumestate
;
270 XmlNodeType nodetype
;
271 DtdProcessing dtdmode
;
272 IXmlResolver
*resolver
;
274 struct reader_position position
;
275 struct list attrs
; /* attributes list for current node */
276 struct attribute
*attr
; /* current attribute */
280 struct list elements
;
282 strval strvalues
[StringValue_Last
];
285 BOOL is_empty_element
;
286 struct element empty_element
; /* used for empty elements without end tag <a />,
287 and to keep <?xml reader position */
288 UINT resume
[XmlReadResume_Last
]; /* offsets used to resume reader */
293 encoded_buffer utf16
;
294 encoded_buffer encoded
;
296 xmlreaderinput
*input
;
299 static inline xmlreader
*impl_from_IXmlReader(IXmlReader
*iface
)
301 return CONTAINING_RECORD(iface
, xmlreader
, IXmlReader_iface
);
304 static inline xmlreaderinput
*impl_from_IXmlReaderInput(IXmlReaderInput
*iface
)
306 return CONTAINING_RECORD(iface
, xmlreaderinput
, IXmlReaderInput_iface
);
309 /* reader memory allocation functions */
310 static inline void *reader_alloc(xmlreader
*reader
, size_t len
)
312 return m_alloc(reader
->imalloc
, len
);
315 static inline void *reader_alloc_zero(xmlreader
*reader
, size_t len
)
317 void *ret
= reader_alloc(reader
, len
);
323 static inline void reader_free(xmlreader
*reader
, void *mem
)
325 m_free(reader
->imalloc
, mem
);
328 /* Just return pointer from offset, no attempt to read more. */
329 static inline WCHAR
*reader_get_ptr2(const xmlreader
*reader
, UINT offset
)
331 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
332 return (WCHAR
*)buffer
->data
+ offset
;
335 static inline WCHAR
*reader_get_strptr(const xmlreader
*reader
, const strval
*v
)
337 return v
->str
? v
->str
: reader_get_ptr2(reader
, v
->start
);
340 static HRESULT
reader_strvaldup(xmlreader
*reader
, const strval
*src
, strval
*dest
)
344 if (src
->str
!= strval_empty
.str
)
346 dest
->str
= reader_alloc(reader
, (dest
->len
+1)*sizeof(WCHAR
));
347 if (!dest
->str
) return E_OUTOFMEMORY
;
348 memcpy(dest
->str
, reader_get_strptr(reader
, src
), dest
->len
*sizeof(WCHAR
));
349 dest
->str
[dest
->len
] = 0;
356 /* reader input memory allocation functions */
357 static inline void *readerinput_alloc(xmlreaderinput
*input
, size_t len
)
359 return m_alloc(input
->imalloc
, len
);
362 static inline void *readerinput_realloc(xmlreaderinput
*input
, void *mem
, size_t len
)
364 return m_realloc(input
->imalloc
, mem
, len
);
367 static inline void readerinput_free(xmlreaderinput
*input
, void *mem
)
369 m_free(input
->imalloc
, mem
);
372 static inline WCHAR
*readerinput_strdupW(xmlreaderinput
*input
, const WCHAR
*str
)
379 size
= (lstrlenW(str
)+1)*sizeof(WCHAR
);
380 ret
= readerinput_alloc(input
, size
);
381 if (ret
) memcpy(ret
, str
, size
);
387 /* This one frees stored string value if needed */
388 static void reader_free_strvalued(xmlreader
*reader
, strval
*v
)
390 if (v
->str
!= strval_empty
.str
)
392 reader_free(reader
, v
->str
);
397 static void reader_clear_attrs(xmlreader
*reader
)
399 struct attribute
*attr
, *attr2
;
400 LIST_FOR_EACH_ENTRY_SAFE(attr
, attr2
, &reader
->attrs
, struct attribute
, entry
)
402 reader_free_strvalued(reader
, &attr
->localname
);
403 reader_free_strvalued(reader
, &attr
->value
);
404 reader_free(reader
, attr
);
406 list_init(&reader
->attrs
);
407 reader
->attr_count
= 0;
411 /* attribute data holds pointers to buffer data, so buffer shrink is not possible
412 while we are on a node with attributes */
413 static HRESULT
reader_add_attr(xmlreader
*reader
, strval
*prefix
, strval
*localname
, strval
*qname
,
414 strval
*value
, const struct reader_position
*position
, unsigned int flags
)
416 struct attribute
*attr
;
419 attr
= reader_alloc(reader
, sizeof(*attr
));
420 if (!attr
) return E_OUTOFMEMORY
;
422 hr
= reader_strvaldup(reader
, localname
, &attr
->localname
);
425 hr
= reader_strvaldup(reader
, value
, &attr
->value
);
427 reader_free_strvalued(reader
, &attr
->value
);
431 reader_free(reader
, attr
);
436 attr
->prefix
= *prefix
;
438 memset(&attr
->prefix
, 0, sizeof(attr
->prefix
));
439 attr
->qname
= qname
? *qname
: *localname
;
440 attr
->position
= *position
;
442 list_add_tail(&reader
->attrs
, &attr
->entry
);
443 reader
->attr_count
++;
448 /* Returns current element, doesn't check if reader is actually positioned on it. */
449 static struct element
*reader_get_element(xmlreader
*reader
)
451 if (reader
->is_empty_element
)
452 return &reader
->empty_element
;
454 return LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
457 static inline void reader_init_strvalue(UINT start
, UINT len
, strval
*v
)
464 static inline const char* debug_strval(const xmlreader
*reader
, const strval
*v
)
466 return debugstr_wn(reader_get_strptr(reader
, v
), v
->len
);
469 /* used to initialize from constant string */
470 static inline void reader_init_cstrvalue(WCHAR
*str
, UINT len
, strval
*v
)
477 static void reader_free_strvalue(xmlreader
*reader
, XmlReaderStringValue type
)
479 reader_free_strvalued(reader
, &reader
->strvalues
[type
]);
482 static void reader_free_strvalues(xmlreader
*reader
)
485 for (type
= 0; type
< StringValue_Last
; type
++)
486 reader_free_strvalue(reader
, type
);
489 /* This helper should only be used to test if strings are the same,
490 it doesn't try to sort. */
491 static inline int strval_eq(const xmlreader
*reader
, const strval
*str1
, const strval
*str2
)
493 if (str1
->len
!= str2
->len
) return 0;
494 return !memcmp(reader_get_strptr(reader
, str1
), reader_get_strptr(reader
, str2
), str1
->len
*sizeof(WCHAR
));
497 static void reader_clear_elements(xmlreader
*reader
)
499 struct element
*elem
, *elem2
;
500 LIST_FOR_EACH_ENTRY_SAFE(elem
, elem2
, &reader
->elements
, struct element
, entry
)
502 reader_free_strvalued(reader
, &elem
->prefix
);
503 reader_free_strvalued(reader
, &elem
->localname
);
504 reader_free_strvalued(reader
, &elem
->qname
);
505 reader_free(reader
, elem
);
507 list_init(&reader
->elements
);
508 reader_free_strvalued(reader
, &reader
->empty_element
.localname
);
509 reader_free_strvalued(reader
, &reader
->empty_element
.qname
);
510 reader
->is_empty_element
= FALSE
;
513 static struct ns
*reader_lookup_ns(xmlreader
*reader
, const strval
*prefix
)
515 struct list
*nslist
= prefix
? &reader
->ns
: &reader
->nsdef
;
518 LIST_FOR_EACH_ENTRY_REV(ns
, nslist
, struct ns
, entry
) {
519 if (strval_eq(reader
, prefix
, &ns
->prefix
))
526 static HRESULT
reader_inc_depth(xmlreader
*reader
)
528 return (++reader
->depth
>= reader
->max_depth
&& reader
->max_depth
) ? SC_E_MAXELEMENTDEPTH
: S_OK
;
531 static void reader_dec_depth(xmlreader
*reader
)
537 static HRESULT
reader_push_ns(xmlreader
*reader
, const strval
*prefix
, const strval
*uri
, BOOL def
)
542 ns
= reader_alloc(reader
, sizeof(*ns
));
543 if (!ns
) return E_OUTOFMEMORY
;
546 memset(&ns
->prefix
, 0, sizeof(ns
->prefix
));
548 hr
= reader_strvaldup(reader
, prefix
, &ns
->prefix
);
550 reader_free(reader
, ns
);
555 hr
= reader_strvaldup(reader
, uri
, &ns
->uri
);
557 reader_free_strvalued(reader
, &ns
->prefix
);
558 reader_free(reader
, ns
);
563 list_add_head(def
? &reader
->nsdef
: &reader
->ns
, &ns
->entry
);
567 static void reader_free_element(xmlreader
*reader
, struct element
*element
)
569 reader_free_strvalued(reader
, &element
->prefix
);
570 reader_free_strvalued(reader
, &element
->localname
);
571 reader_free_strvalued(reader
, &element
->qname
);
572 reader_free(reader
, element
);
575 static void reader_mark_ns_nodes(xmlreader
*reader
, struct element
*element
)
579 LIST_FOR_EACH_ENTRY(ns
, &reader
->ns
, struct ns
, entry
) {
582 ns
->element
= element
;
585 LIST_FOR_EACH_ENTRY(ns
, &reader
->nsdef
, struct ns
, entry
) {
588 ns
->element
= element
;
592 static HRESULT
reader_push_element(xmlreader
*reader
, strval
*prefix
, strval
*localname
,
593 strval
*qname
, const struct reader_position
*position
)
595 struct element
*element
;
598 element
= reader_alloc_zero(reader
, sizeof(*element
));
600 return E_OUTOFMEMORY
;
602 if ((hr
= reader_strvaldup(reader
, prefix
, &element
->prefix
)) == S_OK
&&
603 (hr
= reader_strvaldup(reader
, localname
, &element
->localname
)) == S_OK
&&
604 (hr
= reader_strvaldup(reader
, qname
, &element
->qname
)) == S_OK
)
606 list_add_head(&reader
->elements
, &element
->entry
);
607 reader_mark_ns_nodes(reader
, element
);
608 reader
->is_empty_element
= FALSE
;
609 element
->position
= *position
;
612 reader_free_element(reader
, element
);
617 static void reader_pop_ns_nodes(xmlreader
*reader
, struct element
*element
)
621 LIST_FOR_EACH_ENTRY_SAFE_REV(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
622 if (ns
->element
!= element
)
625 list_remove(&ns
->entry
);
626 reader_free_strvalued(reader
, &ns
->prefix
);
627 reader_free_strvalued(reader
, &ns
->uri
);
628 reader_free(reader
, ns
);
631 if (!list_empty(&reader
->nsdef
)) {
632 ns
= LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
633 if (ns
->element
== element
) {
634 list_remove(&ns
->entry
);
635 reader_free_strvalued(reader
, &ns
->prefix
);
636 reader_free_strvalued(reader
, &ns
->uri
);
637 reader_free(reader
, ns
);
642 static void reader_pop_element(xmlreader
*reader
)
644 struct element
*element
;
646 if (list_empty(&reader
->elements
))
649 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
650 list_remove(&element
->entry
);
652 reader_pop_ns_nodes(reader
, element
);
653 reader_free_element(reader
, element
);
655 /* It was a root element, the rest is expected as Misc */
656 if (list_empty(&reader
->elements
))
657 reader
->instate
= XmlReadInState_MiscEnd
;
660 /* Always make a copy, cause strings are supposed to be null terminated. Null pointer for 'value'
661 means node value is to be determined. */
662 static void reader_set_strvalue(xmlreader
*reader
, XmlReaderStringValue type
, const strval
*value
)
664 strval
*v
= &reader
->strvalues
[type
];
666 reader_free_strvalue(reader
, type
);
675 if (value
->str
== strval_empty
.str
)
679 if (type
== StringValue_Value
)
681 /* defer allocation for value string */
683 v
->start
= value
->start
;
688 v
->str
= reader_alloc(reader
, (value
->len
+ 1)*sizeof(WCHAR
));
689 memcpy(v
->str
, reader_get_strptr(reader
, value
), value
->len
*sizeof(WCHAR
));
690 v
->str
[value
->len
] = 0;
696 static inline int is_reader_pending(xmlreader
*reader
)
698 return reader
->input
->pending
;
701 static HRESULT
init_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
703 const int initial_len
= 0x2000;
704 buffer
->data
= readerinput_alloc(input
, initial_len
);
705 if (!buffer
->data
) return E_OUTOFMEMORY
;
707 memset(buffer
->data
, 0, 4);
709 buffer
->allocated
= initial_len
;
711 buffer
->prev_cr
= FALSE
;
716 static void free_encoded_buffer(xmlreaderinput
*input
, encoded_buffer
*buffer
)
718 readerinput_free(input
, buffer
->data
);
721 HRESULT
get_code_page(xml_encoding encoding
, UINT
*cp
)
723 if (encoding
== XmlEncoding_Unknown
)
725 FIXME("unsupported encoding %d\n", encoding
);
729 *cp
= xml_encoding_map
[encoding
].cp
;
734 xml_encoding
parse_encoding_name(const WCHAR
*name
, int len
)
738 if (!name
) return XmlEncoding_Unknown
;
741 max
= ARRAY_SIZE(xml_encoding_map
) - 1;
748 c
= wcsnicmp(xml_encoding_map
[n
].name
, name
, len
);
750 c
= wcsicmp(xml_encoding_map
[n
].name
, name
);
752 return xml_encoding_map
[n
].enc
;
760 return XmlEncoding_Unknown
;
763 static HRESULT
alloc_input_buffer(xmlreaderinput
*input
)
765 input_buffer
*buffer
;
768 input
->buffer
= NULL
;
770 buffer
= readerinput_alloc(input
, sizeof(*buffer
));
771 if (!buffer
) return E_OUTOFMEMORY
;
773 buffer
->input
= input
;
774 buffer
->code_page
= ~0; /* code page is unknown at this point */
775 hr
= init_encoded_buffer(input
, &buffer
->utf16
);
777 readerinput_free(input
, buffer
);
781 hr
= init_encoded_buffer(input
, &buffer
->encoded
);
783 free_encoded_buffer(input
, &buffer
->utf16
);
784 readerinput_free(input
, buffer
);
788 input
->buffer
= buffer
;
792 static void free_input_buffer(input_buffer
*buffer
)
794 free_encoded_buffer(buffer
->input
, &buffer
->encoded
);
795 free_encoded_buffer(buffer
->input
, &buffer
->utf16
);
796 readerinput_free(buffer
->input
, buffer
);
799 static void readerinput_release_stream(xmlreaderinput
*readerinput
)
801 if (readerinput
->stream
) {
802 ISequentialStream_Release(readerinput
->stream
);
803 readerinput
->stream
= NULL
;
807 /* Queries already stored interface for IStream/ISequentialStream.
808 Interface supplied on creation will be overwritten */
809 static inline HRESULT
readerinput_query_for_stream(xmlreaderinput
*readerinput
)
813 readerinput_release_stream(readerinput
);
814 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_IStream
, (void**)&readerinput
->stream
);
816 hr
= IUnknown_QueryInterface(readerinput
->input
, &IID_ISequentialStream
, (void**)&readerinput
->stream
);
821 /* reads a chunk to raw buffer */
822 static HRESULT
readerinput_growraw(xmlreaderinput
*readerinput
)
824 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
825 /* to make sure aligned length won't exceed allocated length */
826 ULONG len
= buffer
->allocated
- buffer
->written
- 4;
830 /* always try to get aligned to 4 bytes, so the only case we can get partially read characters is
831 variable width encodings like UTF-8 */
832 len
= (len
+ 3) & ~3;
833 /* try to use allocated space or grow */
834 if (buffer
->allocated
- buffer
->written
< len
)
836 buffer
->allocated
*= 2;
837 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, buffer
->allocated
);
838 len
= buffer
->allocated
- buffer
->written
;
842 hr
= ISequentialStream_Read(readerinput
->stream
, buffer
->data
+ buffer
->written
, len
, &read
);
843 TRACE("written=%d, alloc=%d, requested=%d, read=%d, ret=0x%08x\n", buffer
->written
, buffer
->allocated
, len
, read
, hr
);
844 readerinput
->pending
= hr
== E_PENDING
;
845 if (FAILED(hr
)) return hr
;
846 buffer
->written
+= read
;
851 /* grows UTF-16 buffer so it has at least 'length' WCHAR chars free on return */
852 static void readerinput_grow(xmlreaderinput
*readerinput
, int length
)
854 encoded_buffer
*buffer
= &readerinput
->buffer
->utf16
;
856 length
*= sizeof(WCHAR
);
857 /* grow if needed, plus 4 bytes to be sure null terminator will fit in */
858 if (buffer
->allocated
< buffer
->written
+ length
+ 4)
860 int grown_size
= max(2*buffer
->allocated
, buffer
->allocated
+ length
);
861 buffer
->data
= readerinput_realloc(readerinput
, buffer
->data
, grown_size
);
862 buffer
->allocated
= grown_size
;
866 static inline BOOL
readerinput_is_utf8(xmlreaderinput
*readerinput
)
868 static const char startA
[] = {'<','?'};
869 static const char commentA
[] = {'<','!'};
870 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
871 unsigned char *ptr
= (unsigned char*)buffer
->data
;
873 return !memcmp(buffer
->data
, startA
, sizeof(startA
)) ||
874 !memcmp(buffer
->data
, commentA
, sizeof(commentA
)) ||
875 /* test start byte */
878 (ptr
[1] && (ptr
[1] <= 0x7f)) ||
879 (buffer
->data
[1] >> 5) == 0x6 || /* 2 bytes */
880 (buffer
->data
[1] >> 4) == 0xe || /* 3 bytes */
881 (buffer
->data
[1] >> 3) == 0x1e) /* 4 bytes */
885 static HRESULT
readerinput_detectencoding(xmlreaderinput
*readerinput
, xml_encoding
*enc
)
887 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
888 static const char utf8bom
[] = {0xef,0xbb,0xbf};
889 static const char utf16lebom
[] = {0xff,0xfe};
892 *enc
= XmlEncoding_Unknown
;
894 if (buffer
->written
<= 3)
896 HRESULT hr
= readerinput_growraw(readerinput
);
897 if (FAILED(hr
)) return hr
;
898 if (buffer
->written
< 3) return MX_E_INPUTEND
;
901 ptrW
= (WCHAR
*)buffer
->data
;
902 /* try start symbols if we have enough data to do that, input buffer should contain
903 first chunk already */
904 if (readerinput_is_utf8(readerinput
))
905 *enc
= XmlEncoding_UTF8
;
906 else if (*ptrW
== '<')
909 if (*ptrW
== '?' || *ptrW
== '!' || is_namestartchar(*ptrW
))
910 *enc
= XmlEncoding_UTF16
;
912 /* try with BOM now */
913 else if (!memcmp(buffer
->data
, utf8bom
, sizeof(utf8bom
)))
915 buffer
->cur
+= sizeof(utf8bom
);
916 *enc
= XmlEncoding_UTF8
;
918 else if (!memcmp(buffer
->data
, utf16lebom
, sizeof(utf16lebom
)))
920 buffer
->cur
+= sizeof(utf16lebom
);
921 *enc
= XmlEncoding_UTF16
;
927 static int readerinput_get_utf8_convlen(xmlreaderinput
*readerinput
)
929 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
930 int len
= buffer
->written
;
932 /* complete single byte char */
933 if (!(buffer
->data
[len
-1] & 0x80)) return len
;
935 /* find start byte of multibyte char */
936 while (--len
&& !(buffer
->data
[len
] & 0xc0))
942 /* Returns byte length of complete char sequence for buffer code page,
943 it's relative to current buffer position which is currently used for BOM handling
945 static int readerinput_get_convlen(xmlreaderinput
*readerinput
)
947 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
950 if (readerinput
->buffer
->code_page
== CP_UTF8
)
951 len
= readerinput_get_utf8_convlen(readerinput
);
953 len
= buffer
->written
;
955 TRACE("%d\n", len
- buffer
->cur
);
956 return len
- buffer
->cur
;
959 /* It's possible that raw buffer has some leftovers from last conversion - some char
960 sequence that doesn't represent a full code point. Length argument should be calculated with
961 readerinput_get_convlen(), if it's -1 it will be calculated here. */
962 static void readerinput_shrinkraw(xmlreaderinput
*readerinput
, int len
)
964 encoded_buffer
*buffer
= &readerinput
->buffer
->encoded
;
967 len
= readerinput_get_convlen(readerinput
);
969 memmove(buffer
->data
, buffer
->data
+ buffer
->cur
+ (buffer
->written
- len
), len
);
970 /* everything below cur is lost too */
971 buffer
->written
-= len
+ buffer
->cur
;
972 /* after this point we don't need cur offset really,
973 it's used only to mark where actual data begins when first chunk is read */
977 static void fixup_buffer_cr(encoded_buffer
*buffer
, int off
)
979 BOOL prev_cr
= buffer
->prev_cr
;
983 src
= dest
= (WCHAR
*)buffer
->data
+ off
;
984 while ((const char*)src
< buffer
->data
+ buffer
->written
)
993 if(prev_cr
&& *src
== '\n')
1000 buffer
->written
= (char*)dest
- buffer
->data
;
1001 buffer
->prev_cr
= prev_cr
;
1005 /* note that raw buffer content is kept */
1006 static void readerinput_switchencoding(xmlreaderinput
*readerinput
, xml_encoding enc
)
1008 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1009 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1015 hr
= get_code_page(enc
, &cp
);
1016 if (FAILED(hr
)) return;
1018 readerinput
->buffer
->code_page
= cp
;
1019 len
= readerinput_get_convlen(readerinput
);
1021 TRACE("switching to cp %d\n", cp
);
1023 /* just copy in this case */
1024 if (enc
== XmlEncoding_UTF16
)
1026 readerinput_grow(readerinput
, len
);
1027 memcpy(dest
->data
, src
->data
+ src
->cur
, len
);
1028 dest
->written
+= len
*sizeof(WCHAR
);
1032 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1033 readerinput_grow(readerinput
, dest_len
);
1034 ptr
= (WCHAR
*)dest
->data
;
1035 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1037 dest
->written
+= dest_len
*sizeof(WCHAR
);
1040 fixup_buffer_cr(dest
, 0);
1043 /* shrinks parsed data a buffer begins with */
1044 static void reader_shrink(xmlreader
*reader
)
1046 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1048 /* avoid to move too often using threshold shrink length */
1049 if (buffer
->cur
*sizeof(WCHAR
) > buffer
->written
/ 2)
1051 buffer
->written
-= buffer
->cur
*sizeof(WCHAR
);
1052 memmove(buffer
->data
, (WCHAR
*)buffer
->data
+ buffer
->cur
, buffer
->written
);
1054 *(WCHAR
*)&buffer
->data
[buffer
->written
] = 0;
1058 /* This is a normal way for reader to get new data converted from raw buffer to utf16 buffer.
1059 It won't attempt to shrink but will grow destination buffer if needed */
1060 static HRESULT
reader_more(xmlreader
*reader
)
1062 xmlreaderinput
*readerinput
= reader
->input
;
1063 encoded_buffer
*src
= &readerinput
->buffer
->encoded
;
1064 encoded_buffer
*dest
= &readerinput
->buffer
->utf16
;
1065 UINT cp
= readerinput
->buffer
->code_page
;
1066 int len
, dest_len
, prev_len
;
1070 /* get some raw data from stream first */
1071 hr
= readerinput_growraw(readerinput
);
1072 len
= readerinput_get_convlen(readerinput
);
1073 prev_len
= dest
->written
/ sizeof(WCHAR
);
1075 /* just copy for UTF-16 case */
1078 readerinput_grow(readerinput
, len
);
1079 memcpy(dest
->data
+ dest
->written
, src
->data
+ src
->cur
, len
);
1080 dest
->written
+= len
*sizeof(WCHAR
);
1084 dest_len
= MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, NULL
, 0);
1085 readerinput_grow(readerinput
, dest_len
);
1086 ptr
= (WCHAR
*)(dest
->data
+ dest
->written
);
1087 MultiByteToWideChar(cp
, 0, src
->data
+ src
->cur
, len
, ptr
, dest_len
);
1089 dest
->written
+= dest_len
*sizeof(WCHAR
);
1090 /* get rid of processed data */
1091 readerinput_shrinkraw(readerinput
, len
);
1094 fixup_buffer_cr(dest
, prev_len
);
1098 static inline UINT
reader_get_cur(xmlreader
*reader
)
1100 return reader
->input
->buffer
->utf16
.cur
;
1103 static inline WCHAR
*reader_get_ptr(xmlreader
*reader
)
1105 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1106 WCHAR
*ptr
= (WCHAR
*)buffer
->data
+ buffer
->cur
;
1107 if (!*ptr
) reader_more(reader
);
1108 return (WCHAR
*)buffer
->data
+ buffer
->cur
;
1111 static int reader_cmp(xmlreader
*reader
, const WCHAR
*str
)
1114 const WCHAR
*ptr
= reader_get_ptr(reader
);
1119 reader_more(reader
);
1120 ptr
= reader_get_ptr(reader
);
1122 if (str
[i
] != ptr
[i
])
1123 return ptr
[i
] - str
[i
];
1129 static void reader_update_position(xmlreader
*reader
, WCHAR ch
)
1132 reader
->position
.line_position
= 1;
1133 else if (ch
== '\n')
1135 reader
->position
.line_number
++;
1136 reader
->position
.line_position
= 1;
1139 reader
->position
.line_position
++;
1142 /* moves cursor n WCHARs forward */
1143 static void reader_skipn(xmlreader
*reader
, int n
)
1145 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
1148 while (*(ptr
= reader_get_ptr(reader
)) && n
--)
1150 reader_update_position(reader
, *ptr
);
1155 static inline BOOL
is_wchar_space(WCHAR ch
)
1157 return ch
== ' ' || ch
== '\t' || ch
== '\r' || ch
== '\n';
1160 /* [3] S ::= (#x20 | #x9 | #xD | #xA)+ */
1161 static int reader_skipspaces(xmlreader
*reader
)
1163 const WCHAR
*ptr
= reader_get_ptr(reader
);
1164 UINT start
= reader_get_cur(reader
);
1166 while (is_wchar_space(*ptr
))
1168 reader_skipn(reader
, 1);
1169 ptr
= reader_get_ptr(reader
);
1172 return reader_get_cur(reader
) - start
;
1175 /* [26] VersionNum ::= '1.' [0-9]+ */
1176 static HRESULT
reader_parse_versionnum(xmlreader
*reader
, strval
*val
)
1181 if (reader_cmp(reader
, L
"1.")) return WC_E_XMLDECL
;
1183 start
= reader_get_cur(reader
);
1185 reader_skipn(reader
, 2);
1187 ptr2
= ptr
= reader_get_ptr(reader
);
1188 while (*ptr
>= '0' && *ptr
<= '9')
1190 reader_skipn(reader
, 1);
1191 ptr
= reader_get_ptr(reader
);
1194 if (ptr2
== ptr
) return WC_E_DIGIT
;
1195 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, val
);
1196 TRACE("version=%s\n", debug_strval(reader
, val
));
1200 /* [25] Eq ::= S? '=' S? */
1201 static HRESULT
reader_parse_eq(xmlreader
*reader
)
1203 reader_skipspaces(reader
);
1204 if (reader_cmp(reader
, L
"=")) return WC_E_EQUAL
;
1206 reader_skipn(reader
, 1);
1207 reader_skipspaces(reader
);
1211 static BOOL
reader_is_quote(xmlreader
*reader
)
1213 return !reader_cmp(reader
, L
"\'") || !reader_cmp(reader
, L
"\"");
1216 /* [24] VersionInfo ::= S 'version' Eq ("'" VersionNum "'" | '"' VersionNum '"') */
1217 static HRESULT
reader_parse_versioninfo(xmlreader
*reader
)
1219 struct reader_position position
;
1223 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1225 position
= reader
->position
;
1226 if (reader_cmp(reader
, L
"version")) return WC_E_XMLDECL
;
1227 reader_init_strvalue(reader_get_cur(reader
), 7, &name
);
1228 /* skip 'version' */
1229 reader_skipn(reader
, 7);
1231 hr
= reader_parse_eq(reader
);
1232 if (FAILED(hr
)) return hr
;
1234 if (!reader_is_quote(reader
))
1237 reader_skipn(reader
, 1);
1239 hr
= reader_parse_versionnum(reader
, &val
);
1240 if (FAILED(hr
)) return hr
;
1242 if (!reader_is_quote(reader
))
1246 reader_skipn(reader
, 1);
1248 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1251 /* ([A-Za-z0-9._] | '-') */
1252 static inline BOOL
is_wchar_encname(WCHAR ch
)
1254 return ((ch
>= 'A' && ch
<= 'Z') ||
1255 (ch
>= 'a' && ch
<= 'z') ||
1256 (ch
>= '0' && ch
<= '9') ||
1257 (ch
== '.') || (ch
== '_') ||
1261 /* [81] EncName ::= [A-Za-z] ([A-Za-z0-9._] | '-')* */
1262 static HRESULT
reader_parse_encname(xmlreader
*reader
, strval
*val
)
1264 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
1268 if ((*start
< 'A' || *start
> 'Z') && (*start
< 'a' || *start
> 'z'))
1269 return WC_E_ENCNAME
;
1271 val
->start
= reader_get_cur(reader
);
1274 while (is_wchar_encname(*++ptr
))
1278 enc
= parse_encoding_name(start
, len
);
1279 TRACE("encoding name %s\n", debugstr_wn(start
, len
));
1283 if (enc
== XmlEncoding_Unknown
)
1284 return WC_E_ENCNAME
;
1286 /* skip encoding name */
1287 reader_skipn(reader
, len
);
1291 /* [80] EncodingDecl ::= S 'encoding' Eq ('"' EncName '"' | "'" EncName "'" ) */
1292 static HRESULT
reader_parse_encdecl(xmlreader
*reader
)
1294 struct reader_position position
;
1298 if (!reader_skipspaces(reader
)) return S_FALSE
;
1300 position
= reader
->position
;
1301 if (reader_cmp(reader
, L
"encoding")) return S_FALSE
;
1302 name
.str
= reader_get_ptr(reader
);
1303 name
.start
= reader_get_cur(reader
);
1305 /* skip 'encoding' */
1306 reader_skipn(reader
, 8);
1308 hr
= reader_parse_eq(reader
);
1309 if (FAILED(hr
)) return hr
;
1311 if (!reader_is_quote(reader
))
1314 reader_skipn(reader
, 1);
1316 hr
= reader_parse_encname(reader
, &val
);
1317 if (FAILED(hr
)) return hr
;
1319 if (!reader_is_quote(reader
))
1323 reader_skipn(reader
, 1);
1325 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1328 /* [32] SDDecl ::= S 'standalone' Eq (("'" ('yes' | 'no') "'") | ('"' ('yes' | 'no') '"')) */
1329 static HRESULT
reader_parse_sddecl(xmlreader
*reader
)
1331 struct reader_position position
;
1336 if (!reader_skipspaces(reader
)) return S_FALSE
;
1338 position
= reader
->position
;
1339 if (reader_cmp(reader
, L
"standalone")) return S_FALSE
;
1340 reader_init_strvalue(reader_get_cur(reader
), 10, &name
);
1341 /* skip 'standalone' */
1342 reader_skipn(reader
, 10);
1344 hr
= reader_parse_eq(reader
);
1345 if (FAILED(hr
)) return hr
;
1347 if (!reader_is_quote(reader
))
1350 reader_skipn(reader
, 1);
1352 if (reader_cmp(reader
, L
"yes") && reader_cmp(reader
, L
"no"))
1353 return WC_E_XMLDECL
;
1355 start
= reader_get_cur(reader
);
1356 /* skip 'yes'|'no' */
1357 reader_skipn(reader
, reader_cmp(reader
, L
"yes") ? 2 : 3);
1358 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &val
);
1359 TRACE("standalone=%s\n", debug_strval(reader
, &val
));
1361 if (!reader_is_quote(reader
))
1364 reader_skipn(reader
, 1);
1366 return reader_add_attr(reader
, NULL
, &name
, NULL
, &val
, &position
, 0);
1369 /* [23] XMLDecl ::= '<?xml' VersionInfo EncodingDecl? SDDecl? S? '?>' */
1370 static HRESULT
reader_parse_xmldecl(xmlreader
*reader
)
1372 struct reader_position position
;
1375 if (reader_cmp(reader
, L
"<?xml "))
1378 reader_skipn(reader
, 2);
1379 position
= reader
->position
;
1380 reader_skipn(reader
, 3);
1381 hr
= reader_parse_versioninfo(reader
);
1385 hr
= reader_parse_encdecl(reader
);
1389 hr
= reader_parse_sddecl(reader
);
1393 reader_skipspaces(reader
);
1394 if (reader_cmp(reader
, L
"?>"))
1395 return WC_E_XMLDECL
;
1398 reader_skipn(reader
, 2);
1400 reader
->nodetype
= XmlNodeType_XmlDeclaration
;
1401 reader
->empty_element
.position
= position
;
1402 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_xml
);
1403 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_xml
);
1408 /* [15] Comment ::= '<!--' ((Char - '-') | ('-' (Char - '-')))* '-->' */
1409 static HRESULT
reader_parse_comment(xmlreader
*reader
)
1414 if (reader
->resumestate
== XmlReadResumeState_Comment
)
1416 start
= reader
->resume
[XmlReadResume_Body
];
1417 ptr
= reader_get_ptr(reader
);
1422 reader_skipn(reader
, 4);
1423 reader_shrink(reader
);
1424 ptr
= reader_get_ptr(reader
);
1425 start
= reader_get_cur(reader
);
1426 reader
->nodetype
= XmlNodeType_Comment
;
1427 reader
->resume
[XmlReadResume_Body
] = start
;
1428 reader
->resumestate
= XmlReadResumeState_Comment
;
1429 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
1432 /* will exit when there's no more data, it won't attempt to
1433 read more from stream */
1444 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1445 TRACE("%s\n", debug_strval(reader
, &value
));
1447 /* skip rest of markup '->' */
1448 reader_skipn(reader
, 3);
1450 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1451 reader
->resume
[XmlReadResume_Body
] = 0;
1452 reader
->resumestate
= XmlReadResumeState_Initial
;
1456 return WC_E_COMMENT
;
1460 reader_skipn(reader
, 1);
1467 /* [2] Char ::= #x9 | #xA | #xD | [#x20-#xD7FF] | [#xE000-#xFFFD] | [#x10000-#x10FFFF] */
1468 static inline BOOL
is_char(WCHAR ch
)
1470 return (ch
== '\t') || (ch
== '\r') || (ch
== '\n') ||
1471 (ch
>= 0x20 && ch
<= 0xd7ff) ||
1472 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1473 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1474 (ch
>= 0xe000 && ch
<= 0xfffd);
1477 /* [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1478 BOOL
is_pubchar(WCHAR ch
)
1480 return (ch
== ' ') ||
1481 (ch
>= 'a' && ch
<= 'z') ||
1482 (ch
>= 'A' && ch
<= 'Z') ||
1483 (ch
>= '0' && ch
<= '9') ||
1484 (ch
>= '-' && ch
<= ';') || /* '()*+,-./:; */
1485 (ch
== '=') || (ch
== '?') ||
1486 (ch
== '@') || (ch
== '!') ||
1487 (ch
>= '#' && ch
<= '%') || /* #$% */
1488 (ch
== '_') || (ch
== '\r') || (ch
== '\n');
1491 BOOL
is_namestartchar(WCHAR ch
)
1493 return (ch
== ':') || (ch
>= 'A' && ch
<= 'Z') ||
1494 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1495 (ch
>= 0xc0 && ch
<= 0xd6) ||
1496 (ch
>= 0xd8 && ch
<= 0xf6) ||
1497 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1498 (ch
>= 0x370 && ch
<= 0x37d) ||
1499 (ch
>= 0x37f && ch
<= 0x1fff) ||
1500 (ch
>= 0x200c && ch
<= 0x200d) ||
1501 (ch
>= 0x2070 && ch
<= 0x218f) ||
1502 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1503 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1504 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1505 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1506 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1507 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1510 /* [4 NS] NCName ::= Name - (Char* ':' Char*) */
1511 BOOL
is_ncnamechar(WCHAR ch
)
1513 return (ch
>= 'A' && ch
<= 'Z') ||
1514 (ch
== '_') || (ch
>= 'a' && ch
<= 'z') ||
1515 (ch
== '-') || (ch
== '.') ||
1516 (ch
>= '0' && ch
<= '9') ||
1518 (ch
>= 0xc0 && ch
<= 0xd6) ||
1519 (ch
>= 0xd8 && ch
<= 0xf6) ||
1520 (ch
>= 0xf8 && ch
<= 0x2ff) ||
1521 (ch
>= 0x300 && ch
<= 0x36f) ||
1522 (ch
>= 0x370 && ch
<= 0x37d) ||
1523 (ch
>= 0x37f && ch
<= 0x1fff) ||
1524 (ch
>= 0x200c && ch
<= 0x200d) ||
1525 (ch
>= 0x203f && ch
<= 0x2040) ||
1526 (ch
>= 0x2070 && ch
<= 0x218f) ||
1527 (ch
>= 0x2c00 && ch
<= 0x2fef) ||
1528 (ch
>= 0x3001 && ch
<= 0xd7ff) ||
1529 (ch
>= 0xd800 && ch
<= 0xdbff) || /* high surrogate */
1530 (ch
>= 0xdc00 && ch
<= 0xdfff) || /* low surrogate */
1531 (ch
>= 0xf900 && ch
<= 0xfdcf) ||
1532 (ch
>= 0xfdf0 && ch
<= 0xfffd);
1535 BOOL
is_namechar(WCHAR ch
)
1537 return (ch
== ':') || is_ncnamechar(ch
);
1540 static XmlNodeType
reader_get_nodetype(const xmlreader
*reader
)
1542 /* When we're on attribute always return attribute type, container node type is kept.
1543 Note that container is not necessarily an element, and attribute doesn't mean it's
1544 an attribute in XML spec terms. */
1545 return reader
->attr
? XmlNodeType_Attribute
: reader
->nodetype
;
1548 /* [4] NameStartChar ::= ":" | [A-Z] | "_" | [a-z] | [#xC0-#xD6] | [#xD8-#xF6] | [#xF8-#x2FF] | [#x370-#x37D] |
1549 [#x37F-#x1FFF] | [#x200C-#x200D] | [#x2070-#x218F] | [#x2C00-#x2FEF] | [#x3001-#xD7FF] |
1550 [#xF900-#xFDCF] | [#xFDF0-#xFFFD] | [#x10000-#xEFFFF]
1551 [4a] NameChar ::= NameStartChar | "-" | "." | [0-9] | #xB7 | [#x0300-#x036F] | [#x203F-#x2040]
1552 [5] Name ::= NameStartChar (NameChar)* */
1553 static HRESULT
reader_parse_name(xmlreader
*reader
, strval
*name
)
1558 if (reader
->resume
[XmlReadResume_Name
])
1560 start
= reader
->resume
[XmlReadResume_Name
];
1561 ptr
= reader_get_ptr(reader
);
1565 ptr
= reader_get_ptr(reader
);
1566 start
= reader_get_cur(reader
);
1567 if (!is_namestartchar(*ptr
)) return WC_E_NAMECHARACTER
;
1570 while (is_namechar(*ptr
))
1572 reader_skipn(reader
, 1);
1573 ptr
= reader_get_ptr(reader
);
1576 if (is_reader_pending(reader
))
1578 reader
->resume
[XmlReadResume_Name
] = start
;
1582 reader
->resume
[XmlReadResume_Name
] = 0;
1584 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, name
);
1585 TRACE("name %s:%d\n", debug_strval(reader
, name
), name
->len
);
1590 /* [17] PITarget ::= Name - (('X' | 'x') ('M' | 'm') ('L' | 'l')) */
1591 static HRESULT
reader_parse_pitarget(xmlreader
*reader
, strval
*target
)
1593 static const strval xmlval
= { (WCHAR
*)L
"xml", 3 };
1599 hr
= reader_parse_name(reader
, &name
);
1600 if (FAILED(hr
)) return is_reader_pending(reader
) ? E_PENDING
: WC_E_PI
;
1602 /* now that we got name check for illegal content */
1603 if (strval_eq(reader
, &name
, &xmlval
))
1604 return WC_E_LEADINGXML
;
1606 /* PITarget can't be a qualified name */
1607 ptr
= reader_get_strptr(reader
, &name
);
1608 for (i
= 0; i
< name
.len
; i
++)
1610 return i
? NC_E_NAMECOLON
: WC_E_PI
;
1612 TRACE("pitarget %s:%d\n", debug_strval(reader
, &name
), name
.len
);
1617 /* [16] PI ::= '<?' PITarget (S (Char* - (Char* '?>' Char*)))? '?>' */
1618 static HRESULT
reader_parse_pi(xmlreader
*reader
)
1625 switch (reader
->resumestate
)
1627 case XmlReadResumeState_Initial
:
1629 reader_skipn(reader
, 2);
1630 reader_shrink(reader
);
1631 reader
->resumestate
= XmlReadResumeState_PITarget
;
1632 case XmlReadResumeState_PITarget
:
1633 hr
= reader_parse_pitarget(reader
, &target
);
1634 if (FAILED(hr
)) return hr
;
1635 reader_set_strvalue(reader
, StringValue_LocalName
, &target
);
1636 reader_set_strvalue(reader
, StringValue_QualifiedName
, &target
);
1637 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1638 reader
->resumestate
= XmlReadResumeState_PIBody
;
1639 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1644 start
= reader
->resume
[XmlReadResume_Body
];
1645 ptr
= reader_get_ptr(reader
);
1652 UINT cur
= reader_get_cur(reader
);
1655 /* strip all leading whitespace chars */
1658 ptr
= reader_get_ptr2(reader
, start
);
1659 if (!is_wchar_space(*ptr
)) break;
1663 reader_init_strvalue(start
, cur
-start
, &value
);
1666 reader_skipn(reader
, 2);
1667 TRACE("%s\n", debug_strval(reader
, &value
));
1668 reader
->nodetype
= XmlNodeType_ProcessingInstruction
;
1669 reader
->resumestate
= XmlReadResumeState_Initial
;
1670 reader
->resume
[XmlReadResume_Body
] = 0;
1671 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1676 reader_skipn(reader
, 1);
1677 ptr
= reader_get_ptr(reader
);
1683 /* This one is used to parse significant whitespace nodes, like in Misc production */
1684 static HRESULT
reader_parse_whitespace(xmlreader
*reader
)
1686 switch (reader
->resumestate
)
1688 case XmlReadResumeState_Initial
:
1689 reader_shrink(reader
);
1690 reader
->resumestate
= XmlReadResumeState_Whitespace
;
1691 reader
->resume
[XmlReadResume_Body
] = reader_get_cur(reader
);
1692 reader
->nodetype
= XmlNodeType_Whitespace
;
1693 reader_set_strvalue(reader
, StringValue_LocalName
, &strval_empty
);
1694 reader_set_strvalue(reader
, StringValue_QualifiedName
, &strval_empty
);
1695 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
1697 case XmlReadResumeState_Whitespace
:
1702 reader_skipspaces(reader
);
1703 if (is_reader_pending(reader
)) return S_OK
;
1705 start
= reader
->resume
[XmlReadResume_Body
];
1706 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
1707 reader_set_strvalue(reader
, StringValue_Value
, &value
);
1708 TRACE("%s\n", debug_strval(reader
, &value
));
1709 reader
->resumestate
= XmlReadResumeState_Initial
;
1718 /* [27] Misc ::= Comment | PI | S */
1719 static HRESULT
reader_parse_misc(xmlreader
*reader
)
1721 HRESULT hr
= S_FALSE
;
1723 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
1725 hr
= reader_more(reader
);
1726 if (FAILED(hr
)) return hr
;
1728 /* finish current node */
1729 switch (reader
->resumestate
)
1731 case XmlReadResumeState_PITarget
:
1732 case XmlReadResumeState_PIBody
:
1733 return reader_parse_pi(reader
);
1734 case XmlReadResumeState_Comment
:
1735 return reader_parse_comment(reader
);
1736 case XmlReadResumeState_Whitespace
:
1737 return reader_parse_whitespace(reader
);
1739 ERR("unknown resume state %d\n", reader
->resumestate
);
1745 const WCHAR
*cur
= reader_get_ptr(reader
);
1747 if (is_wchar_space(*cur
))
1748 hr
= reader_parse_whitespace(reader
);
1749 else if (!reader_cmp(reader
, L
"<!--"))
1750 hr
= reader_parse_comment(reader
);
1751 else if (!reader_cmp(reader
, L
"<?"))
1752 hr
= reader_parse_pi(reader
);
1756 if (hr
!= S_FALSE
) return hr
;
1762 /* [11] SystemLiteral ::= ('"' [^"]* '"') | ("'" [^']* "'") */
1763 static HRESULT
reader_parse_sys_literal(xmlreader
*reader
, strval
*literal
)
1765 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1768 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1771 reader_skipn(reader
, 1);
1773 cur
= reader_get_ptr(reader
);
1774 start
= reader_get_cur(reader
);
1775 while (is_char(*cur
) && *cur
!= quote
)
1777 reader_skipn(reader
, 1);
1778 cur
= reader_get_ptr(reader
);
1780 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1781 if (*cur
== quote
) reader_skipn(reader
, 1);
1783 TRACE("%s\n", debug_strval(reader
, literal
));
1787 /* [12] PubidLiteral ::= '"' PubidChar* '"' | "'" (PubidChar - "'")* "'"
1788 [13] PubidChar ::= #x20 | #xD | #xA | [a-zA-Z0-9] | [-'()+,./:=?;!*#@$_%] */
1789 static HRESULT
reader_parse_pub_literal(xmlreader
*reader
, strval
*literal
)
1791 WCHAR
*cur
= reader_get_ptr(reader
), quote
;
1794 if (*cur
!= '"' && *cur
!= '\'') return WC_E_QUOTE
;
1797 reader_skipn(reader
, 1);
1799 start
= reader_get_cur(reader
);
1800 cur
= reader_get_ptr(reader
);
1801 while (is_pubchar(*cur
) && *cur
!= quote
)
1803 reader_skipn(reader
, 1);
1804 cur
= reader_get_ptr(reader
);
1806 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, literal
);
1807 if (*cur
== quote
) reader_skipn(reader
, 1);
1809 TRACE("%s\n", debug_strval(reader
, literal
));
1813 /* [75] ExternalID ::= 'SYSTEM' S SystemLiteral | 'PUBLIC' S PubidLiteral S SystemLiteral */
1814 static HRESULT
reader_parse_externalid(xmlreader
*reader
)
1816 static WCHAR systemW
[] = L
"SYSTEM";
1817 static WCHAR publicW
[] = L
"PUBLIC";
1818 struct reader_position position
= reader
->position
;
1823 if (!reader_cmp(reader
, publicW
)) {
1827 reader_skipn(reader
, 6);
1828 cnt
= reader_skipspaces(reader
);
1829 if (!cnt
) return WC_E_WHITESPACE
;
1831 hr
= reader_parse_pub_literal(reader
, &pub
);
1832 if (FAILED(hr
)) return hr
;
1834 reader_init_cstrvalue(publicW
, lstrlenW(publicW
), &name
);
1835 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &pub
, &position
, 0);
1836 if (FAILED(hr
)) return hr
;
1838 cnt
= reader_skipspaces(reader
);
1839 if (!cnt
) return S_OK
;
1841 /* optional system id */
1842 hr
= reader_parse_sys_literal(reader
, &sys
);
1843 if (FAILED(hr
)) return S_OK
;
1845 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1846 hr
= reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1847 if (FAILED(hr
)) return hr
;
1850 } else if (!reader_cmp(reader
, systemW
)) {
1852 reader_skipn(reader
, 6);
1853 cnt
= reader_skipspaces(reader
);
1854 if (!cnt
) return WC_E_WHITESPACE
;
1856 hr
= reader_parse_sys_literal(reader
, &sys
);
1857 if (FAILED(hr
)) return hr
;
1859 reader_init_cstrvalue(systemW
, lstrlenW(systemW
), &name
);
1860 return reader_add_attr(reader
, NULL
, &name
, NULL
, &sys
, &position
, 0);
1866 /* [28] doctypedecl ::= '<!DOCTYPE' S Name (S ExternalID)? S? ('[' intSubset ']' S?)? '>' */
1867 static HRESULT
reader_parse_dtd(xmlreader
*reader
)
1873 if (reader_cmp(reader
, L
"<!DOCTYPE")) return S_FALSE
;
1874 reader_shrink(reader
);
1876 /* DTD processing is not allowed by default */
1877 if (reader
->dtdmode
== DtdProcessing_Prohibit
) return WC_E_DTDPROHIBITED
;
1879 reader_skipn(reader
, 9);
1880 if (!reader_skipspaces(reader
)) return WC_E_WHITESPACE
;
1883 hr
= reader_parse_name(reader
, &name
);
1884 if (FAILED(hr
)) return WC_E_DECLDOCTYPE
;
1886 reader_skipspaces(reader
);
1888 hr
= reader_parse_externalid(reader
);
1889 if (FAILED(hr
)) return hr
;
1891 reader_skipspaces(reader
);
1893 cur
= reader_get_ptr(reader
);
1896 FIXME("internal subset parsing not implemented\n");
1901 reader_skipn(reader
, 1);
1903 reader
->nodetype
= XmlNodeType_DocumentType
;
1904 reader_set_strvalue(reader
, StringValue_LocalName
, &name
);
1905 reader_set_strvalue(reader
, StringValue_QualifiedName
, &name
);
1910 /* [11 NS] LocalPart ::= NCName */
1911 static HRESULT
reader_parse_local(xmlreader
*reader
, strval
*local
, BOOL check_for_separator
)
1916 if (reader
->resume
[XmlReadResume_Local
])
1918 start
= reader
->resume
[XmlReadResume_Local
];
1919 ptr
= reader_get_ptr(reader
);
1923 ptr
= reader_get_ptr(reader
);
1924 start
= reader_get_cur(reader
);
1927 while (is_ncnamechar(*ptr
))
1929 reader_skipn(reader
, 1);
1930 ptr
= reader_get_ptr(reader
);
1933 if (check_for_separator
&& *ptr
== ':')
1934 return NC_E_QNAMECOLON
;
1936 if (is_reader_pending(reader
))
1938 reader
->resume
[XmlReadResume_Local
] = start
;
1942 reader
->resume
[XmlReadResume_Local
] = 0;
1944 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, local
);
1949 /* [7 NS] QName ::= PrefixedName | UnprefixedName
1950 [8 NS] PrefixedName ::= Prefix ':' LocalPart
1951 [9 NS] UnprefixedName ::= LocalPart
1952 [10 NS] Prefix ::= NCName */
1953 static HRESULT
reader_parse_qname(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
1959 if (reader
->resume
[XmlReadResume_Name
])
1961 start
= reader
->resume
[XmlReadResume_Name
];
1962 ptr
= reader_get_ptr(reader
);
1966 ptr
= reader_get_ptr(reader
);
1967 start
= reader_get_cur(reader
);
1968 reader
->resume
[XmlReadResume_Name
] = start
;
1969 if (!is_ncnamechar(*ptr
)) return NC_E_QNAMECHARACTER
;
1972 if (reader
->resume
[XmlReadResume_Local
])
1974 hr
= reader_parse_local(reader
, local
, FALSE
);
1975 if (FAILED(hr
)) return hr
;
1977 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
],
1978 local
->start
- reader
->resume
[XmlReadResume_Name
] - 1,
1983 /* skip prefix part */
1984 while (is_ncnamechar(*ptr
))
1986 reader_skipn(reader
, 1);
1987 ptr
= reader_get_ptr(reader
);
1990 if (is_reader_pending(reader
)) return E_PENDING
;
1992 /* got a qualified name */
1995 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, prefix
);
1998 reader_skipn(reader
, 1);
1999 hr
= reader_parse_local(reader
, local
, TRUE
);
2000 if (FAILED(hr
)) return hr
;
2004 reader_init_strvalue(reader
->resume
[XmlReadResume_Name
], reader_get_cur(reader
)-reader
->resume
[XmlReadResume_Name
], local
);
2005 reader_init_strvalue(0, 0, prefix
);
2010 TRACE("qname %s:%s\n", debug_strval(reader
, prefix
), debug_strval(reader
, local
));
2012 TRACE("ncname %s\n", debug_strval(reader
, local
));
2014 reader_init_strvalue(prefix
->len
? prefix
->start
: local
->start
,
2016 (prefix
->len
? prefix
->len
+ 1 : 0) + local
->len
,
2019 reader
->resume
[XmlReadResume_Name
] = 0;
2020 reader
->resume
[XmlReadResume_Local
] = 0;
2025 static WCHAR
get_predefined_entity(const xmlreader
*reader
, const strval
*name
)
2027 static const strval lt
= { (WCHAR
*)L
"lt", 2 };
2028 static const strval gt
= { (WCHAR
*)L
"gt", 2 };
2029 static const strval amp
= { (WCHAR
*)L
"amp", 3 };
2030 static const strval apos
= { (WCHAR
*)L
"apos", 4 };
2031 static const strval quot
= { (WCHAR
*)L
"quot", 4 };
2032 WCHAR
*str
= reader_get_strptr(reader
, name
);
2037 if (strval_eq(reader
, name
, <
)) return '<';
2040 if (strval_eq(reader
, name
, >
)) return '>';
2043 if (strval_eq(reader
, name
, &
))
2045 else if (strval_eq(reader
, name
, &apos
))
2049 if (strval_eq(reader
, name
, "
)) return '\"';
2058 /* [66] CharRef ::= '&#' [0-9]+ ';' | '&#x' [0-9a-fA-F]+ ';'
2059 [67] Reference ::= EntityRef | CharRef
2060 [68] EntityRef ::= '&' Name ';' */
2061 static HRESULT
reader_parse_reference(xmlreader
*reader
)
2063 encoded_buffer
*buffer
= &reader
->input
->buffer
->utf16
;
2064 WCHAR
*start
= reader_get_ptr(reader
), *ptr
;
2065 UINT cur
= reader_get_cur(reader
);
2070 reader_skipn(reader
, 1);
2071 ptr
= reader_get_ptr(reader
);
2075 reader_skipn(reader
, 1);
2076 ptr
= reader_get_ptr(reader
);
2078 /* hex char or decimal */
2081 reader_skipn(reader
, 1);
2082 ptr
= reader_get_ptr(reader
);
2086 if ((*ptr
>= '0' && *ptr
<= '9'))
2087 ch
= ch
*16 + *ptr
- '0';
2088 else if ((*ptr
>= 'a' && *ptr
<= 'f'))
2089 ch
= ch
*16 + *ptr
- 'a' + 10;
2090 else if ((*ptr
>= 'A' && *ptr
<= 'F'))
2091 ch
= ch
*16 + *ptr
- 'A' + 10;
2093 return ch
? WC_E_SEMICOLON
: WC_E_HEXDIGIT
;
2094 reader_skipn(reader
, 1);
2095 ptr
= reader_get_ptr(reader
);
2102 if ((*ptr
>= '0' && *ptr
<= '9'))
2104 ch
= ch
*10 + *ptr
- '0';
2105 reader_skipn(reader
, 1);
2106 ptr
= reader_get_ptr(reader
);
2109 return ch
? WC_E_SEMICOLON
: WC_E_DIGIT
;
2113 if (!is_char(ch
)) return WC_E_XMLCHARACTER
;
2116 if (is_wchar_space(ch
)) ch
= ' ';
2118 ptr
= reader_get_ptr(reader
);
2119 start
= reader_get_ptr2(reader
, cur
);
2120 len
= buffer
->written
- ((char *)ptr
- buffer
->data
);
2121 memmove(start
+ 1, ptr
+ 1, len
);
2123 buffer
->written
-= (reader_get_cur(reader
) - cur
) * sizeof(WCHAR
);
2124 buffer
->cur
= cur
+ 1;
2133 hr
= reader_parse_name(reader
, &name
);
2134 if (FAILED(hr
)) return hr
;
2136 ptr
= reader_get_ptr(reader
);
2137 if (*ptr
!= ';') return WC_E_SEMICOLON
;
2139 /* predefined entities resolve to a single character */
2140 ch
= get_predefined_entity(reader
, &name
);
2143 len
= buffer
->written
- ((char*)ptr
- buffer
->data
) - sizeof(WCHAR
);
2144 memmove(start
+1, ptr
+1, len
);
2145 buffer
->cur
= cur
+ 1;
2146 buffer
->written
-= (ptr
- start
) * sizeof(WCHAR
);
2152 FIXME("undeclared entity %s\n", debug_strval(reader
, &name
));
2153 return WC_E_UNDECLAREDENTITY
;
2161 /* [10 NS] AttValue ::= '"' ([^<&"] | Reference)* '"' | "'" ([^<&'] | Reference)* "'" */
2162 static HRESULT
reader_parse_attvalue(xmlreader
*reader
, strval
*value
)
2167 ptr
= reader_get_ptr(reader
);
2169 /* skip opening quote */
2171 if (quote
!= '\"' && quote
!= '\'') return WC_E_QUOTE
;
2172 reader_skipn(reader
, 1);
2174 ptr
= reader_get_ptr(reader
);
2175 start
= reader_get_cur(reader
);
2178 if (*ptr
== '<') return WC_E_LESSTHAN
;
2182 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, value
);
2183 /* skip closing quote */
2184 reader_skipn(reader
, 1);
2190 HRESULT hr
= reader_parse_reference(reader
);
2191 if (FAILED(hr
)) return hr
;
2195 /* replace all whitespace chars with ' ' */
2196 if (is_wchar_space(*ptr
)) *ptr
= ' ';
2197 reader_skipn(reader
, 1);
2199 ptr
= reader_get_ptr(reader
);
2205 /* [1 NS] NSAttName ::= PrefixedAttName | DefaultAttName
2206 [2 NS] PrefixedAttName ::= 'xmlns:' NCName
2207 [3 NS] DefaultAttName ::= 'xmlns'
2208 [15 NS] Attribute ::= NSAttName Eq AttValue | QName Eq AttValue */
2209 static HRESULT
reader_parse_attribute(xmlreader
*reader
)
2211 struct reader_position position
= reader
->position
;
2212 strval prefix
, local
, qname
, value
;
2213 enum attribute_flags flags
= 0;
2216 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2217 if (FAILED(hr
)) return hr
;
2219 if (strval_eq(reader
, &prefix
, &strval_xmlns
))
2220 flags
|= ATTRIBUTE_NS_DEFINITION
;
2222 if (strval_eq(reader
, &qname
, &strval_xmlns
))
2223 flags
|= ATTRIBUTE_DEFAULT_NS_DEFINITION
;
2225 hr
= reader_parse_eq(reader
);
2226 if (FAILED(hr
)) return hr
;
2228 hr
= reader_parse_attvalue(reader
, &value
);
2229 if (FAILED(hr
)) return hr
;
2231 if (flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
2232 reader_push_ns(reader
, &local
, &value
, !!(flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
));
2234 TRACE("%s=%s\n", debug_strval(reader
, &local
), debug_strval(reader
, &value
));
2235 return reader_add_attr(reader
, &prefix
, &local
, &qname
, &value
, &position
, flags
);
2238 /* [12 NS] STag ::= '<' QName (S Attribute)* S? '>'
2239 [14 NS] EmptyElemTag ::= '<' QName (S Attribute)* S? '/>' */
2240 static HRESULT
reader_parse_stag(xmlreader
*reader
, strval
*prefix
, strval
*local
, strval
*qname
)
2242 struct reader_position position
= reader
->position
;
2245 hr
= reader_parse_qname(reader
, prefix
, local
, qname
);
2246 if (FAILED(hr
)) return hr
;
2250 reader_skipspaces(reader
);
2253 if ((reader
->is_empty_element
= !reader_cmp(reader
, L
"/>")))
2255 struct element
*element
= &reader
->empty_element
;
2258 reader_skipn(reader
, 2);
2260 reader_free_strvalued(reader
, &element
->qname
);
2261 reader_free_strvalued(reader
, &element
->localname
);
2263 element
->prefix
= *prefix
;
2264 reader_strvaldup(reader
, qname
, &element
->qname
);
2265 reader_strvaldup(reader
, local
, &element
->localname
);
2266 element
->position
= position
;
2267 reader_mark_ns_nodes(reader
, element
);
2271 /* got a start tag */
2272 if (!reader_cmp(reader
, L
">"))
2275 reader_skipn(reader
, 1);
2276 return reader_push_element(reader
, prefix
, local
, qname
, &position
);
2279 hr
= reader_parse_attribute(reader
);
2280 if (FAILED(hr
)) return hr
;
2286 /* [39] element ::= EmptyElemTag | STag content ETag */
2287 static HRESULT
reader_parse_element(xmlreader
*reader
)
2291 switch (reader
->resumestate
)
2293 case XmlReadResumeState_Initial
:
2294 /* check if we are really on element */
2295 if (reader_cmp(reader
, L
"<")) return S_FALSE
;
2298 reader_skipn(reader
, 1);
2300 reader_shrink(reader
);
2301 reader
->resumestate
= XmlReadResumeState_STag
;
2302 case XmlReadResumeState_STag
:
2304 strval qname
, prefix
, local
;
2306 /* this handles empty elements too */
2307 hr
= reader_parse_stag(reader
, &prefix
, &local
, &qname
);
2308 if (FAILED(hr
)) return hr
;
2310 /* FIXME: need to check for defined namespace to reject invalid prefix */
2312 /* if we got empty element and stack is empty go straight to Misc */
2313 if (reader
->is_empty_element
&& list_empty(&reader
->elements
))
2314 reader
->instate
= XmlReadInState_MiscEnd
;
2316 reader
->instate
= XmlReadInState_Content
;
2318 reader
->nodetype
= XmlNodeType_Element
;
2319 reader
->resumestate
= XmlReadResumeState_Initial
;
2320 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2321 reader_set_strvalue(reader
, StringValue_QualifiedName
, &qname
);
2322 reader_set_strvalue(reader
, StringValue_Value
, &strval_empty
);
2332 /* [13 NS] ETag ::= '</' QName S? '>' */
2333 static HRESULT
reader_parse_endtag(xmlreader
*reader
)
2335 struct reader_position position
;
2336 strval prefix
, local
, qname
;
2337 struct element
*element
;
2341 reader_skipn(reader
, 2);
2343 position
= reader
->position
;
2344 hr
= reader_parse_qname(reader
, &prefix
, &local
, &qname
);
2345 if (FAILED(hr
)) return hr
;
2347 reader_skipspaces(reader
);
2349 if (reader_cmp(reader
, L
">")) return WC_E_GREATERTHAN
;
2352 reader_skipn(reader
, 1);
2354 /* Element stack should never be empty at this point, cause we shouldn't get to
2355 content parsing if it's empty. */
2356 element
= LIST_ENTRY(list_head(&reader
->elements
), struct element
, entry
);
2357 if (!strval_eq(reader
, &element
->qname
, &qname
)) return WC_E_ELEMENTMATCH
;
2359 /* update position stored for start tag, we won't be using it */
2360 element
->position
= position
;
2362 reader
->nodetype
= XmlNodeType_EndElement
;
2363 reader
->is_empty_element
= FALSE
;
2364 reader_set_strvalue(reader
, StringValue_Prefix
, &prefix
);
2369 /* [18] CDSect ::= CDStart CData CDEnd
2370 [19] CDStart ::= '<![CDATA['
2371 [20] CData ::= (Char* - (Char* ']]>' Char*))
2372 [21] CDEnd ::= ']]>' */
2373 static HRESULT
reader_parse_cdata(xmlreader
*reader
)
2378 if (reader
->resumestate
== XmlReadResumeState_CDATA
)
2380 start
= reader
->resume
[XmlReadResume_Body
];
2381 ptr
= reader_get_ptr(reader
);
2385 /* skip markup '<![CDATA[' */
2386 reader_skipn(reader
, 9);
2387 reader_shrink(reader
);
2388 ptr
= reader_get_ptr(reader
);
2389 start
= reader_get_cur(reader
);
2390 reader
->nodetype
= XmlNodeType_CDATA
;
2391 reader
->resume
[XmlReadResume_Body
] = start
;
2392 reader
->resumestate
= XmlReadResumeState_CDATA
;
2393 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2398 if (*ptr
== ']' && *(ptr
+1) == ']' && *(ptr
+2) == '>')
2402 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2405 reader_skipn(reader
, 3);
2406 TRACE("%s\n", debug_strval(reader
, &value
));
2408 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2409 reader
->resume
[XmlReadResume_Body
] = 0;
2410 reader
->resumestate
= XmlReadResumeState_Initial
;
2415 reader_skipn(reader
, 1);
2416 ptr
= reader_get_ptr(reader
);
2423 /* [14] CharData ::= [^<&]* - ([^<&]* ']]>' [^<&]*) */
2424 static HRESULT
reader_parse_chardata(xmlreader
*reader
)
2426 struct reader_position position
;
2430 if (reader
->resumestate
== XmlReadResumeState_CharData
)
2432 start
= reader
->resume
[XmlReadResume_Body
];
2433 ptr
= reader_get_ptr(reader
);
2437 reader_shrink(reader
);
2438 ptr
= reader_get_ptr(reader
);
2439 start
= reader_get_cur(reader
);
2440 /* There's no text */
2441 if (!*ptr
|| *ptr
== '<') return S_OK
;
2442 reader
->nodetype
= is_wchar_space(*ptr
) ? XmlNodeType_Whitespace
: XmlNodeType_Text
;
2443 reader
->resume
[XmlReadResume_Body
] = start
;
2444 reader
->resumestate
= XmlReadResumeState_CharData
;
2445 reader_set_strvalue(reader
, StringValue_Value
, NULL
);
2448 position
= reader
->position
;
2451 /* CDATA closing sequence ']]>' is not allowed */
2452 if (ptr
[0] == ']' && ptr
[1] == ']' && ptr
[2] == '>')
2453 return WC_E_CDSECTEND
;
2455 /* Found next markup part */
2460 reader
->empty_element
.position
= position
;
2461 reader_init_strvalue(start
, reader_get_cur(reader
)-start
, &value
);
2462 reader_set_strvalue(reader
, StringValue_Value
, &value
);
2463 reader
->resume
[XmlReadResume_Body
] = 0;
2464 reader
->resumestate
= XmlReadResumeState_Initial
;
2468 /* this covers a case when text has leading whitespace chars */
2469 if (!is_wchar_space(*ptr
)) reader
->nodetype
= XmlNodeType_Text
;
2471 if (!reader_cmp(reader
, L
"&"))
2472 reader_parse_reference(reader
);
2474 reader_skipn(reader
, 1);
2476 ptr
= reader_get_ptr(reader
);
2482 /* [43] content ::= CharData? ((element | Reference | CDSect | PI | Comment) CharData?)* */
2483 static HRESULT
reader_parse_content(xmlreader
*reader
)
2485 if (reader
->resumestate
!= XmlReadResumeState_Initial
)
2487 switch (reader
->resumestate
)
2489 case XmlReadResumeState_CDATA
:
2490 return reader_parse_cdata(reader
);
2491 case XmlReadResumeState_Comment
:
2492 return reader_parse_comment(reader
);
2493 case XmlReadResumeState_PIBody
:
2494 case XmlReadResumeState_PITarget
:
2495 return reader_parse_pi(reader
);
2496 case XmlReadResumeState_CharData
:
2497 return reader_parse_chardata(reader
);
2499 ERR("unknown resume state %d\n", reader
->resumestate
);
2503 reader_shrink(reader
);
2505 /* handle end tag here, it indicates end of content as well */
2506 if (!reader_cmp(reader
, L
"</"))
2507 return reader_parse_endtag(reader
);
2509 if (!reader_cmp(reader
, L
"<!--"))
2510 return reader_parse_comment(reader
);
2512 if (!reader_cmp(reader
, L
"<?"))
2513 return reader_parse_pi(reader
);
2515 if (!reader_cmp(reader
, L
"<![CDATA["))
2516 return reader_parse_cdata(reader
);
2518 if (!reader_cmp(reader
, L
"<"))
2519 return reader_parse_element(reader
);
2521 /* what's left must be CharData */
2522 return reader_parse_chardata(reader
);
2525 static HRESULT
reader_parse_nextnode(xmlreader
*reader
)
2527 XmlNodeType nodetype
= reader_get_nodetype(reader
);
2530 if (!is_reader_pending(reader
))
2532 reader
->chunk_read_off
= 0;
2533 reader_clear_attrs(reader
);
2536 /* When moving from EndElement or empty element, pop its own namespace definitions */
2539 case XmlNodeType_Attribute
:
2540 reader_dec_depth(reader
);
2542 case XmlNodeType_Element
:
2543 if (reader
->is_empty_element
)
2544 reader_pop_ns_nodes(reader
, &reader
->empty_element
);
2545 else if (FAILED(hr
= reader_inc_depth(reader
)))
2548 case XmlNodeType_EndElement
:
2549 reader_pop_element(reader
);
2550 reader_dec_depth(reader
);
2558 switch (reader
->instate
)
2560 /* if it's a first call for a new input we need to detect stream encoding */
2561 case XmlReadInState_Initial
:
2565 hr
= readerinput_growraw(reader
->input
);
2566 if (FAILED(hr
)) return hr
;
2568 reader
->position
.line_number
= 1;
2569 reader
->position
.line_position
= 1;
2571 /* try to detect encoding by BOM or data and set input code page */
2572 hr
= readerinput_detectencoding(reader
->input
, &enc
);
2573 TRACE("detected encoding %s, 0x%08x\n", enc
== XmlEncoding_Unknown
? "(unknown)" :
2574 debugstr_w(xml_encoding_map
[enc
].name
), hr
);
2575 if (FAILED(hr
)) return hr
;
2577 /* always switch first time cause we have to put something in */
2578 readerinput_switchencoding(reader
->input
, enc
);
2580 /* parse xml declaration */
2581 hr
= reader_parse_xmldecl(reader
);
2582 if (FAILED(hr
)) return hr
;
2584 readerinput_shrinkraw(reader
->input
, -1);
2585 reader
->instate
= XmlReadInState_Misc_DTD
;
2586 if (hr
== S_OK
) return hr
;
2589 case XmlReadInState_Misc_DTD
:
2590 hr
= reader_parse_misc(reader
);
2591 if (FAILED(hr
)) return hr
;
2594 reader
->instate
= XmlReadInState_DTD
;
2598 case XmlReadInState_DTD
:
2599 hr
= reader_parse_dtd(reader
);
2600 if (FAILED(hr
)) return hr
;
2604 reader
->instate
= XmlReadInState_DTD_Misc
;
2608 reader
->instate
= XmlReadInState_Element
;
2610 case XmlReadInState_DTD_Misc
:
2611 hr
= reader_parse_misc(reader
);
2612 if (FAILED(hr
)) return hr
;
2615 reader
->instate
= XmlReadInState_Element
;
2619 case XmlReadInState_Element
:
2620 return reader_parse_element(reader
);
2621 case XmlReadInState_Content
:
2622 return reader_parse_content(reader
);
2623 case XmlReadInState_MiscEnd
:
2624 hr
= reader_parse_misc(reader
);
2625 if (hr
!= S_FALSE
) return hr
;
2627 if (*reader_get_ptr(reader
))
2629 WARN("found garbage in the end of XML\n");
2633 reader
->instate
= XmlReadInState_Eof
;
2634 reader
->state
= XmlReadState_EndOfFile
;
2635 reader
->nodetype
= XmlNodeType_None
;
2637 case XmlReadInState_Eof
:
2640 FIXME("internal state %d not handled\n", reader
->instate
);
2648 static HRESULT WINAPI
xmlreader_QueryInterface(IXmlReader
*iface
, REFIID riid
, void** ppvObject
)
2650 xmlreader
*This
= impl_from_IXmlReader(iface
);
2652 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
2654 if (IsEqualGUID(riid
, &IID_IUnknown
) ||
2655 IsEqualGUID(riid
, &IID_IXmlReader
))
2661 FIXME("interface %s not implemented\n", debugstr_guid(riid
));
2663 return E_NOINTERFACE
;
2666 IXmlReader_AddRef(iface
);
2671 static ULONG WINAPI
xmlreader_AddRef(IXmlReader
*iface
)
2673 xmlreader
*This
= impl_from_IXmlReader(iface
);
2674 ULONG ref
= InterlockedIncrement(&This
->ref
);
2675 TRACE("(%p)->(%d)\n", This
, ref
);
2679 static void reader_clear_ns(xmlreader
*reader
)
2681 struct ns
*ns
, *ns2
;
2683 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->ns
, struct ns
, entry
) {
2684 list_remove(&ns
->entry
);
2685 reader_free_strvalued(reader
, &ns
->prefix
);
2686 reader_free_strvalued(reader
, &ns
->uri
);
2687 reader_free(reader
, ns
);
2690 LIST_FOR_EACH_ENTRY_SAFE(ns
, ns2
, &reader
->nsdef
, struct ns
, entry
) {
2691 list_remove(&ns
->entry
);
2692 reader_free_strvalued(reader
, &ns
->uri
);
2693 reader_free(reader
, ns
);
2697 static void reader_reset_parser(xmlreader
*reader
)
2699 reader
->position
.line_number
= 0;
2700 reader
->position
.line_position
= 0;
2702 reader_clear_elements(reader
);
2703 reader_clear_attrs(reader
);
2704 reader_clear_ns(reader
);
2705 reader_free_strvalues(reader
);
2708 reader
->nodetype
= XmlNodeType_None
;
2709 reader
->resumestate
= XmlReadResumeState_Initial
;
2710 memset(reader
->resume
, 0, sizeof(reader
->resume
));
2711 reader
->is_empty_element
= FALSE
;
2714 static ULONG WINAPI
xmlreader_Release(IXmlReader
*iface
)
2716 xmlreader
*This
= impl_from_IXmlReader(iface
);
2717 LONG ref
= InterlockedDecrement(&This
->ref
);
2719 TRACE("(%p)->(%d)\n", This
, ref
);
2723 IMalloc
*imalloc
= This
->imalloc
;
2724 reader_reset_parser(This
);
2725 if (This
->input
) IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2726 if (This
->resolver
) IXmlResolver_Release(This
->resolver
);
2727 if (This
->mlang
) IUnknown_Release(This
->mlang
);
2728 reader_free(This
, This
);
2729 if (imalloc
) IMalloc_Release(imalloc
);
2735 static HRESULT WINAPI
xmlreader_SetInput(IXmlReader
* iface
, IUnknown
*input
)
2737 xmlreader
*This
= impl_from_IXmlReader(iface
);
2738 IXmlReaderInput
*readerinput
;
2741 TRACE("(%p)->(%p)\n", This
, input
);
2745 readerinput_release_stream(This
->input
);
2746 IUnknown_Release(&This
->input
->IXmlReaderInput_iface
);
2750 reader_reset_parser(This
);
2752 /* just reset current input */
2755 This
->state
= XmlReadState_Initial
;
2759 /* now try IXmlReaderInput, ISequentialStream, IStream */
2760 hr
= IUnknown_QueryInterface(input
, &IID_IXmlReaderInput
, (void**)&readerinput
);
2763 if (readerinput
->lpVtbl
== &xmlreaderinputvtbl
)
2764 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2767 ERR("got external IXmlReaderInput implementation: %p, vtbl=%p\n",
2768 readerinput
, readerinput
->lpVtbl
);
2769 IUnknown_Release(readerinput
);
2775 if (hr
!= S_OK
|| !readerinput
)
2777 /* create IXmlReaderInput basing on supplied interface */
2778 hr
= CreateXmlReaderInputWithEncodingName(input
,
2779 This
->imalloc
, NULL
, FALSE
, NULL
, &readerinput
);
2780 if (hr
!= S_OK
) return hr
;
2781 This
->input
= impl_from_IXmlReaderInput(readerinput
);
2784 /* set stream for supplied IXmlReaderInput */
2785 hr
= readerinput_query_for_stream(This
->input
);
2788 This
->state
= XmlReadState_Initial
;
2789 This
->instate
= XmlReadInState_Initial
;
2794 static HRESULT WINAPI
xmlreader_GetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR
*value
)
2796 xmlreader
*This
= impl_from_IXmlReader(iface
);
2798 TRACE("(%p)->(%s %p)\n", This
, debugstr_reader_prop(property
), value
);
2800 if (!value
) return E_INVALIDARG
;
2804 case XmlReaderProperty_MultiLanguage
:
2805 *value
= (LONG_PTR
)This
->mlang
;
2807 IUnknown_AddRef(This
->mlang
);
2809 case XmlReaderProperty_XmlResolver
:
2810 *value
= (LONG_PTR
)This
->resolver
;
2812 IXmlResolver_AddRef(This
->resolver
);
2814 case XmlReaderProperty_DtdProcessing
:
2815 *value
= This
->dtdmode
;
2817 case XmlReaderProperty_ReadState
:
2818 *value
= This
->state
;
2820 case XmlReaderProperty_MaxElementDepth
:
2821 *value
= This
->max_depth
;
2824 FIXME("Unimplemented property (%u)\n", property
);
2831 static HRESULT WINAPI
xmlreader_SetProperty(IXmlReader
* iface
, UINT property
, LONG_PTR value
)
2833 xmlreader
*This
= impl_from_IXmlReader(iface
);
2835 TRACE("(%p)->(%s 0x%lx)\n", This
, debugstr_reader_prop(property
), value
);
2839 case XmlReaderProperty_MultiLanguage
:
2841 IUnknown_Release(This
->mlang
);
2842 This
->mlang
= (IUnknown
*)value
;
2844 IUnknown_AddRef(This
->mlang
);
2846 FIXME("Ignoring MultiLanguage %p\n", This
->mlang
);
2848 case XmlReaderProperty_XmlResolver
:
2850 IXmlResolver_Release(This
->resolver
);
2851 This
->resolver
= (IXmlResolver
*)value
;
2853 IXmlResolver_AddRef(This
->resolver
);
2855 case XmlReaderProperty_DtdProcessing
:
2856 if (value
< 0 || value
> _DtdProcessing_Last
) return E_INVALIDARG
;
2857 This
->dtdmode
= value
;
2859 case XmlReaderProperty_MaxElementDepth
:
2860 This
->max_depth
= value
;
2863 FIXME("Unimplemented property (%u)\n", property
);
2870 static HRESULT WINAPI
xmlreader_Read(IXmlReader
* iface
, XmlNodeType
*nodetype
)
2872 xmlreader
*This
= impl_from_IXmlReader(iface
);
2873 XmlNodeType oldtype
= This
->nodetype
;
2877 TRACE("(%p)->(%p)\n", This
, nodetype
);
2882 switch (This
->state
)
2884 case XmlReadState_Closed
:
2887 case XmlReadState_Error
:
2891 hr
= reader_parse_nextnode(This
);
2892 if (SUCCEEDED(hr
) && oldtype
== XmlNodeType_None
&& This
->nodetype
!= oldtype
)
2893 This
->state
= XmlReadState_Interactive
;
2897 This
->state
= XmlReadState_Error
;
2898 This
->nodetype
= XmlNodeType_None
;
2904 TRACE("node type %s\n", debugstr_nodetype(This
->nodetype
));
2905 *nodetype
= This
->nodetype
;
2910 static HRESULT WINAPI
xmlreader_GetNodeType(IXmlReader
* iface
, XmlNodeType
*node_type
)
2912 xmlreader
*This
= impl_from_IXmlReader(iface
);
2914 TRACE("(%p)->(%p)\n", This
, node_type
);
2917 return E_INVALIDARG
;
2919 *node_type
= reader_get_nodetype(This
);
2920 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
2923 static void reader_set_current_attribute(xmlreader
*reader
, struct attribute
*attr
)
2925 reader
->attr
= attr
;
2926 reader
->chunk_read_off
= 0;
2927 reader_set_strvalue(reader
, StringValue_Prefix
, &attr
->prefix
);
2928 reader_set_strvalue(reader
, StringValue_QualifiedName
, &attr
->qname
);
2929 reader_set_strvalue(reader
, StringValue_Value
, &attr
->value
);
2932 static HRESULT
reader_move_to_first_attribute(xmlreader
*reader
)
2934 if (!reader
->attr_count
)
2938 reader_inc_depth(reader
);
2940 reader_set_current_attribute(reader
, LIST_ENTRY(list_head(&reader
->attrs
), struct attribute
, entry
));
2945 static HRESULT WINAPI
xmlreader_MoveToFirstAttribute(IXmlReader
* iface
)
2947 xmlreader
*This
= impl_from_IXmlReader(iface
);
2949 TRACE("(%p)\n", This
);
2951 return reader_move_to_first_attribute(This
);
2954 static HRESULT WINAPI
xmlreader_MoveToNextAttribute(IXmlReader
* iface
)
2956 xmlreader
*This
= impl_from_IXmlReader(iface
);
2957 const struct list
*next
;
2959 TRACE("(%p)\n", This
);
2961 if (!This
->attr_count
) return S_FALSE
;
2964 return reader_move_to_first_attribute(This
);
2966 next
= list_next(&This
->attrs
, &This
->attr
->entry
);
2968 reader_set_current_attribute(This
, LIST_ENTRY(next
, struct attribute
, entry
));
2970 return next
? S_OK
: S_FALSE
;
2973 static void reader_get_attribute_ns_uri(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**uri
, UINT
*len
)
2975 static const WCHAR xmlns_uriW
[] = L
"http://www.w3.org/2000/xmlns/";
2976 static const WCHAR xml_uriW
[] = L
"http://www.w3.org/XML/1998/namespace";
2978 /* Check for reserved prefixes first */
2979 if ((strval_eq(reader
, &attr
->prefix
, &strval_empty
) && strval_eq(reader
, &attr
->localname
, &strval_xmlns
)) ||
2980 strval_eq(reader
, &attr
->prefix
, &strval_xmlns
))
2983 *len
= ARRAY_SIZE(xmlns_uriW
) - 1;
2985 else if (strval_eq(reader
, &attr
->prefix
, &strval_xml
))
2988 *len
= ARRAY_SIZE(xml_uriW
) - 1;
3000 if ((ns
= reader_lookup_ns(reader
, &attr
->prefix
)))
3013 static void reader_get_attribute_local_name(xmlreader
*reader
, struct attribute
*attr
, const WCHAR
**name
, UINT
*len
)
3015 if (attr
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3020 else if (attr
->flags
& ATTRIBUTE_NS_DEFINITION
)
3022 const struct ns
*ns
= reader_lookup_ns(reader
, &attr
->localname
);
3023 *name
= ns
->prefix
.str
;
3024 *len
= ns
->prefix
.len
;
3028 *name
= attr
->localname
.str
;
3029 *len
= attr
->localname
.len
;
3033 static HRESULT WINAPI
xmlreader_MoveToAttributeByName(IXmlReader
* iface
,
3034 const WCHAR
*local_name
, const WCHAR
*namespace_uri
)
3036 xmlreader
*This
= impl_from_IXmlReader(iface
);
3037 UINT target_name_len
, target_uri_len
;
3038 struct attribute
*attr
;
3040 TRACE("(%p)->(%s %s)\n", This
, debugstr_w(local_name
), debugstr_w(namespace_uri
));
3043 return E_INVALIDARG
;
3045 if (!This
->attr_count
)
3049 namespace_uri
= emptyW
;
3051 target_name_len
= lstrlenW(local_name
);
3052 target_uri_len
= lstrlenW(namespace_uri
);
3054 LIST_FOR_EACH_ENTRY(attr
, &This
->attrs
, struct attribute
, entry
)
3056 UINT name_len
, uri_len
;
3057 const WCHAR
*name
, *uri
;
3059 reader_get_attribute_local_name(This
, attr
, &name
, &name_len
);
3060 reader_get_attribute_ns_uri(This
, attr
, &uri
, &uri_len
);
3062 if (name_len
== target_name_len
&& uri_len
== target_uri_len
&&
3063 !wcscmp(name
, local_name
) && !wcscmp(uri
, namespace_uri
))
3065 reader_set_current_attribute(This
, attr
);
3073 static HRESULT WINAPI
xmlreader_MoveToElement(IXmlReader
* iface
)
3075 xmlreader
*This
= impl_from_IXmlReader(iface
);
3077 TRACE("(%p)\n", This
);
3079 if (!This
->attr_count
) return S_FALSE
;
3082 reader_dec_depth(This
);
3086 /* FIXME: support other node types with 'attributes' like DTD */
3087 if (This
->is_empty_element
) {
3088 reader_set_strvalue(This
, StringValue_Prefix
, &This
->empty_element
.prefix
);
3089 reader_set_strvalue(This
, StringValue_QualifiedName
, &This
->empty_element
.qname
);
3092 struct element
*element
= LIST_ENTRY(list_head(&This
->elements
), struct element
, entry
);
3094 reader_set_strvalue(This
, StringValue_Prefix
, &element
->prefix
);
3095 reader_set_strvalue(This
, StringValue_QualifiedName
, &element
->qname
);
3098 This
->chunk_read_off
= 0;
3099 reader_set_strvalue(This
, StringValue_Value
, &strval_empty
);
3104 static HRESULT WINAPI
xmlreader_GetQualifiedName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3106 xmlreader
*This
= impl_from_IXmlReader(iface
);
3107 struct attribute
*attribute
= This
->attr
;
3108 struct element
*element
;
3111 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3116 switch (reader_get_nodetype(This
))
3118 case XmlNodeType_Text
:
3119 case XmlNodeType_CDATA
:
3120 case XmlNodeType_Comment
:
3121 case XmlNodeType_Whitespace
:
3125 case XmlNodeType_Element
:
3126 case XmlNodeType_EndElement
:
3127 element
= reader_get_element(This
);
3128 if (element
->prefix
.len
)
3130 *name
= element
->qname
.str
;
3131 *len
= element
->qname
.len
;
3135 *name
= element
->localname
.str
;
3136 *len
= element
->localname
.len
;
3139 case XmlNodeType_Attribute
:
3140 if (attribute
->flags
& ATTRIBUTE_DEFAULT_NS_DEFINITION
)
3144 } else if (attribute
->prefix
.len
)
3146 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3147 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3151 *name
= attribute
->localname
.str
;
3152 *len
= attribute
->localname
.len
;
3156 *name
= This
->strvalues
[StringValue_QualifiedName
].str
;
3157 *len
= This
->strvalues
[StringValue_QualifiedName
].len
;
3164 static struct ns
*reader_lookup_nsdef(xmlreader
*reader
)
3166 if (list_empty(&reader
->nsdef
))
3169 return LIST_ENTRY(list_head(&reader
->nsdef
), struct ns
, entry
);
3172 static HRESULT WINAPI
xmlreader_GetNamespaceUri(IXmlReader
* iface
, const WCHAR
**uri
, UINT
*len
)
3174 xmlreader
*This
= impl_from_IXmlReader(iface
);
3175 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3176 XmlNodeType nodetype
;
3180 TRACE("(%p %p %p)\n", iface
, uri
, len
);
3185 switch ((nodetype
= reader_get_nodetype(This
)))
3187 case XmlNodeType_Attribute
:
3188 reader_get_attribute_ns_uri(This
, This
->attr
, uri
, len
);
3190 case XmlNodeType_Element
:
3191 case XmlNodeType_EndElement
:
3193 ns
= reader_lookup_ns(This
, prefix
);
3195 /* pick top default ns if any */
3197 ns
= reader_lookup_nsdef(This
);
3209 case XmlNodeType_Text
:
3210 case XmlNodeType_CDATA
:
3211 case XmlNodeType_ProcessingInstruction
:
3212 case XmlNodeType_Comment
:
3213 case XmlNodeType_Whitespace
:
3214 case XmlNodeType_XmlDeclaration
:
3219 FIXME("Unhandled node type %d\n", nodetype
);
3228 static HRESULT WINAPI
xmlreader_GetLocalName(IXmlReader
* iface
, LPCWSTR
*name
, UINT
*len
)
3230 xmlreader
*This
= impl_from_IXmlReader(iface
);
3231 struct element
*element
;
3234 TRACE("(%p)->(%p %p)\n", This
, name
, len
);
3239 switch (reader_get_nodetype(This
))
3241 case XmlNodeType_Text
:
3242 case XmlNodeType_CDATA
:
3243 case XmlNodeType_Comment
:
3244 case XmlNodeType_Whitespace
:
3248 case XmlNodeType_Element
:
3249 case XmlNodeType_EndElement
:
3250 element
= reader_get_element(This
);
3251 *name
= element
->localname
.str
;
3252 *len
= element
->localname
.len
;
3254 case XmlNodeType_Attribute
:
3255 reader_get_attribute_local_name(This
, This
->attr
, name
, len
);
3258 *name
= This
->strvalues
[StringValue_LocalName
].str
;
3259 *len
= This
->strvalues
[StringValue_LocalName
].len
;
3266 static HRESULT WINAPI
xmlreader_GetPrefix(IXmlReader
* iface
, const WCHAR
**ret
, UINT
*len
)
3268 xmlreader
*This
= impl_from_IXmlReader(iface
);
3269 XmlNodeType nodetype
;
3272 TRACE("(%p)->(%p %p)\n", This
, ret
, len
);
3280 switch ((nodetype
= reader_get_nodetype(This
)))
3282 case XmlNodeType_Element
:
3283 case XmlNodeType_EndElement
:
3284 case XmlNodeType_Attribute
:
3286 const strval
*prefix
= &This
->strvalues
[StringValue_Prefix
];
3289 if (strval_eq(This
, prefix
, &strval_xml
))
3294 else if (strval_eq(This
, prefix
, &strval_xmlns
))
3299 else if ((ns
= reader_lookup_ns(This
, prefix
)))
3301 *ret
= ns
->prefix
.str
;
3302 *len
= ns
->prefix
.len
;
3314 static const strval
*reader_get_value(xmlreader
*reader
, BOOL ensure_allocated
)
3318 switch (reader_get_nodetype(reader
))
3320 case XmlNodeType_XmlDeclaration
:
3321 case XmlNodeType_EndElement
:
3322 case XmlNodeType_None
:
3323 return &strval_empty
;
3324 case XmlNodeType_Attribute
:
3325 /* For namespace definition attributes return values from namespace list */
3326 if (reader
->attr
->flags
& (ATTRIBUTE_NS_DEFINITION
| ATTRIBUTE_DEFAULT_NS_DEFINITION
))
3330 if (!(ns
= reader_lookup_ns(reader
, &reader
->attr
->localname
)))
3331 ns
= reader_lookup_nsdef(reader
);
3335 return &reader
->attr
->value
;
3340 val
= &reader
->strvalues
[StringValue_Value
];
3341 if (!val
->str
&& ensure_allocated
)
3343 WCHAR
*ptr
= reader_alloc(reader
, (val
->len
+1)*sizeof(WCHAR
));
3344 if (!ptr
) return NULL
;
3345 memcpy(ptr
, reader_get_strptr(reader
, val
), val
->len
*sizeof(WCHAR
));
3353 static HRESULT WINAPI
xmlreader_GetValue(IXmlReader
* iface
, const WCHAR
**value
, UINT
*len
)
3355 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3356 const strval
*val
= &reader
->strvalues
[StringValue_Value
];
3359 TRACE("(%p)->(%p %p)\n", reader
, value
, len
);
3363 if ((reader
->nodetype
== XmlNodeType_Comment
&& !val
->str
&& !val
->len
) || is_reader_pending(reader
))
3368 hr
= IXmlReader_Read(iface
, &type
);
3369 if (FAILED(hr
)) return hr
;
3371 /* return if still pending, partially read values are not reported */
3372 if (is_reader_pending(reader
)) return E_PENDING
;
3375 val
= reader_get_value(reader
, TRUE
);
3377 return E_OUTOFMEMORY
;
3379 off
= abs(reader
->chunk_read_off
);
3380 assert(off
<= val
->len
);
3381 *value
= val
->str
+ off
;
3382 if (len
) *len
= val
->len
- off
;
3383 reader
->chunk_read_off
= -off
;
3387 static HRESULT WINAPI
xmlreader_ReadValueChunk(IXmlReader
* iface
, WCHAR
*buffer
, UINT chunk_size
, UINT
*read
)
3389 xmlreader
*reader
= impl_from_IXmlReader(iface
);
3393 TRACE("(%p)->(%p %u %p)\n", reader
, buffer
, chunk_size
, read
);
3395 val
= reader_get_value(reader
, FALSE
);
3397 /* If value is already read by GetValue, chunk_read_off is negative and chunked reads are not possible. */
3398 if (reader
->chunk_read_off
>= 0)
3400 assert(reader
->chunk_read_off
<= val
->len
);
3401 len
= min(val
->len
- reader
->chunk_read_off
, chunk_size
);
3403 if (read
) *read
= len
;
3407 memcpy(buffer
, reader_get_strptr(reader
, val
) + reader
->chunk_read_off
, len
*sizeof(WCHAR
));
3408 reader
->chunk_read_off
+= len
;
3411 return len
|| !chunk_size
? S_OK
: S_FALSE
;
3414 static HRESULT WINAPI
xmlreader_GetBaseUri(IXmlReader
* iface
,
3416 UINT
*baseUri_length
)
3418 FIXME("(%p %p %p): stub\n", iface
, baseUri
, baseUri_length
);
3422 static BOOL WINAPI
xmlreader_IsDefault(IXmlReader
* iface
)
3424 FIXME("(%p): stub\n", iface
);
3428 static BOOL WINAPI
xmlreader_IsEmptyElement(IXmlReader
* iface
)
3430 xmlreader
*This
= impl_from_IXmlReader(iface
);
3431 TRACE("(%p)\n", This
);
3432 /* Empty elements are not placed in stack, it's stored as a global reader flag that makes sense
3433 when current node is start tag of an element */
3434 return (reader_get_nodetype(This
) == XmlNodeType_Element
) ? This
->is_empty_element
: FALSE
;
3437 static HRESULT WINAPI
xmlreader_GetLineNumber(IXmlReader
* iface
, UINT
*line_number
)
3439 xmlreader
*This
= impl_from_IXmlReader(iface
);
3440 const struct element
*element
;
3442 TRACE("(%p %p)\n", This
, line_number
);
3445 return E_INVALIDARG
;
3447 switch (reader_get_nodetype(This
))
3449 case XmlNodeType_Element
:
3450 case XmlNodeType_EndElement
:
3451 element
= reader_get_element(This
);
3452 *line_number
= element
->position
.line_number
;
3454 case XmlNodeType_Attribute
:
3455 *line_number
= This
->attr
->position
.line_number
;
3457 case XmlNodeType_Whitespace
:
3458 case XmlNodeType_XmlDeclaration
:
3459 *line_number
= This
->empty_element
.position
.line_number
;
3462 *line_number
= This
->position
.line_number
;
3466 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3469 static HRESULT WINAPI
xmlreader_GetLinePosition(IXmlReader
* iface
, UINT
*line_position
)
3471 xmlreader
*This
= impl_from_IXmlReader(iface
);
3472 const struct element
*element
;
3474 TRACE("(%p %p)\n", This
, line_position
);
3477 return E_INVALIDARG
;
3479 switch (reader_get_nodetype(This
))
3481 case XmlNodeType_Element
:
3482 case XmlNodeType_EndElement
:
3483 element
= reader_get_element(This
);
3484 *line_position
= element
->position
.line_position
;
3486 case XmlNodeType_Attribute
:
3487 *line_position
= This
->attr
->position
.line_position
;
3489 case XmlNodeType_Whitespace
:
3490 case XmlNodeType_XmlDeclaration
:
3491 *line_position
= This
->empty_element
.position
.line_position
;
3494 *line_position
= This
->position
.line_position
;
3498 return This
->state
== XmlReadState_Closed
? S_FALSE
: S_OK
;
3501 static HRESULT WINAPI
xmlreader_GetAttributeCount(IXmlReader
* iface
, UINT
*count
)
3503 xmlreader
*This
= impl_from_IXmlReader(iface
);
3505 TRACE("(%p)->(%p)\n", This
, count
);
3507 if (!count
) return E_INVALIDARG
;
3509 *count
= This
->attr_count
;
3513 static HRESULT WINAPI
xmlreader_GetDepth(IXmlReader
* iface
, UINT
*depth
)
3515 xmlreader
*This
= impl_from_IXmlReader(iface
);
3516 TRACE("(%p)->(%p)\n", This
, depth
);
3517 *depth
= This
->depth
;
3521 static BOOL WINAPI
xmlreader_IsEOF(IXmlReader
* iface
)
3523 xmlreader
*This
= impl_from_IXmlReader(iface
);
3524 TRACE("(%p)\n", iface
);
3525 return This
->state
== XmlReadState_EndOfFile
;
3528 static const struct IXmlReaderVtbl xmlreader_vtbl
=
3530 xmlreader_QueryInterface
,
3534 xmlreader_GetProperty
,
3535 xmlreader_SetProperty
,
3537 xmlreader_GetNodeType
,
3538 xmlreader_MoveToFirstAttribute
,
3539 xmlreader_MoveToNextAttribute
,
3540 xmlreader_MoveToAttributeByName
,
3541 xmlreader_MoveToElement
,
3542 xmlreader_GetQualifiedName
,
3543 xmlreader_GetNamespaceUri
,
3544 xmlreader_GetLocalName
,
3545 xmlreader_GetPrefix
,
3547 xmlreader_ReadValueChunk
,
3548 xmlreader_GetBaseUri
,
3549 xmlreader_IsDefault
,
3550 xmlreader_IsEmptyElement
,
3551 xmlreader_GetLineNumber
,
3552 xmlreader_GetLinePosition
,
3553 xmlreader_GetAttributeCount
,
3558 /** IXmlReaderInput **/
3559 static HRESULT WINAPI
xmlreaderinput_QueryInterface(IXmlReaderInput
*iface
, REFIID riid
, void** ppvObject
)
3561 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3563 TRACE("(%p)->(%s %p)\n", This
, debugstr_guid(riid
), ppvObject
);
3565 if (IsEqualGUID(riid
, &IID_IXmlReaderInput
) ||
3566 IsEqualGUID(riid
, &IID_IUnknown
))
3572 WARN("interface %s not implemented\n", debugstr_guid(riid
));
3574 return E_NOINTERFACE
;
3577 IUnknown_AddRef(iface
);
3582 static ULONG WINAPI
xmlreaderinput_AddRef(IXmlReaderInput
*iface
)
3584 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3585 ULONG ref
= InterlockedIncrement(&This
->ref
);
3586 TRACE("(%p)->(%d)\n", This
, ref
);
3590 static ULONG WINAPI
xmlreaderinput_Release(IXmlReaderInput
*iface
)
3592 xmlreaderinput
*This
= impl_from_IXmlReaderInput(iface
);
3593 LONG ref
= InterlockedDecrement(&This
->ref
);
3595 TRACE("(%p)->(%d)\n", This
, ref
);
3599 IMalloc
*imalloc
= This
->imalloc
;
3600 if (This
->input
) IUnknown_Release(This
->input
);
3601 if (This
->stream
) ISequentialStream_Release(This
->stream
);
3602 if (This
->buffer
) free_input_buffer(This
->buffer
);
3603 readerinput_free(This
, This
->baseuri
);
3604 readerinput_free(This
, This
);
3605 if (imalloc
) IMalloc_Release(imalloc
);
3611 static const struct IUnknownVtbl xmlreaderinputvtbl
=
3613 xmlreaderinput_QueryInterface
,
3614 xmlreaderinput_AddRef
,
3615 xmlreaderinput_Release
3618 HRESULT WINAPI
CreateXmlReader(REFIID riid
, void **obj
, IMalloc
*imalloc
)
3624 TRACE("(%s, %p, %p)\n", wine_dbgstr_guid(riid
), obj
, imalloc
);
3627 reader
= IMalloc_Alloc(imalloc
, sizeof(*reader
));
3629 reader
= heap_alloc(sizeof(*reader
));
3631 return E_OUTOFMEMORY
;
3633 memset(reader
, 0, sizeof(*reader
));
3634 reader
->IXmlReader_iface
.lpVtbl
= &xmlreader_vtbl
;
3636 reader
->state
= XmlReadState_Closed
;
3637 reader
->instate
= XmlReadInState_Initial
;
3638 reader
->resumestate
= XmlReadResumeState_Initial
;
3639 reader
->dtdmode
= DtdProcessing_Prohibit
;
3640 reader
->imalloc
= imalloc
;
3641 if (imalloc
) IMalloc_AddRef(imalloc
);
3642 reader
->nodetype
= XmlNodeType_None
;
3643 list_init(&reader
->attrs
);
3644 list_init(&reader
->nsdef
);
3645 list_init(&reader
->ns
);
3646 list_init(&reader
->elements
);
3647 reader
->max_depth
= 256;
3649 reader
->chunk_read_off
= 0;
3650 for (i
= 0; i
< StringValue_Last
; i
++)
3651 reader
->strvalues
[i
] = strval_empty
;
3653 hr
= IXmlReader_QueryInterface(&reader
->IXmlReader_iface
, riid
, obj
);
3654 IXmlReader_Release(&reader
->IXmlReader_iface
);
3656 TRACE("returning iface %p, hr %#x\n", *obj
, hr
);
3661 HRESULT WINAPI
CreateXmlReaderInputWithEncodingName(IUnknown
*stream
,
3666 IXmlReaderInput
**ppInput
)
3668 xmlreaderinput
*readerinput
;
3671 TRACE("%p %p %s %d %s %p\n", stream
, imalloc
, wine_dbgstr_w(encoding
),
3672 hint
, wine_dbgstr_w(base_uri
), ppInput
);
3674 if (!stream
|| !ppInput
) return E_INVALIDARG
;
3677 readerinput
= IMalloc_Alloc(imalloc
, sizeof(*readerinput
));
3679 readerinput
= heap_alloc(sizeof(*readerinput
));
3680 if(!readerinput
) return E_OUTOFMEMORY
;
3682 readerinput
->IXmlReaderInput_iface
.lpVtbl
= &xmlreaderinputvtbl
;
3683 readerinput
->ref
= 1;
3684 readerinput
->imalloc
= imalloc
;
3685 readerinput
->stream
= NULL
;
3686 if (imalloc
) IMalloc_AddRef(imalloc
);
3687 readerinput
->encoding
= parse_encoding_name(encoding
, -1);
3688 readerinput
->hint
= hint
;
3689 readerinput
->baseuri
= readerinput_strdupW(readerinput
, base_uri
);
3690 readerinput
->pending
= 0;
3692 hr
= alloc_input_buffer(readerinput
);
3695 readerinput_free(readerinput
, readerinput
->baseuri
);
3696 readerinput_free(readerinput
, readerinput
);
3697 if (imalloc
) IMalloc_Release(imalloc
);
3700 IUnknown_QueryInterface(stream
, &IID_IUnknown
, (void**)&readerinput
->input
);
3702 *ppInput
= &readerinput
->IXmlReaderInput_iface
;
3704 TRACE("returning iface %p\n", *ppInput
);