1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #ifndef INCLUDED_XMLREADER_XMLREADER_HXX
21 #define INCLUDED_XMLREADER_XMLREADER_HXX
23 #include <sal/config.h>
28 #include <com/sun/star/container/NoSuchElementException.hpp>
29 #include <com/sun/star/uno/RuntimeException.hpp>
31 #include <rtl/ustring.hxx>
32 #include <sal/types.h>
33 #include <xmlreader/detail/xmlreaderdllapi.hxx>
34 #include <xmlreader/pad.hxx>
35 #include <xmlreader/span.hxx>
39 class OOO_DLLPUBLIC_XMLREADER XmlReader
{
// Constructs a reader from a character buffer given as a pointer (sStr) plus
// a length (nLength).
// NOTE(review): presumably the buffer holds the XML text to parse; whether the
// reader copies it or requires it to outlive the reader is not visible from
// this declaration -- confirm against the implementation.
41 XmlReader(char const *sStr
, size_t nLength
);
// Constructs a reader from a file URL. Declared explicit, so an OUString is
// never implicitly converted into an XmlReader.
// NOTE(review): this header includes NoSuchElementException and
// RuntimeException; presumably they are thrown when the file is missing or
// cannot be read -- confirm against the implementation.
43 explicit XmlReader(OUString
const & fileUrl
);
// Predefined namespace ids: two negative sentinels (NAMESPACE_NONE = -2,
// NAMESPACE_UNKNOWN = -1) and the fixed id 0 for NAMESPACE_XML.
// NOTE(review): presumably NAMESPACE_XML denotes the reserved "xml" prefix
// namespace and other ids come from registerNamespaceIri -- confirm.
47 enum { NAMESPACE_NONE
= -2, NAMESPACE_UNKNOWN
= -1, NAMESPACE_XML
= 0 };
// Text-reporting mode; this is the type of nextItem's reportText parameter.
// NOTE(review): presumably TEXT_NONE skips character data, TEXT_RAW reports it
// unchanged and TEXT_NORMALIZED reports it whitespace-normalized -- confirm.
49 enum Text
{ TEXT_NONE
, TEXT_RAW
, TEXT_NORMALIZED
};
// Kind of item produced by nextItem (its return type).
// NOTE(review): presumably RESULT_BEGIN/RESULT_END mark element start/end,
// RESULT_TEXT marks character data and RESULT_DONE marks the end of the
// document -- confirm against the implementation.
51 enum Result
{ RESULT_BEGIN
, RESULT_END
, RESULT_TEXT
, RESULT_DONE
};
// Registers a namespace IRI, passed as a Span, and returns an int.
// NOTE(review): presumably the return value is the namespace id that nextItem
// and nextAttribute later report for names in that namespace -- confirm.
53 int registerNamespaceIri(Span
const & iri
);
// Advances to the next item and returns its kind. reportText selects the
// text-reporting mode; data and nsId are output parameters whose meaning
// depends on the returned Result, as listed below ("ns" refers to *nsId).
55 // RESULT_BEGIN: data = localName, ns = ns
56 // RESULT_END: data, ns unused
57 // RESULT_TEXT: data = text, ns unused
58 Result
nextItem(Text reportText
, Span
* data
, int * nsId
);
// Steps to the next attribute; nsId and localName are output parameters.
// NOTE(review): presumably returns false once the current element has no
// further attributes, leaving the outputs unspecified -- confirm.
60 bool nextAttribute(int * nsId
, Span
* localName
);
// Returns the value of an attribute as a Span; fullyNormalize is a flag
// selecting how the value is normalized.
// NOTE(review): presumably refers to the attribute most recently selected by
// nextAttribute -- confirm.
62 // the span returned by getAttributeValue is only valid until the next call
63 // to nextItem or getAttributeValue
64 Span
getAttributeValue(bool fullyNormalize
);
// Maps a namespace prefix (as a Span) to an int namespace id; const, so it
// does not modify the reader.
// NOTE(review): presumably resolves against the namespace declarations
// currently in scope and may yield NAMESPACE_UNKNOWN -- confirm.
66 int getNamespaceId(Span
const & prefix
) const;
68 const OUString
& getUrl() const { return fileUrl_
;}
// Copy construction and copy assignment are both declared with
// SAL_DELETED_FUNCTION: XmlReader objects are not meant to be copied.
71 XmlReader(const XmlReader
&) SAL_DELETED_FUNCTION
;
72 XmlReader
& operator=(const XmlReader
&) SAL_DELETED_FUNCTION
;
// Sequence of Spans; this is the type of the namespaceIris_ member.
// NOTE(review): presumably holds the IRIs passed to registerNamespaceIri, with
// toNamespaceId converting a position in it to a namespace id -- confirm.
74 typedef std::vector
< Span
> NamespaceIris
;
76 // If NamespaceData (and similarly ElementData and AttributeData) is made
77 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
78 // "'xmlreader::XmlReader' declared with greater visibility than the type of
79 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
80 // elements_ and attributes_):
// Associates a namespace prefix with a namespace id: the constructor below
// copies its two arguments into the prefix and nsId members.
82 struct NamespaceData
{
89 NamespaceData(Span
const & thePrefix
, int theNsId
):
90 prefix(thePrefix
), nsId(theNsId
) {}
// Sequence of NamespaceData entries; this is the type of the namespaces_
// member.
93 typedef std::vector
< NamespaceData
> NamespaceList
;
// Per-element bookkeeping (this type is stored in ElementStack). The
// constructor initializers below set name, inheritedNamespaces and
// defaultNamespaceId from the corresponding arguments.
// NOTE(review): inheritedNamespaces is presumably the size of the namespace
// list when the element was opened, so that declarations made by the element
// can be dropped when it closes -- confirm against the implementation.
97 NamespaceList::size_type inheritedNamespaces
;
// NOTE(review): presumably the namespace id applied to unprefixed element
// names inside this element -- confirm.
98 int defaultNamespaceId
;
101 Span
const & theName
,
102 NamespaceList::size_type theInheritedNamespaces
,
103 int theDefaultNamespaceId
):
104 name(theName
), inheritedNamespaces(theInheritedNamespaces
),
105 defaultNamespaceId(theDefaultNamespaceId
)
// Stack of ElementData; this is the type of the elements_ member.
// NOTE(review): presumably one entry per currently open element -- confirm.
109 typedef std::stack
< ElementData
> ElementStack
;
// Describes one attribute by five character pointers delimiting its name and
// value; the constructor initializers below copy each argument into the member
// of the same name.
// NOTE(review): the pointers presumably refer into the reader's input buffer
// (no text is copied), and nameColon presumably marks the ':' separating a
// prefix from the local name -- confirm against the implementation.
111 struct AttributeData
{
112 char const * nameBegin
;
113 char const * nameEnd
;
114 char const * nameColon
;
115 char const * valueBegin
;
116 char const * valueEnd
;
119 char const * theNameBegin
, char const * theNameEnd
,
120 char const * theNameColon
, char const * theValueBegin
,
121 char const * theValueEnd
):
122 nameBegin(theNameBegin
), nameEnd(theNameEnd
),
123 nameColon(theNameColon
), valueBegin(theValueBegin
),
124 valueEnd(theValueEnd
)
// Sequence of AttributeData; this is the type of the attributes_ member, and
// currentAttribute_ is an iterator over it.
128 typedef std::vector
< AttributeData
> Attributes
;
// Parser-state enumerators; the trailing comma shows that further enumerators
// follow.
// NOTE(review): presumably these record which construct the reader is
// currently positioned in (content, start tag, end tag, empty-element tag)
// -- confirm against the implementation.
131 STATE_CONTENT
, STATE_START_TAG
, STATE_END_TAG
, STATE_EMPTY_ELEMENT_TAG
,
134 SAL_DLLPRIVATE
inline char read() { return pos_
== end_
? '\0' : *pos_
++; }
136 SAL_DLLPRIVATE
inline char peek() { return pos_
== end_
? '\0' : *pos_
; }
// Internal helper (marked SAL_DLLPRIVATE); only declared here.
// NOTE(review): presumably normalizes line ends within `text` in the sense of
// XML end-of-line handling -- confirm against the implementation.
138 SAL_DLLPRIVATE
void normalizeLineEnds(Span
const & text
);
// Internal skipping helpers (all marked SAL_DLLPRIVATE); only declared here.
// NOTE(review): presumably each advances the read position past the named
// construct (white space, comment, processing instruction, DOCTYPE
// declaration) -- confirm against the implementation.
140 SAL_DLLPRIVATE
void skipSpace();
// Unlike the other skip helpers this one returns bool.
// NOTE(review): presumably reports whether a comment was actually found and
// skipped -- confirm.
142 SAL_DLLPRIVATE
bool skipComment();
144 SAL_DLLPRIVATE
void skipProcessingInstruction();
146 SAL_DLLPRIVATE
void skipDocumentTypeDeclaration();
// Internal scanning helpers (all marked SAL_DLLPRIVATE); only declared here.
// NOTE(review): presumably returns the content of a CDATA section as a Span
// -- confirm against the implementation.
148 SAL_DLLPRIVATE Span
scanCdataSection();
// nameColon is an output parameter (pointer to a char pointer).
// NOTE(review): presumably receives the position of the ':' in a qualified
// name, and the bool result reports whether a name was scanned -- confirm.
150 SAL_DLLPRIVATE
bool scanName(char const ** nameColon
);
// Takes a [begin, end) character range and returns an int.
// NOTE(review): presumably the range is a namespace IRI and the result is its
// namespace id -- confirm.
152 SAL_DLLPRIVATE
int scanNamespaceIri(
153 char const * begin
, char const * end
);
// Internal helper (marked SAL_DLLPRIVATE); only declared here. Takes a
// position and an end pointer and returns a character pointer.
// NOTE(review): presumably processes a character/entity reference starting at
// `position` and returns where processing should continue -- confirm.
155 SAL_DLLPRIVATE
char const * handleReference(
156 char const * position
, char const * end
);
// Internal helper (marked SAL_DLLPRIVATE); only declared here. Takes a
// [begin, end) character range plus the fullyNormalize flag and returns a Span.
// NOTE(review): presumably the worker behind getAttributeValue -- confirm.
158 SAL_DLLPRIVATE Span
handleAttributeValue(
159 char const * begin
, char const * end
, bool fullyNormalize
);
// Internal tag and text handlers (all marked SAL_DLLPRIVATE); only declared
// here. Those returning Result use the same result type as nextItem.
// NOTE(review): presumably nextItem dispatches to these depending on the
// parser state and the requested Text mode -- confirm against the
// implementation.
// nsId and localName are output parameters.
161 SAL_DLLPRIVATE Result
handleStartTag(int * nsId
, Span
* localName
);
163 SAL_DLLPRIVATE Result
handleEndTag();
165 SAL_DLLPRIVATE
void handleElementEnd();
// data and nsId are output parameters, mirroring nextItem's.
167 SAL_DLLPRIVATE Result
handleSkippedText(Span
* data
, int * nsId
);
// text is an output parameter.
169 SAL_DLLPRIVATE Result
handleRawText(Span
* text
);
// text is an output parameter.
171 SAL_DLLPRIVATE Result
handleNormalizedText(Span
* text
);
// Internal helper (marked SAL_DLLPRIVATE); only declared here. Converts a
// NamespaceIris position (size_type) to an int.
// NOTE(review): presumably the int is the public namespace id corresponding
// to that entry of namespaceIris_ -- confirm against the implementation.
173 SAL_DLLPRIVATE
int toNamespaceId(NamespaceIris::size_type pos
);
// Reader state.
// NOTE(review): fileHandle_ and fileSize_ presumably describe the input file
// opened by the URL constructor -- confirm.
176 oslFileHandle fileHandle_
;
177 sal_uInt64 fileSize_
;
// Namespace bookkeeping: the Span list typed NamespaceIris and the
// prefix/id pairs typed NamespaceList.
179 NamespaceIris namespaceIris_
;
180 NamespaceList namespaces_
;
// Stack of ElementData entries.
181 ElementStack elements_
;
// Attribute list together with an iterator into it and a flag.
// NOTE(review): presumably filled when a start tag is handled and walked by
// nextAttribute, with firstAttribute_ marking that no attribute has been
// visited yet -- confirm.
185 Attributes attributes_
;
186 Attributes::iterator currentAttribute_
;
187 bool firstAttribute_
;
195 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */