1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #ifndef INCLUDED_XMLREADER_XMLREADER_HXX
21 #define INCLUDED_XMLREADER_XMLREADER_HXX
23 #include "sal/config.h"
28 #include "boost/noncopyable.hpp"
29 #include "com/sun/star/container/NoSuchElementException.hpp"
30 #include "com/sun/star/uno/RuntimeException.hpp"
32 #include "rtl/ustring.hxx"
33 #include "sal/types.h"
34 #include "xmlreader/detail/xmlreaderdllapi.hxx"
35 #include "xmlreader/pad.hxx"
36 #include "xmlreader/span.hxx"
40 class OOO_DLLPUBLIC_XMLREADER XmlReader
: private boost::noncopyable
{
// Constructs an XmlReader from a file URL (explicit, so an OUString is never
// converted to a reader implicitly).
// The exception list that follows names
// com::sun::star::container::NoSuchElementException and
// com::sun::star::uno::RuntimeException; the conditions under which each one
// is raised are decided by the implementation, which is not in this header.
// NOTE(review): the line that opens this exception list is absent from this
// copy of the file -- confirm against the original header.
42 explicit XmlReader(OUString
const & fileUrl
)
44 com::sun::star::container::NoSuchElementException
,
45 com::sun::star::uno::RuntimeException
));
// Predefined namespace ids: NAMESPACE_XML is 0, NAMESPACE_UNKNOWN is -1 and
// NAMESPACE_NONE is -2.
// Presumably ids assigned to other namespaces are positive, so they cannot
// collide with these -- TODO confirm in the implementation.
49 enum { NAMESPACE_NONE
= -2, NAMESPACE_UNKNOWN
= -1, NAMESPACE_XML
= 0 };
// Text reporting mode; nextItem takes one of these as its reportText argument.
// Going by the names only: TEXT_NONE = do not report text, TEXT_RAW = report
// it unmodified, TEXT_NORMALIZED = report it normalized (assumption -- verify
// against the implementation).
51 enum Text
{ TEXT_NONE
, TEXT_RAW
, TEXT_NORMALIZED
};
// Kind of item reported by nextItem (this is its return type).  The note on
// nextItem lists the outputs for RESULT_BEGIN, RESULT_END and RESULT_TEXT;
// RESULT_DONE presumably means that no further items follow -- TODO confirm.
53 enum Result
{ RESULT_BEGIN
, RESULT_END
, RESULT_TEXT
, RESULT_DONE
};
// Takes a namespace IRI as a Span and returns an int; presumably that int is
// the namespace id later reported through the nsId out-parameters of
// nextItem and nextAttribute -- TODO confirm.
55 int registerNamespaceIri(Span
const & iri
);
// Returns the kind of the next item.  reportText selects the Text mode; what
// is stored through data and nsId depends on the returned Result:
57 // RESULT_BEGIN: *data = local name, *nsId = namespace id
58 // RESULT_END: data and nsId unused
59 // RESULT_TEXT: *data = text, nsId unused
60 Result
nextItem(Text reportText
, Span
* data
, int * nsId
);
// Attribute iteration step: returns a bool and writes through nsId and
// localName.  Presumably it returns false once no further attribute is
// available, leaving the outputs untouched -- TODO confirm.
62 bool nextAttribute(int * nsId
, Span
* localName
);
// Returns an attribute value as a Span (presumably for the attribute most
// recently reported by nextAttribute -- TODO confirm).  fullyNormalize is
// forwarded as a flag; its name suggests it requests full normalization of
// the value.
64 // The span returned by getAttributeValue is only valid until the next call
65 // to nextItem or getAttributeValue.
66 Span
getAttributeValue(bool fullyNormalize
);
// Const member mapping a prefix Span to an int namespace id.  What is
// returned for a prefix that is not bound is not visible in this header
// (NAMESPACE_UNKNOWN is the natural candidate -- verify).
68 int getNamespaceId(Span
const & prefix
) const;
// Const accessor returning a URL as an OUString; presumably the fileUrl that
// was passed to the constructor -- TODO confirm.
70 OUString
getUrl() const;
// Vector of Span; this is the type of the namespaceIris_ member, and its
// size_type is the parameter type of toNamespaceId.
73 typedef std::vector
< Span
> NamespaceIris
;
75 // If NamespaceData (and similarly ElementData and AttributeData) is made
76 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
77 // "'xmlreader::XmlReader' declared with greater visibility than the type of
78 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
79 // elements_ and attributes_):
// Pairs a namespace prefix (a Span) with an int namespace id.
81 struct NamespaceData
{
// Initializes prefix from thePrefix and nsId from theNsId.
87 NamespaceData(Span
const & thePrefix
, int theNsId
):
88 prefix(thePrefix
), nsId(theNsId
) {}
// NOTE(review): the declarations of prefix and nsId and the closing brace of
// this struct are not present in this copy of the file.
// Vector of NamespaceData; this is the type of the namespaces_ member.
91 typedef std::vector
< NamespaceData
> NamespaceList
;
// NOTE(review): this is a fragment.  The enclosing struct header (presumably
// ElementData, the element type of ElementStack), the declaration of its
// name member and the start of its constructor are not present in this copy.
// A NamespaceList size; presumably the number of namespaces_ entries that
// were already in effect when the element was opened -- TODO confirm.
95 NamespaceList::size_type inheritedNamespaces
;
// Default namespace id recorded for the element.
96 int defaultNamespaceId
;
// Tail of the constructor: name, inheritedNamespaces and defaultNamespaceId
// are initialized one-to-one from theName, theInheritedNamespaces and
// theDefaultNamespaceId.
100 NamespaceList::size_type theInheritedNamespaces
,
101 int theDefaultNamespaceId
):
102 name(theName
), inheritedNamespaces(theInheritedNamespaces
),
103 defaultNamespaceId(theDefaultNamespaceId
)
// Stack of ElementData; this is the type of the elements_ member.
107 typedef std::stack
< ElementData
> ElementStack
;
// Describes one attribute by five char const pointers.  The struct does not
// own or copy any characters; presumably the pointers refer into the buffer
// being parsed -- TODO confirm.
109 struct AttributeData
{
// nameBegin/nameEnd: presumably the half-open range of the attribute name.
110 char const * nameBegin
;
111 char const * nameEnd
;
// nameColon: presumably the position of the ':' separating prefix and local
// name within the name range -- TODO confirm how "no colon" is represented.
112 char const * nameColon
;
// valueBegin/valueEnd: presumably the half-open range of the attribute value.
113 char const * valueBegin
;
114 char const * valueEnd
;
// Tail of the constructor: each member is initialized from the argument of
// the same name with a "the" prefix.
// NOTE(review): the constructor's opening line, its body and the closing
// brace of the struct are not present in this copy of the file.
117 char const * theNameBegin
, char const * theNameEnd
,
118 char const * theNameColon
, char const * theValueBegin
,
119 char const * theValueEnd
):
120 nameBegin(theNameBegin
), nameEnd(theNameEnd
),
121 nameColon(theNameColon
), valueBegin(theValueBegin
),
122 valueEnd(theValueEnd
)
// Vector of AttributeData; this is the type of the attributes_ member, and
// Attributes::iterator is the type of currentAttribute_.
126 typedef std::vector
< AttributeData
> Attributes
;
// NOTE(review): fragment of an enumerator list; the enum header and whatever
// follows the trailing comma are not present in this copy of the file.
// The names suggest parser states: in element content, inside a start tag,
// inside an end tag, inside an empty-element tag.
129 STATE_CONTENT
, STATE_START_TAG
, STATE_END_TAG
, STATE_EMPTY_ELEMENT_TAG
,
132 SAL_DLLPRIVATE
inline char read() { return pos_
== end_
? '\0' : *pos_
++; }
134 SAL_DLLPRIVATE
inline char peek() { return pos_
== end_
? '\0' : *pos_
; }
// Private helpers, all marked SAL_DLLPRIVATE.  Only the declarations are
// visible in this header, so each note is limited to the signature; anything
// about behaviour is inferred from the name and needs confirming in the
// implementation file.
// Takes a text Span and returns nothing; presumably normalizes the line ends
// found in that text.
136 SAL_DLLPRIVATE
void normalizeLineEnds(Span
const & text
);
// No arguments, no result; presumably advances past white space.
138 SAL_DLLPRIVATE
void skipSpace();
// No arguments, returns bool; presumably true when a comment was skipped --
// TODO confirm the meaning of false.
140 SAL_DLLPRIVATE
bool skipComment();
// No arguments, no result; presumably advances past a processing instruction.
142 SAL_DLLPRIVATE
void skipProcessingInstruction();
// No arguments, no result; presumably advances past a document type
// declaration.
144 SAL_DLLPRIVATE
void skipDocumentTypeDeclaration();
// No arguments, returns a Span; presumably the content of a CDATA section.
146 SAL_DLLPRIVATE Span
scanCdataSection();
// Returns bool and writes a char const pointer through nameColon; presumably
// scans a name and reports where its ':' is, if any -- TODO confirm what is
// stored when the name has no colon.
148 SAL_DLLPRIVATE
bool scanName(char const ** nameColon
);
// Takes a character range and returns an int; presumably the namespace id
// for the IRI spelled by that range -- TODO confirm.
150 SAL_DLLPRIVATE
int scanNamespaceIri(
151 char const * begin
, char const * end
);
// Takes a position and an end pointer and returns a char const pointer;
// presumably processes the reference starting at position and returns where
// processing should continue -- TODO confirm.
153 SAL_DLLPRIVATE
char const * handleReference(
154 char const * position
, char const * end
);
// Takes a character range plus the fullyNormalize flag and returns a Span;
// presumably the worker behind getAttributeValue -- TODO confirm.
156 SAL_DLLPRIVATE Span
handleAttributeValue(
157 char const * begin
, char const * end
, bool fullyNormalize
);
// The handle* functions below return a Result, the same type nextItem
// returns; presumably nextItem dispatches to them -- TODO confirm in the
// implementation file.
// Writes through nsId and localName, matching the outputs nextItem documents
// for RESULT_BEGIN.
159 SAL_DLLPRIVATE Result
handleStartTag(int * nsId
, Span
* localName
);
// No arguments; presumably processes an end tag.
161 SAL_DLLPRIVATE Result
handleEndTag();
// No arguments, no result; presumably the bookkeeping done when an element
// ends -- TODO confirm.
163 SAL_DLLPRIVATE
void handleElementEnd();
// Takes the same data/nsId out-parameters as nextItem; the name suggests it
// serves the mode in which text is not reported.
165 SAL_DLLPRIVATE Result
handleSkippedText(Span
* data
, int * nsId
);
// Writes a text Span through text; the name suggests unmodified text.
167 SAL_DLLPRIVATE Result
handleRawText(Span
* text
);
// Writes a text Span through text; the name suggests normalized text.
169 SAL_DLLPRIVATE Result
handleNormalizedText(Span
* text
);
// Converts a NamespaceIris position (its size_type) to an int namespace id;
// the exact mapping is not visible in this header.
171 SAL_DLLPRIVATE
int toNamespaceId(NamespaceIris::size_type pos
);
// Instance state.
// NOTE(review): pos_ and end_, which read() and peek() use, are not declared
// in the visible part of this list -- some member declarations appear to be
// missing from this copy of the file.
// File handle; presumably for the file named by the constructor's fileUrl.
174 oslFileHandle fileHandle_
;
// A 64-bit unsigned size; presumably the size of that file.
175 sal_uInt64 fileSize_
;
// Spans of namespace IRIs (see the NamespaceIris typedef).
177 NamespaceIris namespaceIris_
;
// Prefix/id pairs (see NamespaceData and the NamespaceList typedef).
178 NamespaceList namespaces_
;
// Stack of ElementData; presumably one entry per currently open element.
179 ElementStack elements_
;
// AttributeData entries; presumably those of the current start tag.
183 Attributes attributes_
;
// Iterator of the Attributes type; presumably the iteration position used by
// nextAttribute -- TODO confirm.
184 Attributes::iterator currentAttribute_
;
// Flag; presumably set until nextAttribute has been called for the current
// element -- TODO confirm.
185 bool firstAttribute_
;
193 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */