1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
22 #include <sal/config.h>
28 #include <rtl/ustring.hxx>
29 #include <sal/types.h>
30 #include <xmlreader/detail/xmlreaderdllapi.hxx>
31 #include <xmlreader/pad.hxx>
32 #include <xmlreader/span.hxx>
36 class OOO_DLLPUBLIC_XMLREADER XmlReader
{
38 explicit XmlReader(OUString fileUrl
);
// Reserved namespace ids; all are <= 0.
// NOTE(review): judging by the names, NONE means "no namespace", UNKNOWN an
// unregistered one, and XML the predefined "xml" prefix -- confirm against
// the implementation.
enum
{
    NAMESPACE_NONE = -2,
    NAMESPACE_UNKNOWN = -1,
    NAMESPACE_XML = 0
};
// Type of the reportText argument of nextItem().
// NOTE(review): the enumerator names suggest "do not report text", "report it
// unmodified" and "report it whitespace-normalized" -- confirm against the
// implementation.
enum class Text
{
    NONE,
    Raw,
    Normalized
};
// Return type of nextItem() and of the private handleStartTag(),
// handleEndTag() and handle*Text() helpers.
enum class Result
{
    Begin,
    End,
    Text,
    Done
};
48 int registerNamespaceIri(Span
const & iri
);
50 // RESULT_BEGIN: data = localName, ns = ns
51 // RESULT_END: data, ns unused
52 // RESULT_TEXT: data = text, ns unused
53 Result
nextItem(Text reportText
, Span
* data
, int * nsId
);
55 bool nextAttribute(int * nsId
, Span
* localName
);
57 // the span returned by getAttributeValue is only valid until the next call
58 // to nextItem or getAttributeValue
59 Span
getAttributeValue(bool fullyNormalize
);
61 int getNamespaceId(Span
const & prefix
) const;
63 const OUString
& getUrl() const { return fileUrl_
;}
66 XmlReader(const XmlReader
&) = delete;
67 XmlReader
& operator=(const XmlReader
&) = delete;
69 typedef std::vector
< Span
> NamespaceIris
;
71 // If NamespaceData (and similarly ElementData and AttributeData) is made
72 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
73 // "'xmlreader::XmlReader' declared with greater visibility than the type of
74 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
75 // elements_ and attributes_):
77 struct NamespaceData
{
84 NamespaceData(Span
const & thePrefix
, int theNsId
):
85 prefix(thePrefix
), nsId(theNsId
) {}
88 typedef std::vector
< NamespaceData
> NamespaceList
;
92 NamespaceList::size_type inheritedNamespaces
;
93 int defaultNamespaceId
;
97 NamespaceList::size_type theInheritedNamespaces
,
98 int theDefaultNamespaceId
):
99 name(theName
), inheritedNamespaces(theInheritedNamespaces
),
100 defaultNamespaceId(theDefaultNamespaceId
)
104 typedef std::stack
< ElementData
> ElementStack
;
106 struct AttributeData
{
107 char const * nameBegin
;
108 char const * nameEnd
;
109 char const * nameColon
;
110 char const * valueBegin
;
111 char const * valueEnd
;
114 char const * theNameBegin
, char const * theNameEnd
,
115 char const * theNameColon
, char const * theValueBegin
,
116 char const * theValueEnd
):
117 nameBegin(theNameBegin
), nameEnd(theNameEnd
),
118 nameColon(theNameColon
), valueBegin(theValueBegin
),
119 valueEnd(theValueEnd
)
123 typedef std::vector
< AttributeData
> Attributes
;
// Internal state enumeration.
// NOTE(review): the names suggest these track what kind of markup the reader
// is currently positioned at -- confirm against the implementation.
enum class State
{
    Content,
    StartTag,
    EndTag,
    EmptyElementTag,
    Done
};
127 SAL_DLLPRIVATE
char read() { return pos_
== end_
? '\0' : *pos_
++; }
129 SAL_DLLPRIVATE
char peek() const { return pos_
== end_
? '\0' : *pos_
; }
131 SAL_DLLPRIVATE
void normalizeLineEnds(Span
const & text
);
133 SAL_DLLPRIVATE
void skipSpace();
135 SAL_DLLPRIVATE
bool skipComment();
137 SAL_DLLPRIVATE
void skipProcessingInstruction();
139 SAL_DLLPRIVATE
void skipDocumentTypeDeclaration();
141 SAL_DLLPRIVATE Span
scanCdataSection();
143 SAL_DLLPRIVATE
bool scanName(char const ** nameColon
);
145 SAL_DLLPRIVATE
int scanNamespaceIri(
146 char const * begin
, char const * end
);
148 SAL_DLLPRIVATE
char const * handleReference(
149 char const * position
, char const * end
);
151 SAL_DLLPRIVATE Span
handleAttributeValue(
152 char const * begin
, char const * end
, bool fullyNormalize
);
154 SAL_DLLPRIVATE Result
handleStartTag(int * nsId
, Span
* localName
);
156 SAL_DLLPRIVATE Result
handleEndTag();
158 SAL_DLLPRIVATE
void handleElementEnd();
160 SAL_DLLPRIVATE Result
handleSkippedText(Span
* data
, int * nsId
);
162 SAL_DLLPRIVATE Result
handleRawText(Span
* text
);
164 SAL_DLLPRIVATE Result
handleNormalizedText(Span
* text
);
166 SAL_DLLPRIVATE
static int toNamespaceId(NamespaceIris::size_type pos
);
168 OUString
const fileUrl_
;
169 oslFileHandle fileHandle_
;
170 sal_uInt64 fileSize_
;
172 NamespaceIris namespaceIris_
;
173 NamespaceList namespaces_
;
174 ElementStack elements_
;
178 Attributes attributes_
;
179 Attributes::iterator currentAttribute_
;
180 bool firstAttribute_
;
186 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */