update emoji autocorrect entries from po-files
[LibreOffice.git] / include / xmlreader / xmlreader.hxx
blob 27a4be2aa425e3d49a351199864189266c461d45
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
18 */
20 #ifndef INCLUDED_XMLREADER_XMLREADER_HXX
21 #define INCLUDED_XMLREADER_XMLREADER_HXX
23 #include <sal/config.h>
25 #include <stack>
26 #include <vector>
28 #include <com/sun/star/container/NoSuchElementException.hpp>
29 #include <com/sun/star/uno/RuntimeException.hpp>
30 #include <osl/file.h>
31 #include <rtl/ustring.hxx>
32 #include <sal/types.h>
33 #include <xmlreader/detail/xmlreaderdllapi.hxx>
34 #include <xmlreader/pad.hxx>
35 #include <xmlreader/span.hxx>
37 namespace xmlreader {
// Reader for XML input, constructed either from an in-memory character
// buffer or from a file URL. Items are retrieved one at a time through
// nextItem(). Instances cannot be copied: the copy constructor and the copy
// assignment operator are declared private with SAL_DELETED_FUNCTION.
39 class OOO_DLLPUBLIC_XMLREADER XmlReader {
40 public:
// Constructs a reader over nLength chars starting at sStr.
// NOTE(review): whether the buffer is copied or must outlive the reader is
// not visible in this header -- confirm in the implementation.
41 XmlReader(char const *sStr, size_t nLength);
// Constructs a reader for the file identified by fileUrl. Declared explicit,
// so an OUString is never implicitly converted into an XmlReader.
43 explicit XmlReader(OUString const & fileUrl);
45 ~XmlReader();
// Reserved namespace id values. NOTE(review): judging by the names these
// stand for "no namespace", "namespace not known" and the predefined xml
// namespace -- confirm against the implementation.
47 enum { NAMESPACE_NONE = -2, NAMESPACE_UNKNOWN = -1, NAMESPACE_XML = 0 };
// Text reporting mode, passed to nextItem() as reportText.
49 enum Text { TEXT_NONE, TEXT_RAW, TEXT_NORMALIZED };
// Kind of item reported by nextItem().
51 enum Result { RESULT_BEGIN, RESULT_END, RESULT_TEXT, RESULT_DONE };
// Registers a namespace IRI with the reader.
// NOTE(review): the returned int is presumably the id under which this IRI
// is later reported through the nsId out-parameters -- confirm.
53 int registerNamespaceIri(Span const & iri);
// Reports the next item. Which out-parameters carry meaningful values
// depends on the returned Result:
55 // RESULT_BEGIN: data = localName, ns = ns
56 // RESULT_END: data, ns unused
57 // RESULT_TEXT: data = text, ns unused
58 Result nextItem(Text reportText, Span * data, int * nsId);
// NOTE(review): presumably steps through the attributes of the element most
// recently reported with RESULT_BEGIN and returns false once there are no
// more -- confirm in the implementation.
60 bool nextAttribute(int * nsId, Span * localName);
62 // the span returned by getAttributeValue is only valid until the next call
63 // to nextItem or getAttributeValue
64 Span getAttributeValue(bool fullyNormalize);
// Maps a namespace prefix to a namespace id; const, so the reader itself is
// not modified by the lookup.
66 int getNamespaceId(Span const & prefix) const;
// Returns a reference to the stored fileUrl_ member (no copy is made).
68 const OUString& getUrl() const { return fileUrl_;}
70 private:
// Copying is disabled.
71 XmlReader(const XmlReader&) SAL_DELETED_FUNCTION;
72 XmlReader& operator=(const XmlReader&) SAL_DELETED_FUNCTION;
// Container type of the namespaceIris_ member.
74 typedef std::vector< Span > NamespaceIris;
76 // If NamespaceData (and similarly ElementData and AttributeData) is made
77 // SAL_DLLPRIVATE, at least gcc 4.2.3 erroneously warns about
78 // "'xmlreader::XmlReader' declared with greater visibility than the type of
79 // its field 'xmlreader::XmlReader::namespaces_'" (and similarly for
80 // elements_ and attributes_):
// Pairs a namespace prefix with a namespace id. The default constructor
// leaves prefix default-constructed and sets nsId to -1, which is the numeric
// value of NAMESPACE_UNKNOWN.
82 struct NamespaceData {
83 Span prefix;
84 int nsId;
86 NamespaceData():
87 nsId(-1) {}
89 NamespaceData(Span const & thePrefix, int theNsId):
90 prefix(thePrefix), nsId(theNsId) {}
// Container type of the namespaces_ member.
93 typedef std::vector< NamespaceData > NamespaceList;
// Record stored on the ElementStack: the element name, a NamespaceList
// size/index value, and a default namespace id.
// NOTE(review): inheritedNamespaces presumably records how many entries
// namespaces_ held before this element's own declarations -- confirm.
95 struct ElementData {
96 Span name;
97 NamespaceList::size_type inheritedNamespaces;
98 int defaultNamespaceId;
100 ElementData(
101 Span const & theName,
102 NamespaceList::size_type theInheritedNamespaces,
103 int theDefaultNamespaceId):
104 name(theName), inheritedNamespaces(theInheritedNamespaces),
105 defaultNamespaceId(theDefaultNamespaceId)
// Container type of the elements_ member.
109 typedef std::stack< ElementData > ElementStack;
// Raw character positions describing one attribute: begin/end of its name,
// the position of a colon within the name, and begin/end of its value.
// NOTE(review): presumably all five point into the input being parsed, and
// nameColon is null when the name has no prefix -- confirm.
111 struct AttributeData {
112 char const * nameBegin;
113 char const * nameEnd;
114 char const * nameColon;
115 char const * valueBegin;
116 char const * valueEnd;
118 AttributeData(
119 char const * theNameBegin, char const * theNameEnd,
120 char const * theNameColon, char const * theValueBegin,
121 char const * theValueEnd):
122 nameBegin(theNameBegin), nameEnd(theNameEnd),
123 nameColon(theNameColon), valueBegin(theValueBegin),
124 valueEnd(theValueEnd)
// Container type of the attributes_ member.
128 typedef std::vector< AttributeData > Attributes;
// Values taken by the state_ member.
130 enum State {
131 STATE_CONTENT, STATE_START_TAG, STATE_END_TAG, STATE_EMPTY_ELEMENT_TAG,
132 STATE_DONE };
// Returns the character at pos_ and advances pos_ by one. Once pos_ has
// reached end_ it returns '\0' and leaves pos_ unchanged, so a '\0' result
// alone does not distinguish end of input from a NUL character in the input.
134 SAL_DLLPRIVATE inline char read() { return pos_ == end_ ? '\0' : *pos_++; }
// Same as read(), except that pos_ is never advanced.
136 SAL_DLLPRIVATE inline char peek() { return pos_ == end_ ? '\0' : *pos_; }
// Internal helpers, all marked SAL_DLLPRIVATE. Only their declarations appear
// in this header; see the implementation file for their behavior.
138 SAL_DLLPRIVATE void normalizeLineEnds(Span const & text);
140 SAL_DLLPRIVATE void skipSpace();
142 SAL_DLLPRIVATE bool skipComment();
144 SAL_DLLPRIVATE void skipProcessingInstruction();
146 SAL_DLLPRIVATE void skipDocumentTypeDeclaration();
148 SAL_DLLPRIVATE Span scanCdataSection();
150 SAL_DLLPRIVATE bool scanName(char const ** nameColon);
152 SAL_DLLPRIVATE int scanNamespaceIri(
153 char const * begin, char const * end);
155 SAL_DLLPRIVATE char const * handleReference(
156 char const * position, char const * end);
158 SAL_DLLPRIVATE Span handleAttributeValue(
159 char const * begin, char const * end, bool fullyNormalize);
161 SAL_DLLPRIVATE Result handleStartTag(int * nsId, Span * localName);
163 SAL_DLLPRIVATE Result handleEndTag();
165 SAL_DLLPRIVATE void handleElementEnd();
167 SAL_DLLPRIVATE Result handleSkippedText(Span * data, int * nsId);
169 SAL_DLLPRIVATE Result handleRawText(Span * text);
171 SAL_DLLPRIVATE Result handleNormalizedText(Span * text);
173 SAL_DLLPRIVATE int toNamespaceId(NamespaceIris::size_type pos);
// Data members. fileUrl_ is what getUrl() returns; pos_ and end_ are the
// current read position and the end of the input as used by read()/peek().
// NOTE(review): fileHandle_, fileSize_ and fileAddress_ presumably describe
// the opened (possibly memory-mapped) input file -- confirm.
175 OUString fileUrl_;
176 oslFileHandle fileHandle_;
177 sal_uInt64 fileSize_;
178 void * fileAddress_;
179 NamespaceIris namespaceIris_;
180 NamespaceList namespaces_;
181 ElementStack elements_;
182 char const * pos_;
183 char const * end_;
184 State state_;
185 Attributes attributes_;
186 Attributes::iterator currentAttribute_;
187 bool firstAttribute_;
188 Pad pad_;
193 #endif
195 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */