Version 7.6.3.2-android, tag libreoffice-7.6.3.2-android
[LibreOffice.git] / sax / source / tools / fastserializer.cxx
blob9461e784ca4a0c887f6c58f22538fa3a3c9dfdfd
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "fastserializer.hxx"
22 #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23 #include <rtl/math.h>
24 #include <sal/log.hxx>
25 #include <comphelper/processfactory.hxx>
26 #include <comphelper/sequence.hxx>
28 #include <cassert>
29 #include <optional>
30 #include <string.h>
31 #include <string_view>
32 #include <utility>
34 #if OSL_DEBUG_LEVEL > 0
35 #include <iostream>
36 #include <set>
37 #endif
39 using ::std::vector;
40 using ::com::sun::star::uno::Sequence;
41 using ::com::sun::star::io::XOutputStream;
// A token packs a namespace id into bits 16 and up and a local token id into
// the low 16 bits. The macro arguments are parenthesized so that compound
// expressions such as `a | b` expand with the intended precedence.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
49 const char sClosingBracket[] = ">";
50 const char sSlashAndClosingBracket[] = "/>";
51 constexpr OStringLiteral sColon = ":";
52 const char sOpeningBracket[] = "<";
53 const char sOpeningBracketAndSlash[] = "</";
54 const char sQuote[] = "\"";
55 const char sEqualSignAndQuote[] = "=\"";
56 const char sSpace[] = " ";
57 const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";
59 namespace sax_fastparser {
60 FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
61 : mbMarkStackEmpty(true)
62 , mpDoubleStr(nullptr)
63 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
64 , mbXescape(true)
66 rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
67 mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
68 ::comphelper::getProcessComponentContext());
69 assert(xOutputStream.is()); // cannot do anything without that
70 maCachedOutputStream.setOutputStream( xOutputStream );
73 FastSaxSerializer::~FastSaxSerializer()
75 rtl_string_release(mpDoubleStr);
78 void FastSaxSerializer::startDocument()
80 writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
83 void FastSaxSerializer::write( double value )
85 rtl_math_doubleToString(
86 &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
87 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
88 0, true);
90 write(mpDoubleStr->buffer, mpDoubleStr->length);
91 // and "clear" the string
92 mpDoubleStr->length = 0;
93 mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
96 void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape )
98 write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
102 void FastSaxSerializer::write( std::string_view sOutput, bool bEscape )
104 write( sOutput.data(), sOutput.length(), bEscape );
/** Characters not allowed in XML 1.0
    XML 1.1 would exclude only U+0000

    Inspects the character that starts at string[index] (index < length is
    asserted). Returns an empty optional when that character is acceptable;
    otherwise returns a pair of the offending code point and the number of bytes
    it occupies in `string`. Flagged are the bytes below 0x20 other than tab,
    line feed and carriage return, and the UTF-8 encodings of U+FFFE and U+FFFF.

    This assumes that `string` is UTF-8, but which appears to generally be the case:  The only
    user of this FastSaxSerializer code is FastSerializerHelper, and when its constructor
    (sax/source/tools/fshelper.cxx) is called with bWriteHeader being true, it calls
    FastSaxSerializer::startDocument, which writes sXmlHeader claiming encoding="UTF-8".  The
    only place that appears to construct FastSerializerHelper appears to be
    XmlFilterBase::openFragmentStreamWithSerializer (oox/source/core/xmlfilterbase.cxx), and it
    only passes false for bWriteHeader when the given rMediaType contains "vml" but not "+xml"
    (see <https://git.libreoffice.org/core/+/6a11add2c4ea975356cfb7bab02301788c79c904%5E!/>
    "XLSX VML Export fixes", stating "Don't write xml headers for vml files").  But let's assume
    that even such Vector Markup Language files are written as UTF-8.
 */
template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar(
    char const * string, Int length, Int index )
{
    assert(index < length);
    auto const c = string[index];

    // Common case first: anything from 0x20 upwards is fine, except that a 0xEF
    // lead byte may start U+FFFE / U+FFFF and has to be looked at more closely.
    if (static_cast<unsigned char>(c) >= 0x20 && c != '\xEF')
        return {};

    switch (c)
    {
        case 0x09:
        case 0x0a:
        case 0x0d:
            return {};
        case '\xEF': // U+FFFE, U+FFFF:
            if (length - index >= 3 && string[index + 1] == '\xBF') {
                switch (string[index + 2]) {
                case '\xBE':
                    return std::pair<unsigned, Int>(0xFFFEu, 3);
                case '\xBF':
                    return std::pair<unsigned, Int>(0xFFFFu, 3);
                }
            }
            return {};
    }
    // Remaining control character: report the single byte itself.
    return std::pair<unsigned, Int>(static_cast<unsigned char>(c), 1);
}
/// Returns true if c is an ASCII hexadecimal digit (0-9, A-F or a-f).
static bool isHexDigit( char c )
{
    return ('0' <= c && c <= '9') || ('A' <= c && c <= 'F') || ('a' <= c && c <= 'f');
}
155 void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
157 if (nLen == -1)
158 nLen = pStr ? strlen(pStr) : 0;
160 if (!bEscape)
162 writeBytes( pStr, nLen );
163 return;
166 bool bGood = true;
167 const sal_Int32 kXescapeLen = 7;
168 char bufXescape[kXescapeLen+1];
169 sal_Int32 nNextXescape = 0;
170 for (sal_Int32 i = 0; i < nLen;)
172 char c = pStr[ i ];
173 switch( c )
175 case '<': writeBytes( "&lt;", 4 ); break;
176 case '>': writeBytes( "&gt;", 4 ); break;
177 case '&': writeBytes( "&amp;", 5 ); break;
178 case '\'': writeBytes( "&apos;", 6 ); break;
179 case '"': writeBytes( "&quot;", 6 ); break;
180 case '\t':
181 #if 0
182 // Seems OOXML prefers the _xHHHH_ escape over the
183 // entity in *some* cases, apparently in attribute
184 // values but not in element data.
185 // Would need to distinguish at a higher level.
186 if (mbXescape)
188 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
189 static_cast<unsigned int>(static_cast<unsigned char>(c)));
190 writeBytes( bufXescape, kXescapeLen);
192 else
193 #endif
195 writeBytes( "&#9;", 4 );
197 break;
198 case '\n':
199 #if 0
200 if (mbXescape)
202 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
203 static_cast<unsigned int>(static_cast<unsigned char>(c)));
204 writeBytes( bufXescape, kXescapeLen);
206 else
207 #endif
209 writeBytes( "&#10;", 5 );
211 break;
212 case '\r':
213 #if 0
214 if (mbXescape)
216 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
217 static_cast<unsigned int>(static_cast<unsigned char>(c)));
218 writeBytes( bufXescape, kXescapeLen);
220 else
221 #endif
223 writeBytes( "&#13;", 5 );
225 break;
226 default:
227 if (mbXescape)
229 char c1, c2, c3, c4;
230 // Escape characters not valid in XML 1.0 as
231 // _xHHHH_. A literal "_xHHHH_" has to be
232 // escaped as _x005F_xHHHH_ (effectively
233 // escaping the leading '_').
234 // See ECMA-376-1:2016 page 3736,
235 // 22.4.2.4 bstr (Basic String)
236 // for reference.
237 if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
238 pStr[i+6] == '_' &&
239 ((pStr[i+1] | 0x20) == 'x') &&
240 isHexDigit( c1 = pStr[i+2] ) &&
241 isHexDigit( c2 = pStr[i+3] ) &&
242 isHexDigit( c3 = pStr[i+4] ) &&
243 isHexDigit( c4 = pStr[i+5] ))
245 // OOXML has the odd habit to write some
246 // names using this that when re-saving
247 // should *not* be escaped, specifically
248 // _x0020_ for blanks in w:xpath values.
249 if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
251 // When encountering "_x005F_xHHHH_"
252 // assume that is an already escaped
253 // sequence that was not unescaped and
254 // shall be written as is, to not end
255 // up with "_x005F_x005F_xHHHH_" and
256 // repeated...
257 if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
258 i + kXescapeLen <= nLen - 6 &&
259 pStr[i+kXescapeLen+5] == '_' &&
260 ((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
261 isHexDigit( pStr[i+kXescapeLen+1] ) &&
262 isHexDigit( pStr[i+kXescapeLen+2] ) &&
263 isHexDigit( pStr[i+kXescapeLen+3] ) &&
264 isHexDigit( pStr[i+kXescapeLen+4] ))
266 writeBytes( &c, 1 );
267 // Remember this fake escapement.
268 nNextXescape = i + kXescapeLen + 6;
270 else
272 writeBytes( "_x005F_", kXescapeLen);
273 // Remember this escapement so in
274 // _xHHHH_xHHHH_ only the first '_'
275 // is escaped.
276 nNextXescape = i + kXescapeLen;
278 break;
281 if (auto const inv = invalidChar(pStr, nLen, i))
283 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
284 inv->first);
285 writeBytes( bufXescape, kXescapeLen);
286 i += inv->second;
287 continue;
290 #if OSL_DEBUG_LEVEL > 0
291 else
293 if (bGood && invalidChar(pStr, nLen, i))
295 bGood = false;
296 // The SAL_WARN() for the single character is
297 // issued in writeBytes(), just gather for the
298 // SAL_WARN_IF() below.
301 #endif
302 writeBytes( &c, 1 );
303 break;
305 ++i;
307 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
310 void FastSaxSerializer::endDocument()
312 assert(mbMarkStackEmpty && maMarkStack.empty());
313 maCachedOutputStream.flush();
316 void FastSaxSerializer::writeId( ::sal_Int32 nElement )
318 if( HAS_NAMESPACE( nElement ) ) {
319 auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
320 assert(Namespace.hasElements());
321 writeBytes(Namespace);
322 writeBytes(sColon.getStr(), sColon.getLength());
323 auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
324 assert(Element.hasElements());
325 writeBytes(Element);
326 } else {
327 auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement));
328 assert(Element.hasElements());
329 writeBytes(Element);
#ifdef DBG_UTIL
/** Debug helper: returns the name writeId() would emit for nElement.

    @param nElement  token, optionally combined with a namespace id
    @return "<namespace>:<name>" if a namespace part is present, else "<name>"
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (HAS_NAMESPACE(nElement)) {
        Sequence<sal_Int8> const ns(
            mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
        return std::string_view(
                reinterpret_cast<char const*>(ns.getConstArray()), ns.getLength())
             + sColon
             + std::string_view(
                reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
    } else {
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
    }
}
#endif
354 void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
356 if ( !mbMarkStackEmpty )
358 maCachedOutputStream.flush();
359 maMarkStack.top()->setCurrentElement( Element );
362 #ifdef DBG_UTIL
363 if (mbMarkStackEmpty)
364 m_DebugStartedElements.push(Element);
365 else
366 maMarkStack.top()->m_DebugStartedElements.push_back(Element);
367 #endif
369 writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
371 writeId(Element);
372 if (pAttrList)
373 writeFastAttributeList(*pAttrList);
374 else
375 writeTokenValueList();
377 writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
380 void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
382 #ifdef DBG_UTIL
383 // Well-formedness constraint: Element Type Match
384 if (mbMarkStackEmpty)
386 assert(!m_DebugStartedElements.empty());
387 assert(Element == m_DebugStartedElements.top());
388 m_DebugStartedElements.pop();
390 else
392 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
394 // Sort is always well-formed fragment
395 assert(!maMarkStack.top()->m_DebugStartedElements.empty());
397 if (maMarkStack.top()->m_DebugStartedElements.empty())
399 maMarkStack.top()->m_DebugEndedElements.push_back(Element);
401 else
403 assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
404 maMarkStack.top()->m_DebugStartedElements.pop_back();
407 #endif
409 writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
411 writeId(Element);
413 writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
416 void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
418 if ( !mbMarkStackEmpty )
420 maCachedOutputStream.flush();
421 maMarkStack.top()->setCurrentElement( Element );
424 writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
426 writeId(Element);
427 if (pAttrList)
428 writeFastAttributeList(*pAttrList);
429 else
430 writeTokenValueList();
432 writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
435 css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const
437 return maCachedOutputStream.getOutputStream();
440 void FastSaxSerializer::writeTokenValueList()
442 #ifdef DBG_UTIL
443 ::std::set<OString> DebugAttributes;
444 #endif
445 for (const TokenValue & rTokenValue : maTokenValues)
447 writeBytes(sSpace, N_CHARS(sSpace));
449 sal_Int32 nToken = rTokenValue.nToken;
450 writeId(nToken);
452 #ifdef DBG_UTIL
453 // Well-formedness constraint: Unique Att Spec
454 OString const nameId(getId(nToken));
455 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
456 DebugAttributes.insert(nameId);
457 #endif
459 writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
461 write(rTokenValue.pValue, -1, true);
463 writeBytes(sQuote, N_CHARS(sQuote));
465 maTokenValues.clear();
468 void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList)
470 #ifdef DBG_UTIL
471 ::std::set<OString> DebugAttributes;
472 #endif
473 const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
474 for (size_t j = 0; j < Tokens.size(); j++)
476 writeBytes(sSpace, N_CHARS(sSpace));
478 sal_Int32 nToken = Tokens[j];
479 writeId(nToken);
481 #ifdef DBG_UTIL
482 // Well-formedness constraint: Unique Att Spec
483 OString const nameId(getId(nToken));
484 SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
485 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
486 DebugAttributes.insert(nameId);
487 #endif
489 writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
491 const char* pAttributeValue = rAttrList.getFastAttributeValue(j);
493 // tdf#117274 don't escape the special VML shape type id "#_x0000_t202"
494 bool bEscape = !(pAttributeValue
495 && *pAttributeValue != '\0'
496 && (*pAttributeValue == '#'
497 ? strncmp(pAttributeValue, "#_x0000_t", 9) == 0
498 : strncmp(pAttributeValue, "_x0000_t", 8) == 0));
500 write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape);
502 writeBytes(sQuote, N_CHARS(sQuote));
506 void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder)
508 if (rOrder.hasElements())
510 auto pSort = std::make_shared<ForSort>(nTag, rOrder);
511 maMarkStack.push( pSort );
512 maCachedOutputStream.setOutput( pSort );
514 else
516 auto pMerge = std::make_shared<ForMerge>(nTag);
517 maMarkStack.push( pMerge );
518 maCachedOutputStream.setOutput( pMerge );
520 mbMarkStackEmpty = false;
#ifdef DBG_UTIL
/** Debug bookkeeping for appending the right fragment after the left one.

    Each end in the right fragment either closes the innermost start still open
    in the left fragment (asserted to match) or, if none is open, is recorded
    as an unmatched end of the left fragment. The right fragment's open starts
    are then moved to the left fragment. Both right-hand deques end up empty.
 */
static void lcl_DebugMergeAppend(
        std::deque<sal_Int32> & rLeftEndedElements,
        std::deque<sal_Int32> & rLeftStartedElements,
        std::deque<sal_Int32> & rRightEndedElements,
        std::deque<sal_Int32> & rRightStartedElements)
{
    while (!rRightEndedElements.empty())
    {
        if (rLeftStartedElements.empty())
        {
            rLeftEndedElements.push_back(rRightEndedElements.front());
        }
        else
        {
            assert(rLeftStartedElements.back() == rRightEndedElements.front());
            rLeftStartedElements.pop_back();
        }
        rRightEndedElements.pop_front();
    }
    while (!rRightStartedElements.empty())
    {
        rLeftStartedElements.push_back(rRightStartedElements.front());
        rRightStartedElements.pop_front();
    }
}

/** Debug bookkeeping for prepending the left fragment before the right one.

    Each start still open in the left fragment either is closed by the first
    unmatched end of the right fragment (asserted to match) or, if there is
    none, is recorded in front of the right fragment's open starts. The left
    fragment's unmatched ends are then moved in front of the right fragment's.
    Both left-hand deques end up empty.
 */
static void lcl_DebugMergePrepend(
        std::deque<sal_Int32> & rLeftEndedElements,
        std::deque<sal_Int32> & rLeftStartedElements,
        std::deque<sal_Int32> & rRightEndedElements,
        std::deque<sal_Int32> & rRightStartedElements)
{
    while (!rLeftStartedElements.empty())
    {
        if (rRightEndedElements.empty())
        {
            rRightStartedElements.push_front(rLeftStartedElements.back());
        }
        else
        {
            assert(rRightEndedElements.front() == rLeftStartedElements.back());
            rRightEndedElements.pop_front();
        }
        rLeftStartedElements.pop_back();
    }
    while (!rLeftEndedElements.empty())
    {
        rRightEndedElements.push_front(rLeftEndedElements.back());
        rLeftEndedElements.pop_back();
    }
}
#endif
577 void FastSaxSerializer::mergeTopMarks(
578 sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType)
580 SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
581 assert(!mbMarkStackEmpty); // should never happen
582 if ( mbMarkStackEmpty )
583 return;
585 assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!");
586 (void) nTag;
587 #ifdef DBG_UTIL
588 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
590 // Sort is always well-formed fragment
591 assert(maMarkStack.top()->m_DebugStartedElements.empty());
592 assert(maMarkStack.top()->m_DebugEndedElements.empty());
594 lcl_DebugMergeAppend(
595 maMarkStack.top()->m_DebugEndedElements,
596 maMarkStack.top()->m_DebugStartedElements,
597 maMarkStack.top()->m_DebugPostponedEndedElements,
598 maMarkStack.top()->m_DebugPostponedStartedElements);
599 #endif
601 // flush, so that we get everything in getData()
602 maCachedOutputStream.flush();
604 if (maMarkStack.size() == 1)
606 #ifdef DBG_UTIL
607 while (!maMarkStack.top()->m_DebugEndedElements.empty())
609 assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
610 maMarkStack.top()->m_DebugEndedElements.pop_front();
611 m_DebugStartedElements.pop();
613 while (!maMarkStack.top()->m_DebugStartedElements.empty())
615 m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
616 maMarkStack.top()->m_DebugStartedElements.pop_front();
618 #endif
619 Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
620 maMarkStack.pop();
621 mbMarkStackEmpty = true;
622 maCachedOutputStream.resetOutputToStream();
623 maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
624 return;
627 #ifdef DBG_UTIL
628 ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
629 ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
630 #endif
631 const Int8Sequence aMerge( maMarkStack.top()->getData() );
632 maMarkStack.pop();
633 #ifdef DBG_UTIL
634 switch (eMergeType)
636 case MergeMarks::APPEND:
637 lcl_DebugMergeAppend(
638 maMarkStack.top()->m_DebugEndedElements,
639 maMarkStack.top()->m_DebugStartedElements,
640 topDebugEndedElements,
641 topDebugStartedElements);
642 break;
643 case MergeMarks::PREPEND:
644 if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
646 lcl_DebugMergeAppend(
647 maMarkStack.top()->m_DebugEndedElements,
648 maMarkStack.top()->m_DebugStartedElements,
649 topDebugEndedElements,
650 topDebugStartedElements);
652 else
654 lcl_DebugMergePrepend(
655 topDebugEndedElements,
656 topDebugStartedElements,
657 maMarkStack.top()->m_DebugEndedElements,
658 maMarkStack.top()->m_DebugStartedElements);
660 break;
661 case MergeMarks::POSTPONE:
662 lcl_DebugMergeAppend(
663 maMarkStack.top()->m_DebugPostponedEndedElements,
664 maMarkStack.top()->m_DebugPostponedStartedElements,
665 topDebugEndedElements,
666 topDebugStartedElements);
667 break;
669 #endif
670 if (maMarkStack.empty())
672 mbMarkStackEmpty = true;
673 maCachedOutputStream.resetOutputToStream();
675 else
677 maCachedOutputStream.setOutput( maMarkStack.top() );
680 switch ( eMergeType )
682 case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break;
683 case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break;
684 case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
688 void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
690 maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
693 void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
695 #if OSL_DEBUG_LEVEL > 0
697 bool bGood = true;
698 for (size_t i=0; i < nLen;)
700 if (auto const inv = invalidChar(pStr, nLen, i))
702 bGood = false;
703 SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
704 std::hex << inv->first);
705 i += inv->second;
706 continue;
708 ++i;
710 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
712 #endif
713 maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
716 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
718 merge( maData, maPostponed, true );
719 maPostponed.realloc( 0 );
721 return maData;
#if OSL_DEBUG_LEVEL > 0
/// Debug helper: dumps the collected and the postponed bytes to std::cerr.
void FastSaxSerializer::ForMerge::print( )
{
    // Iterate over const views: printing only needs read access.
    std::cerr << "Data: ";
    for ( const sal_Int8 nByte : std::as_const(maData) )
    {
        std::cerr << nByte;
    }

    std::cerr << "\nPostponed: ";
    for ( const sal_Int8 nByte : std::as_const(maPostponed) )
    {
        std::cerr << nByte;
    }

    std::cerr << "\n";
}
#endif
743 void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
745 merge( maData, rWhat, false );
748 void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat )
750 merge( maData, rWhat, true );
753 void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
755 merge( maPostponed, rWhat, true );
758 void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
760 sal_Int32 nMergeLen = rMerge.getLength();
761 if ( nMergeLen <= 0 )
762 return;
764 sal_Int32 nTopLen = rTop.getLength();
766 rTop.realloc( nTopLen + nMergeLen );
767 if ( bAppend )
769 // append the rMerge to the rTop
770 memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
772 else
774 // prepend the rMerge to the rTop
775 memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
776 memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
780 void FastSaxSerializer::ForMerge::resetData( )
782 maData = Int8Sequence();
785 void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
787 const auto & rOrder = maOrder;
788 if( std::find( rOrder.begin(), rOrder.end(), nElement ) != rOrder.end() )
790 mnCurrentElement = nElement;
791 if ( maData.find( nElement ) == maData.end() )
792 maData[ nElement ] = Int8Sequence();
796 void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
798 append( rWhat );
801 void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat )
803 merge( maData[mnCurrentElement], rWhat, true );
806 void FastSaxSerializer::ForSort::sort()
808 // Clear the ForMerge data to avoid duplicate items
809 resetData();
811 // Sort it all
812 std::map< sal_Int32, Int8Sequence >::iterator iter;
813 for ( const auto nIndex : std::as_const(maOrder) )
815 iter = maData.find( nIndex );
816 if ( iter != maData.end() )
817 ForMerge::append( iter->second );
821 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
823 sort( );
824 return ForMerge::getData();
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps every bucket to std::cerr, then sorts and prints the
    merged data via ForMerge::print().
 */
void FastSaxSerializer::ForSort::print( )
{
    for ( const auto& [rElement, rData] : maData )
    {
        std::cerr << "pair: " << rElement;
        for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i )
            std::cerr << rData[i];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
843 } // namespace sax_fastparser
845 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */