cid#1607171 Data race condition
[LibreOffice.git] / sax / source / tools / fastserializer.cxx
blobed1fbfafb4a085f770379c72588271928e2ef847
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "fastserializer.hxx"
22 #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23 #include <rtl/math.h>
24 #include <sal/log.hxx>
25 #include <comphelper/processfactory.hxx>
27 #include <cassert>
28 #include <optional>
29 #include <string.h>
30 #include <string_view>
31 #include <utility>
33 #if OSL_DEBUG_LEVEL > 0
34 #include <iostream>
35 #include <set>
36 #endif
38 using ::com::sun::star::uno::Sequence;
40 static constexpr bool HAS_NAMESPACE(sal_Int32 x) { return (x & 0xffff0000) != 0; }
41 static constexpr sal_Int32 NAMESPACE(sal_Int32 x) { return x >> 16; }
42 static constexpr sal_Int32 TOKEN(sal_Int32 x) { return x & 0xffff; }
44 namespace sax_fastparser {
45 FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
46 : mbMarkStackEmpty(true)
47 , mpDoubleStr(nullptr)
48 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
49 , mbXescape(true)
51 rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
52 mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
53 ::comphelper::getProcessComponentContext());
54 assert(xOutputStream.is()); // cannot do anything without that
55 maCachedOutputStream.setOutputStream( xOutputStream );
58 FastSaxSerializer::~FastSaxSerializer()
60 rtl_string_release(mpDoubleStr);
63 void FastSaxSerializer::startDocument()
65 write("<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n");
68 void FastSaxSerializer::write( double value )
70 rtl_math_doubleToString(
71 &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
72 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', nullptr,
73 0, true);
75 write(mpDoubleStr->buffer, mpDoubleStr->length);
76 // and "clear" the string
77 mpDoubleStr->length = 0;
78 mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
81 void FastSaxSerializer::write( std::u16string_view sOutput, bool bEscape )
83 write( OUStringToOString(sOutput, RTL_TEXTENCODING_UTF8), bEscape );
87 void FastSaxSerializer::write( std::string_view sOutput, bool bEscape )
89 write( sOutput.data(), sOutput.length(), bEscape );
/** Detect a character that is not allowed in XML 1.0 at string[index].
    (XML 1.1 would exclude only U+0000.)

    Reported as invalid are the control bytes below 0x20 other than TAB, LF
    and CR, and the UTF-8 encodings of U+FFFE and U+FFFF.

    This assumes that `string` is UTF-8, which appears to generally be the
    case: the only user of this FastSaxSerializer code is FastSerializerHelper,
    and when its constructor (sax/source/tools/fshelper.cxx) is called with
    bWriteHeader being true, it calls FastSaxSerializer::startDocument, which
    writes an XML header claiming encoding="UTF-8". The only place that appears
    to construct FastSerializerHelper is
    XmlFilterBase::openFragmentStreamWithSerializer
    (oox/source/core/xmlfilterbase.cxx), and it only passes false for
    bWriteHeader when the given rMediaType contains "vml" but not "+xml" (see
    <https://git.libreoffice.org/core/+/6a11add2c4ea975356cfb7bab02301788c79c904%5E!/>
    "XLSX VML Export fixes", stating "Don't write xml headers for vml files").
    But let's assume that even such Vector Markup Language files are written
    as UTF-8.

    @param string  buffer to inspect
    @param length  number of bytes in the buffer
    @param index   position to inspect; must be less than length (asserted)
    @return empty when the character is acceptable, otherwise the pair
            (code point, number of bytes it occupies in the buffer)
 */
template<typename Int> static std::optional<std::pair<unsigned, Int>> invalidChar(
    char const * string, Int length, Int index )
{
    assert(index < length);
    auto const lead = static_cast<unsigned char>(string[index]);

    if (lead == 0xEF)
    {
        // Only EF BF BE (U+FFFE) and EF BF BF (U+FFFF) are rejected here.
        if (length - index < 3 || static_cast<unsigned char>(string[index + 1]) != 0xBF)
            return std::nullopt;
        auto const last = static_cast<unsigned char>(string[index + 2]);
        if (last == 0xBE)
            return std::pair<unsigned, Int>(0xFFFE, 3);
        if (last == 0xBF)
            return std::pair<unsigned, Int>(0xFFFF, 3);
        return std::nullopt;
    }

    bool const bAllowed = lead >= 0x20 || lead == 0x09 || lead == 0x0a || lead == 0x0d;
    if (bAllowed)
        return std::nullopt;

    return std::pair<unsigned, Int>(lead, 1);
}
/// True for the ASCII hexadecimal digits 0-9, A-F and a-f; locale independent.
static bool isHexDigit( char c )
{
    if ('0' <= c && c <= '9')
        return true;
    // Setting bit 0x20 maps 'A'..'F' onto 'a'..'f' and leaves 'a'..'f' alone.
    char const folded = static_cast<char>(c | 0x20);
    return 'a' <= folded && folded <= 'f';
}
140 void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
142 if (nLen == -1)
143 nLen = pStr ? strlen(pStr) : 0;
145 if (!bEscape)
147 writeBytes( pStr, nLen );
148 return;
151 bool bGood = true;
152 const sal_Int32 kXescapeLen = 7;
153 char bufXescape[kXescapeLen+1];
154 sal_Int32 nNextXescape = 0;
155 for (sal_Int32 i = 0; i < nLen;)
157 char c = pStr[ i ];
158 switch( c )
160 case '<': writeBytes( "&lt;", 4 ); break;
161 case '>': writeBytes( "&gt;", 4 ); break;
162 case '&': writeBytes( "&amp;", 5 ); break;
163 case '\'': writeBytes( "&apos;", 6 ); break;
164 case '"': writeBytes( "&quot;", 6 ); break;
165 case '\t':
166 #if 0
167 // Seems OOXML prefers the _xHHHH_ escape over the
168 // entity in *some* cases, apparently in attribute
169 // values but not in element data.
170 // Would need to distinguish at a higher level.
171 if (mbXescape)
173 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
174 static_cast<unsigned int>(static_cast<unsigned char>(c)));
175 writeBytes( bufXescape, kXescapeLen);
177 else
178 #endif
180 writeBytes( "&#9;", 4 );
182 break;
183 case '\n':
184 #if 0
185 if (mbXescape)
187 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
188 static_cast<unsigned int>(static_cast<unsigned char>(c)));
189 writeBytes( bufXescape, kXescapeLen);
191 else
192 #endif
194 writeBytes( "&#10;", 5 );
196 break;
197 case '\r':
198 #if 0
199 if (mbXescape)
201 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
202 static_cast<unsigned int>(static_cast<unsigned char>(c)));
203 writeBytes( bufXescape, kXescapeLen);
205 else
206 #endif
208 writeBytes( "&#13;", 5 );
210 break;
211 default:
212 if (mbXescape)
214 char c1, c2, c3, c4;
215 // Escape characters not valid in XML 1.0 as
216 // _xHHHH_. A literal "_xHHHH_" has to be
217 // escaped as _x005F_xHHHH_ (effectively
218 // escaping the leading '_').
219 // See ECMA-376-1:2016 page 3736,
220 // 22.4.2.4 bstr (Basic String)
221 // for reference.
222 if (c == '_' && i >= nNextXescape && i <= nLen - kXescapeLen &&
223 pStr[i+6] == '_' &&
224 ((pStr[i+1] | 0x20) == 'x') &&
225 isHexDigit( c1 = pStr[i+2] ) &&
226 isHexDigit( c2 = pStr[i+3] ) &&
227 isHexDigit( c3 = pStr[i+4] ) &&
228 isHexDigit( c4 = pStr[i+5] ))
230 // OOXML has the odd habit to write some
231 // names using this that when re-saving
232 // should *not* be escaped, specifically
233 // _x0020_ for blanks in w:xpath values.
234 if (!(c1 == '0' && c2 == '0' && c3 == '2' && c4 == '0'))
236 // When encountering "_x005F_xHHHH_"
237 // assume that is an already escaped
238 // sequence that was not unescaped and
239 // shall be written as is, to not end
240 // up with "_x005F_x005F_xHHHH_" and
241 // repeated...
242 if (c1 == '0' && c2 == '0' && c3 == '5' && (c4 | 0x20) == 'f' &&
243 i + kXescapeLen <= nLen - 6 &&
244 pStr[i+kXescapeLen+5] == '_' &&
245 ((pStr[i+kXescapeLen+0] | 0x20) == 'x') &&
246 isHexDigit( pStr[i+kXescapeLen+1] ) &&
247 isHexDigit( pStr[i+kXescapeLen+2] ) &&
248 isHexDigit( pStr[i+kXescapeLen+3] ) &&
249 isHexDigit( pStr[i+kXescapeLen+4] ))
251 writeBytes( &c, 1 );
252 // Remember this fake escapement.
253 nNextXescape = i + kXescapeLen + 6;
255 else
257 writeBytes( "_x005F_", kXescapeLen);
258 // Remember this escapement so in
259 // _xHHHH_xHHHH_ only the first '_'
260 // is escaped.
261 nNextXescape = i + kXescapeLen;
263 break;
266 if (auto const inv = invalidChar(pStr, nLen, i))
268 snprintf( bufXescape, kXescapeLen+1, "_x%04x_",
269 inv->first);
270 writeBytes( bufXescape, kXescapeLen);
271 i += inv->second;
272 continue;
275 #if OSL_DEBUG_LEVEL > 0
276 else
278 if (bGood && invalidChar(pStr, nLen, i))
280 bGood = false;
281 // The SAL_WARN() for the single character is
282 // issued in writeBytes(), just gather for the
283 // SAL_WARN_IF() below.
286 #endif
287 writeBytes( &c, 1 );
288 break;
290 ++i;
292 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
295 void FastSaxSerializer::endDocument()
297 assert(mbMarkStackEmpty && maMarkStack.empty());
298 maCachedOutputStream.flush();
301 void FastSaxSerializer::writeId( ::sal_Int32 nElement )
303 if( HAS_NAMESPACE( nElement ) ) {
304 auto const Namespace(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
305 assert(Namespace.hasElements());
306 writeBytes(Namespace);
307 write(":");
308 auto const Element(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
309 assert(Element.hasElements());
310 writeBytes(Element);
311 } else {
312 auto const Element(mxFastTokenHandler->getUTF8Identifier(nElement));
313 assert(Element.hasElements());
314 writeBytes(Element);
318 #ifdef DBG_UTIL
319 OString FastSaxSerializer::getId( ::sal_Int32 nElement )
321 if (HAS_NAMESPACE(nElement)) {
322 Sequence<sal_Int8> const ns(
323 mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
324 Sequence<sal_Int8> const name(
325 mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
326 return std::string_view(
327 reinterpret_cast<char const*>(ns.getConstArray()), ns.getLength())
328 + ":"_ostr
329 + std::string_view(
330 reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
331 } else {
332 Sequence<sal_Int8> const name(
333 mxFastTokenHandler->getUTF8Identifier(nElement));
334 return OString(reinterpret_cast<char const*>(name.getConstArray()), name.getLength());
337 #endif
339 void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
341 if ( !mbMarkStackEmpty )
343 maCachedOutputStream.flush();
344 maMarkStack.top()->setCurrentElement( Element );
347 #ifdef DBG_UTIL
348 if (mbMarkStackEmpty)
349 m_DebugStartedElements.push(Element);
350 else
351 maMarkStack.top()->m_DebugStartedElements.push_back(Element);
352 #endif
354 write("<");
356 writeId(Element);
357 if (pAttrList)
358 writeFastAttributeList(*pAttrList);
359 else
360 writeTokenValueList();
362 write(">");
365 void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
367 #ifdef DBG_UTIL
368 // Well-formedness constraint: Element Type Match
369 if (mbMarkStackEmpty)
371 assert(!m_DebugStartedElements.empty());
372 assert(Element == m_DebugStartedElements.top());
373 m_DebugStartedElements.pop();
375 else
377 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
379 // Sort is always well-formed fragment
380 assert(!maMarkStack.top()->m_DebugStartedElements.empty());
382 if (maMarkStack.top()->m_DebugStartedElements.empty())
384 maMarkStack.top()->m_DebugEndedElements.push_back(Element);
386 else
388 assert(Element == maMarkStack.top()->m_DebugStartedElements.back());
389 maMarkStack.top()->m_DebugStartedElements.pop_back();
392 #endif
394 write("</");
396 writeId(Element);
398 write(">");
401 void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList const * pAttrList )
403 if ( !mbMarkStackEmpty )
405 maCachedOutputStream.flush();
406 maMarkStack.top()->setCurrentElement( Element );
409 write("<");
411 writeId(Element);
412 if (pAttrList)
413 writeFastAttributeList(*pAttrList);
414 else
415 writeTokenValueList();
417 write("/>");
420 css::uno::Reference< css::io::XOutputStream > const & FastSaxSerializer::getOutputStream() const
422 return maCachedOutputStream.getOutputStream();
425 void FastSaxSerializer::writeTokenValueList()
427 #ifdef DBG_UTIL
428 ::std::set<OString> DebugAttributes;
429 #endif
430 for (const TokenValue & rTokenValue : maTokenValues)
432 write(" ");
434 sal_Int32 nToken = rTokenValue.nToken;
435 writeId(nToken);
437 #ifdef DBG_UTIL
438 // Well-formedness constraint: Unique Att Spec
439 OString const nameId(getId(nToken));
440 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
441 DebugAttributes.insert(nameId);
442 #endif
444 write("=\"");
446 write(rTokenValue.pValue, -1, true);
448 write("\"");
450 maTokenValues.clear();
453 void FastSaxSerializer::writeFastAttributeList(FastAttributeList const & rAttrList)
455 #ifdef DBG_UTIL
456 ::std::set<OString> DebugAttributes;
457 #endif
458 const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
459 for (size_t j = 0; j < Tokens.size(); j++)
461 write(" ");
463 sal_Int32 nToken = Tokens[j];
464 writeId(nToken);
466 #ifdef DBG_UTIL
467 // Well-formedness constraint: Unique Att Spec
468 OString const nameId(getId(nToken));
469 SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
470 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
471 DebugAttributes.insert(nameId);
472 #endif
474 write("=\"");
476 const char* pAttributeValue = rAttrList.getFastAttributeValue(j);
478 // tdf#117274 don't escape the special VML shape type id "#_x0000_t202"
479 bool bEscape = !(pAttributeValue
480 && *pAttributeValue != '\0'
481 && (*pAttributeValue == '#'
482 ? strncmp(pAttributeValue, "#_x0000_t", 9) == 0
483 : strncmp(pAttributeValue, "_x0000_t", 8) == 0));
485 write(pAttributeValue, rAttrList.AttributeValueLength(j), bEscape);
487 write("\"");
491 void FastSaxSerializer::mark(sal_Int32 const nTag, const Int32Sequence& rOrder)
493 if (rOrder.hasElements())
495 auto pSort = std::make_shared<ForSort>(nTag, rOrder);
496 maMarkStack.push( pSort );
497 maCachedOutputStream.setOutput( pSort );
499 else
501 auto pMerge = std::make_shared<ForMerge>(nTag);
502 maMarkStack.push( pMerge );
503 maCachedOutputStream.setOutput( pMerge );
505 mbMarkStackEmpty = false;
508 #ifdef DBG_UTIL
509 static void lcl_DebugMergeAppend(
510 std::deque<sal_Int32> & rLeftEndedElements,
511 std::deque<sal_Int32> & rLeftStartedElements,
512 std::deque<sal_Int32> & rRightEndedElements,
513 std::deque<sal_Int32> & rRightStartedElements)
515 while (!rRightEndedElements.empty())
517 if (rLeftStartedElements.empty())
519 rLeftEndedElements.push_back(rRightEndedElements.front());
521 else
523 assert(rLeftStartedElements.back() == rRightEndedElements.front());
524 rLeftStartedElements.pop_back();
526 rRightEndedElements.pop_front();
528 while (!rRightStartedElements.empty())
530 rLeftStartedElements.push_back(rRightStartedElements.front());
531 rRightStartedElements.pop_front();
535 static void lcl_DebugMergePrepend(
536 std::deque<sal_Int32> & rLeftEndedElements,
537 std::deque<sal_Int32> & rLeftStartedElements,
538 std::deque<sal_Int32> & rRightEndedElements,
539 std::deque<sal_Int32> & rRightStartedElements)
541 while (!rLeftStartedElements.empty())
543 if (rRightEndedElements.empty())
545 rRightStartedElements.push_front(rLeftStartedElements.back());
547 else
549 assert(rRightEndedElements.front() == rLeftStartedElements.back());
550 rRightEndedElements.pop_front();
552 rLeftStartedElements.pop_back();
554 while (!rLeftEndedElements.empty())
556 rRightEndedElements.push_front(rLeftEndedElements.back());
557 rLeftEndedElements.pop_back();
560 #endif
562 void FastSaxSerializer::mergeTopMarks(
563 sal_Int32 const nTag, sax_fastparser::MergeMarks const eMergeType)
565 SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
566 assert(!mbMarkStackEmpty); // should never happen
567 if ( mbMarkStackEmpty )
568 return;
570 assert(maMarkStack.top()->m_Tag == nTag && "mark/merge tag mismatch!");
571 (void) nTag;
572 #ifdef DBG_UTIL
573 if (dynamic_cast<ForSort*>(maMarkStack.top().get()))
575 // Sort is always well-formed fragment
576 assert(maMarkStack.top()->m_DebugStartedElements.empty());
577 assert(maMarkStack.top()->m_DebugEndedElements.empty());
579 lcl_DebugMergeAppend(
580 maMarkStack.top()->m_DebugEndedElements,
581 maMarkStack.top()->m_DebugStartedElements,
582 maMarkStack.top()->m_DebugPostponedEndedElements,
583 maMarkStack.top()->m_DebugPostponedStartedElements);
584 #endif
586 // flush, so that we get everything in getData()
587 maCachedOutputStream.flush();
589 if (maMarkStack.size() == 1)
591 #ifdef DBG_UTIL
592 while (!maMarkStack.top()->m_DebugEndedElements.empty())
594 assert(maMarkStack.top()->m_DebugEndedElements.front() == m_DebugStartedElements.top());
595 maMarkStack.top()->m_DebugEndedElements.pop_front();
596 m_DebugStartedElements.pop();
598 while (!maMarkStack.top()->m_DebugStartedElements.empty())
600 m_DebugStartedElements.push(maMarkStack.top()->m_DebugStartedElements.front());
601 maMarkStack.top()->m_DebugStartedElements.pop_front();
603 #endif
604 Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
605 maMarkStack.pop();
606 mbMarkStackEmpty = true;
607 maCachedOutputStream.resetOutputToStream();
608 maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
609 return;
612 #ifdef DBG_UTIL
613 ::std::deque<sal_Int32> topDebugStartedElements(maMarkStack.top()->m_DebugStartedElements);
614 ::std::deque<sal_Int32> topDebugEndedElements(maMarkStack.top()->m_DebugEndedElements);
615 #endif
616 const Int8Sequence aMerge( maMarkStack.top()->getData() );
617 maMarkStack.pop();
618 #ifdef DBG_UTIL
619 switch (eMergeType)
621 case MergeMarks::APPEND:
622 lcl_DebugMergeAppend(
623 maMarkStack.top()->m_DebugEndedElements,
624 maMarkStack.top()->m_DebugStartedElements,
625 topDebugEndedElements,
626 topDebugStartedElements);
627 break;
628 case MergeMarks::PREPEND:
629 if (dynamic_cast<ForSort*>(maMarkStack.top().get())) // argh...
631 lcl_DebugMergeAppend(
632 maMarkStack.top()->m_DebugEndedElements,
633 maMarkStack.top()->m_DebugStartedElements,
634 topDebugEndedElements,
635 topDebugStartedElements);
637 else
639 lcl_DebugMergePrepend(
640 topDebugEndedElements,
641 topDebugStartedElements,
642 maMarkStack.top()->m_DebugEndedElements,
643 maMarkStack.top()->m_DebugStartedElements);
645 break;
646 case MergeMarks::POSTPONE:
647 lcl_DebugMergeAppend(
648 maMarkStack.top()->m_DebugPostponedEndedElements,
649 maMarkStack.top()->m_DebugPostponedStartedElements,
650 topDebugEndedElements,
651 topDebugStartedElements);
652 break;
654 #endif
655 if (maMarkStack.empty())
657 mbMarkStackEmpty = true;
658 maCachedOutputStream.resetOutputToStream();
660 else
662 maCachedOutputStream.setOutput( maMarkStack.top() );
665 switch ( eMergeType )
667 case MergeMarks::APPEND: maMarkStack.top()->append( aMerge ); break;
668 case MergeMarks::PREPEND: maMarkStack.top()->prepend( aMerge ); break;
669 case MergeMarks::POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
673 void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
675 maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
678 void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
680 #if OSL_DEBUG_LEVEL > 0
682 bool bGood = true;
683 for (size_t i=0; i < nLen;)
685 if (auto const inv = invalidChar(pStr, nLen, i))
687 bGood = false;
688 SAL_WARN("sax", "FastSaxSerializer::writeBytes - illegal XML character 0x" <<
689 std::hex << inv->first);
690 i += inv->second;
691 continue;
693 ++i;
695 SAL_WARN_IF( !bGood && nLen > 1, "sax", "in '" << OString(pStr,std::min<sal_Int32>(nLen,42)) << "'");
697 #endif
698 maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
701 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
703 merge( maData, maPostponed, true );
704 maPostponed.realloc( 0 );
706 return maData;
709 #if OSL_DEBUG_LEVEL > 0
710 void FastSaxSerializer::ForMerge::print( )
712 std::cerr << "Data: ";
713 for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
715 std::cerr << maData[i];
718 std::cerr << "\nPostponed: ";
719 for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
721 std::cerr << maPostponed[i];
724 std::cerr << "\n";
726 #endif
728 void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
730 merge( maData, rWhat, false );
733 void FastSaxSerializer::ForMerge::append( const css::uno::Sequence<sal_Int8> &rWhat )
735 merge( maData, rWhat, true );
738 void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
740 merge( maPostponed, rWhat, true );
743 void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
745 sal_Int32 nMergeLen = rMerge.getLength();
746 if ( nMergeLen <= 0 )
747 return;
749 sal_Int32 nTopLen = rTop.getLength();
751 rTop.realloc( nTopLen + nMergeLen );
752 if ( bAppend )
754 // append the rMerge to the rTop
755 memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
757 else
759 // prepend the rMerge to the rTop
760 memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
761 memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
765 void FastSaxSerializer::ForMerge::resetData( )
767 maData = Int8Sequence();
770 void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
772 const auto & rOrder = maOrder;
773 if( std::find( rOrder.begin(), rOrder.end(), nElement ) != rOrder.end() )
775 mnCurrentElement = nElement;
776 if ( maData.find( nElement ) == maData.end() )
777 maData[ nElement ] = Int8Sequence();
781 void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
783 append( rWhat );
786 void FastSaxSerializer::ForSort::append( const css::uno::Sequence<sal_Int8> &rWhat )
788 merge( maData[mnCurrentElement], rWhat, true );
791 void FastSaxSerializer::ForSort::sort()
793 // Clear the ForMerge data to avoid duplicate items
794 resetData();
796 // Sort it all
797 std::map< sal_Int32, Int8Sequence >::iterator iter;
798 for (const auto nIndex : maOrder)
800 iter = maData.find( nIndex );
801 if ( iter != maData.end() )
802 ForMerge::append( iter->second );
806 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
808 sort( );
809 return ForMerge::getData();
812 #if OSL_DEBUG_LEVEL > 0
813 void FastSaxSerializer::ForSort::print( )
815 for ( const auto& [rElement, rData] : maData )
817 std::cerr << "pair: " << rElement;
818 for ( sal_Int32 i=0, len=rData.getLength(); i < len; ++i )
819 std::cerr << rData[i];
820 std::cerr << "\n";
823 sort( );
824 ForMerge::print();
826 #endif
828 } // namespace sax_fastparser
830 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */