Bump version to 5.0-14
[LibreOffice.git] / sax / source / tools / fastserializer.cxx
blob10e7ab39f174fdfdff953c2620b9f689d9c4ec4a
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
20 #include "fastserializer.hxx"
22 #include <com/sun/star/xml/sax/FastTokenHandler.hpp>
23 #include <rtl/math.hxx>
24 #include <sal/log.hxx>
25 #include <comphelper/processfactory.hxx>
26 #include <comphelper/sequence.hxx>
28 #include <string.h>
30 #if OSL_DEBUG_LEVEL > 0
31 #include <iostream>
32 #include <set>
33 #endif
35 using ::std::vector;
36 using ::com::sun::star::uno::Reference;
37 using ::com::sun::star::uno::Sequence;
38 using ::com::sun::star::xml::Attribute;
39 using ::com::sun::star::io::XOutputStream;
// Helpers for combined token ids: the bits above the low 16 hold the
// namespace part, the low 16 bits hold the local token.
// Every macro argument is parenthesized so that compound arguments such as
// `a | b` are evaluated as a whole before the mask/shift is applied.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)
// Fixed XML syntax fragments written by the serializer. They are declared as
// arrays (not pointers) so that N_CHARS() can be applied to them.

// Document prolog
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

// Tag delimiters
static const char sOpeningBracket[] = "<";
static const char sOpeningBracketAndSlash[] = "</";
static const char sClosingBracket[] = ">";
static const char sSlashAndClosingBracket[] = "/>";

// Name and attribute punctuation
static const char sColon[] = ":";
static const char sSpace[] = " ";
static const char sEqualSignAndQuote[] = "=\"";
static const char sQuote[] = "\"";
57 namespace sax_fastparser {
58 FastSaxSerializer::FastSaxSerializer( const css::uno::Reference< css::io::XOutputStream >& xOutputStream )
59 : maCachedOutputStream()
60 , maMarkStack()
61 , mbMarkStackEmpty(true)
62 , mpDoubleStr(NULL)
63 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE)
65 rtl_string_new_WithLength(&mpDoubleStr, mnDoubleStrCapacity);
66 mxFastTokenHandler = css::xml::sax::FastTokenHandler::create(
67 ::comphelper::getProcessComponentContext());
68 assert(xOutputStream.is()); // cannot do anything without that
69 maCachedOutputStream.setOutputStream( xOutputStream );
72 FastSaxSerializer::~FastSaxSerializer()
74 rtl_string_release(mpDoubleStr);
77 void FastSaxSerializer::startDocument()
79 writeBytes(sXmlHeader, N_CHARS(sXmlHeader));
82 void FastSaxSerializer::write( double value )
84 rtl_math_doubleToString(
85 &mpDoubleStr, &mnDoubleStrCapacity, 0, value, rtl_math_StringFormat_G,
86 RTL_STR_MAX_VALUEOFDOUBLE - RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
87 0, sal_True);
89 write(mpDoubleStr->buffer, mpDoubleStr->length);
90 // and "clear" the string
91 mpDoubleStr->length = 0;
92 mnDoubleStrCapacity = RTL_STR_MAX_VALUEOFDOUBLE;
95 void FastSaxSerializer::write( const OUString& sOutput, bool bEscape )
97 const sal_Int32 nLength = sOutput.getLength();
98 for (sal_Int32 i = 0; i < nLength; ++i)
100 const sal_Unicode cUnicode = sOutput[ i ];
101 const char cChar = cUnicode;
102 if (cUnicode & 0xff80)
104 write( OString(&cUnicode, 1, RTL_TEXTENCODING_UTF8) );
106 else if(bEscape) switch( cChar )
108 case '<': writeBytes( "&lt;", 4 ); break;
109 case '>': writeBytes( "&gt;", 4 ); break;
110 case '&': writeBytes( "&amp;", 5 ); break;
111 case '\'': writeBytes( "&apos;", 6 ); break;
112 case '"': writeBytes( "&quot;", 6 ); break;
113 case '\n': writeBytes( "&#10;", 5 ); break;
114 case '\r': writeBytes( "&#13;", 5 ); break;
115 default: writeBytes( &cChar, 1 ); break;
117 else
118 writeBytes( &cChar, 1 );
122 void FastSaxSerializer::write( const OString& sOutput, bool bEscape )
124 write( sOutput.getStr(), sOutput.getLength(), bEscape );
127 void FastSaxSerializer::write( const char* pStr, sal_Int32 nLen, bool bEscape )
129 if (nLen == -1)
130 nLen = pStr ? strlen(pStr) : 0;
132 if (!bEscape)
134 writeBytes( pStr, nLen );
135 return;
138 for (sal_Int32 i = 0; i < nLen; ++i)
140 char c = pStr[ i ];
141 switch( c )
143 case '<': writeBytes( "&lt;", 4 ); break;
144 case '>': writeBytes( "&gt;", 4 ); break;
145 case '&': writeBytes( "&amp;", 5 ); break;
146 case '\'': writeBytes( "&apos;", 6 ); break;
147 case '"': writeBytes( "&quot;", 6 ); break;
148 case '\n': writeBytes( "&#10;", 5 ); break;
149 case '\r': writeBytes( "&#13;", 5 ); break;
150 default: writeBytes( &c, 1 ); break;
155 void FastSaxSerializer::endDocument()
157 assert(mbMarkStackEmpty && maMarkStack.empty());
158 maCachedOutputStream.flush();
161 void FastSaxSerializer::writeId( ::sal_Int32 nElement )
163 if( HAS_NAMESPACE( nElement ) ) {
164 writeBytes(mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
165 writeBytes(sColon, N_CHARS(sColon));
166 writeBytes(mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
167 } else
168 writeBytes(mxFastTokenHandler->getUTF8Identifier(nElement));
#ifdef DBG_UTIL
/** Debug helper: returns the name for a token id as a string, in the same
    "prefix:name" / "name" form that writeId() writes.

    @param nElement  token id, optionally combined with a namespace part
    @return the qualified name as a byte string
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (!HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const aName(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(aName.getConstArray()), aName.getLength());
    }

    Sequence<sal_Int8> const aPrefix(
        mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
    Sequence<sal_Int8> const aLocalName(
        mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
    return OString(reinterpret_cast<sal_Char const*>(aPrefix.getConstArray()), aPrefix.getLength())
         + OString(sColon, N_CHARS(sColon))
         + OString(reinterpret_cast<sal_Char const*>(aLocalName.getConstArray()), aLocalName.getLength());
}
#endif
190 void FastSaxSerializer::startFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
192 if ( !mbMarkStackEmpty )
194 maCachedOutputStream.flush();
195 maMarkStack.top()->setCurrentElement( Element );
198 #ifdef DBG_UTIL
199 m_DebugStartedElements.push(Element);
200 #endif
202 writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
204 writeId(Element);
205 if (pAttrList)
206 writeFastAttributeList(*pAttrList);
207 else
208 writeTokenValueList();
210 writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
213 void FastSaxSerializer::endFastElement( ::sal_Int32 Element )
215 #ifdef DBG_UTIL
216 assert(!m_DebugStartedElements.empty());
217 // Well-formedness constraint: Element Type Match
218 assert(Element == m_DebugStartedElements.top());
219 m_DebugStartedElements.pop();
220 #endif
222 writeBytes(sOpeningBracketAndSlash, N_CHARS(sOpeningBracketAndSlash));
224 writeId(Element);
226 writeBytes(sClosingBracket, N_CHARS(sClosingBracket));
229 void FastSaxSerializer::singleFastElement( ::sal_Int32 Element, FastAttributeList* pAttrList )
231 if ( !mbMarkStackEmpty )
233 maCachedOutputStream.flush();
234 maMarkStack.top()->setCurrentElement( Element );
237 writeBytes(sOpeningBracket, N_CHARS(sOpeningBracket));
239 writeId(Element);
240 if (pAttrList)
241 writeFastAttributeList(*pAttrList);
242 else
243 writeTokenValueList();
245 writeBytes(sSlashAndClosingBracket, N_CHARS(sSlashAndClosingBracket));
248 ::com::sun::star::uno::Reference< ::com::sun::star::io::XOutputStream > FastSaxSerializer::getOutputStream()
250 return maCachedOutputStream.getOutputStream();
253 void FastSaxSerializer::writeTokenValueList()
255 #ifdef DBG_UTIL
256 ::std::set<OString> DebugAttributes;
257 #endif
258 for (size_t j = 0; j < maTokenValues.size(); j++)
260 writeBytes(sSpace, N_CHARS(sSpace));
262 sal_Int32 nToken = maTokenValues[j].nToken;
263 writeId(nToken);
265 #ifdef DBG_UTIL
266 // Well-formedness constraint: Unique Att Spec
267 OString const nameId(getId(nToken));
268 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
269 DebugAttributes.insert(nameId);
270 #endif
272 writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
274 write(maTokenValues[j].pValue, -1, true);
276 writeBytes(sQuote, N_CHARS(sQuote));
278 maTokenValues.clear();
281 void FastSaxSerializer::writeFastAttributeList(FastAttributeList& rAttrList)
283 #ifdef DBG_UTIL
284 ::std::set<OString> DebugAttributes;
285 #endif
286 const std::vector< sal_Int32 >& Tokens = rAttrList.getFastAttributeTokens();
287 for (size_t j = 0; j < Tokens.size(); j++)
289 writeBytes(sSpace, N_CHARS(sSpace));
291 sal_Int32 nToken = Tokens[j];
292 writeId(nToken);
294 #ifdef DBG_UTIL
295 // Well-formedness constraint: Unique Att Spec
296 OString const nameId(getId(nToken));
297 SAL_WARN_IF(DebugAttributes.find(nameId) != DebugAttributes.end(), "sax", "Duplicate attribute: " << nameId );
298 assert(DebugAttributes.find(nameId) == DebugAttributes.end());
299 DebugAttributes.insert(nameId);
300 #endif
302 writeBytes(sEqualSignAndQuote, N_CHARS(sEqualSignAndQuote));
304 write(rAttrList.getFastAttributeValue(j), rAttrList.AttributeValueLength(j), true);
306 writeBytes(sQuote, N_CHARS(sQuote));
310 void FastSaxSerializer::mark( const Int32Sequence& aOrder )
312 if ( aOrder.hasElements() )
314 boost::shared_ptr< ForMerge > pSort( new ForSort( aOrder ) );
315 maMarkStack.push( pSort );
316 maCachedOutputStream.setOutput( pSort );
318 else
320 boost::shared_ptr< ForMerge > pMerge( new ForMerge( ) );
321 maMarkStack.push( pMerge );
322 maCachedOutputStream.setOutput( pMerge );
324 mbMarkStackEmpty = false;
327 void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType )
329 SAL_WARN_IF(mbMarkStackEmpty, "sax", "Empty mark stack - nothing to merge");
330 if ( mbMarkStackEmpty )
331 return;
333 // flush, so that we get everything in getData()
334 maCachedOutputStream.flush();
336 if ( maMarkStack.size() == 1 && eMergeType != MERGE_MARKS_IGNORE)
338 Sequence<sal_Int8> aSeq( maMarkStack.top()->getData() );
339 maMarkStack.pop();
340 mbMarkStackEmpty = true;
341 maCachedOutputStream.resetOutputToStream();
342 maCachedOutputStream.writeBytes( aSeq.getConstArray(), aSeq.getLength() );
343 return;
346 const Int8Sequence aMerge( maMarkStack.top()->getData() );
347 maMarkStack.pop();
348 if (maMarkStack.empty())
350 mbMarkStackEmpty = true;
351 maCachedOutputStream.resetOutputToStream();
353 else
355 maCachedOutputStream.setOutput( maMarkStack.top() );
358 switch ( eMergeType )
360 case MERGE_MARKS_APPEND: maMarkStack.top()->append( aMerge ); break;
361 case MERGE_MARKS_PREPEND: maMarkStack.top()->prepend( aMerge ); break;
362 case MERGE_MARKS_POSTPONE: maMarkStack.top()->postpone( aMerge ); break;
363 case MERGE_MARKS_IGNORE : break;
368 void FastSaxSerializer::writeBytes( const Sequence< sal_Int8 >& rData )
370 maCachedOutputStream.writeBytes( rData.getConstArray(), rData.getLength() );
373 void FastSaxSerializer::writeBytes( const char* pStr, size_t nLen )
375 maCachedOutputStream.writeBytes( reinterpret_cast<const sal_Int8*>(pStr), nLen );
378 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForMerge::getData()
380 merge( maData, maPostponed, true );
381 maPostponed.realloc( 0 );
383 return maData;
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps the data and the postponed buffer to std::cerr. */
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    const sal_Int32 nDataLen = maData.getLength();
    for ( sal_Int32 nPos = 0; nPos < nDataLen; ++nPos )
    {
        std::cerr << maData[nPos];
    }

    std::cerr << "\nPostponed: ";
    const sal_Int32 nPostponedLen = maPostponed.getLength();
    for ( sal_Int32 nPos = 0; nPos < nPostponedLen; ++nPos )
    {
        std::cerr << maPostponed[nPos];
    }

    std::cerr << "\n";
}
#endif
405 void FastSaxSerializer::ForMerge::prepend( const Int8Sequence &rWhat )
407 merge( maData, rWhat, false );
410 void FastSaxSerializer::ForMerge::append( const Int8Sequence &rWhat )
412 merge( maData, rWhat, true );
415 void FastSaxSerializer::ForMerge::postpone( const Int8Sequence &rWhat )
417 merge( maPostponed, rWhat, true );
420 void FastSaxSerializer::ForMerge::merge( Int8Sequence &rTop, const Int8Sequence &rMerge, bool bAppend )
422 sal_Int32 nMergeLen = rMerge.getLength();
423 if ( nMergeLen > 0 )
425 sal_Int32 nTopLen = rTop.getLength();
427 rTop.realloc( nTopLen + nMergeLen );
428 if ( bAppend )
430 // append the rMerge to the rTop
431 memcpy( rTop.getArray() + nTopLen, rMerge.getConstArray(), nMergeLen );
433 else
435 // prepend the rMerge to the rTop
436 memmove( rTop.getArray() + nMergeLen, rTop.getConstArray(), nTopLen );
437 memcpy( rTop.getArray(), rMerge.getConstArray(), nMergeLen );
442 void FastSaxSerializer::ForMerge::resetData( )
444 maData = Int8Sequence();
447 void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement )
449 vector< sal_Int32 > aOrder( comphelper::sequenceToContainer<vector<sal_Int32> >(maOrder) );
450 if( std::find( aOrder.begin(), aOrder.end(), nElement ) != aOrder.end() )
452 mnCurrentElement = nElement;
453 if ( maData.find( nElement ) == maData.end() )
454 maData[ nElement ] = Int8Sequence();
458 void FastSaxSerializer::ForSort::prepend( const Int8Sequence &rWhat )
460 append( rWhat );
463 void FastSaxSerializer::ForSort::append( const Int8Sequence &rWhat )
465 merge( maData[mnCurrentElement], rWhat, true );
468 void FastSaxSerializer::ForSort::sort()
470 // Clear the ForMerge data to avoid duplicate items
471 resetData();
473 // Sort it all
474 std::map< sal_Int32, Int8Sequence >::iterator iter;
475 for ( sal_Int32 i=0, len=maOrder.getLength(); i < len; i++ )
477 iter = maData.find( maOrder[i] );
478 if ( iter != maData.end() )
479 ForMerge::append( iter->second );
483 FastSaxSerializer::Int8Sequence& FastSaxSerializer::ForSort::getData()
485 sort( );
486 return ForMerge::getData();
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps every per-element buffer to std::cerr, then sorts
    and prints the merged data via ForMerge::print().
 */
void FastSaxSerializer::ForSort::print( )
{
    for ( std::map< sal_Int32, Int8Sequence >::iterator aEntry = maData.begin();
          aEntry != maData.end( ); ++aEntry )
    {
        std::cerr << "pair: " << aEntry->first;
        const sal_Int32 nLen = aEntry->second.getLength();
        for ( sal_Int32 nPos = 0; nPos < nLen; ++nPos )
            std::cerr << aEntry->second[nPos];
        std::cerr << "\n";
    }

    sort( );
    ForMerge::print();
}
#endif
507 } // namespace sax_fastparser
509 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */