/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */

#include "fastserializer.hxx"

#include <com/sun/star/xml/sax/FastTokenHandler.hpp>
#include <rtl/math.hxx>
#include <sal/log.hxx>
#include <comphelper/processfactory.hxx>
#include <comphelper/sequence.hxx>

#include <algorithm>
#include <string.h>

#if OSL_DEBUG_LEVEL > 0
#include <iostream>
#include <set>
#endif

using ::std::vector;
using ::com::sun::star::uno::Reference;
using ::com::sun::star::uno::Sequence;
using ::com::sun::star::xml::Attribute;
using ::com::sun::star::io::XOutputStream;

// Element/attribute ids carry a namespace id in the upper 16 bits and a
// local token id in the lower 16 bits. Arguments are parenthesized so that
// compound expressions (e.g. `a | b`) are evaluated before shifting/masking.
#define HAS_NAMESPACE(x) (((x) & 0xffff0000) != 0)
#define NAMESPACE(x) ((x) >> 16)
#define TOKEN(x) ((x) & 0xffff)
// number of characters without terminating 0
#define N_CHARS(string) (SAL_N_ELEMENTS(string) - 1)

// Fixed markup fragments emitted by the serializer. They are written with
// N_CHARS(), i.e. without their terminating NUL.
static const char sClosingBracket[] = ">";
static const char sSlashAndClosingBracket[] = "/>";
static const char sColon[] = ":";
static const char sOpeningBracket[] = "<";
static const char sOpeningBracketAndSlash[] = "</";
static const char sQuote[] = "\"";
static const char sEqualSignAndQuote[] = "=\"";
static const char sSpace[] = " ";
static const char sXmlHeader[] = "<?xml version=\"1.0\" encoding=\"UTF-8\" standalone=\"yes\"?>\n";

namespace sax_fastparser
{
58 FastSaxSerializer::FastSaxSerializer( const css::uno::Reference
< css::io::XOutputStream
>& xOutputStream
)
59 : maCachedOutputStream()
61 , mbMarkStackEmpty(true)
63 , mnDoubleStrCapacity(RTL_STR_MAX_VALUEOFDOUBLE
)
65 rtl_string_new_WithLength(&mpDoubleStr
, mnDoubleStrCapacity
);
66 mxFastTokenHandler
= css::xml::sax::FastTokenHandler::create(
67 ::comphelper::getProcessComponentContext());
68 assert(xOutputStream
.is()); // cannot do anything without that
69 maCachedOutputStream
.setOutputStream( xOutputStream
);
72 FastSaxSerializer::~FastSaxSerializer()
74 rtl_string_release(mpDoubleStr
);
77 void FastSaxSerializer::startDocument()
79 writeBytes(sXmlHeader
, N_CHARS(sXmlHeader
));
82 void FastSaxSerializer::write( double value
)
84 rtl_math_doubleToString(
85 &mpDoubleStr
, &mnDoubleStrCapacity
, 0, value
, rtl_math_StringFormat_G
,
86 RTL_STR_MAX_VALUEOFDOUBLE
- RTL_CONSTASCII_LENGTH("-x.E-xxx"), '.', 0,
89 write(mpDoubleStr
->buffer
, mpDoubleStr
->length
);
90 // and "clear" the string
91 mpDoubleStr
->length
= 0;
92 mnDoubleStrCapacity
= RTL_STR_MAX_VALUEOFDOUBLE
;
95 void FastSaxSerializer::write( const OUString
& sOutput
, bool bEscape
)
97 const sal_Int32 nLength
= sOutput
.getLength();
98 for (sal_Int32 i
= 0; i
< nLength
; ++i
)
100 const sal_Unicode cUnicode
= sOutput
[ i
];
101 const char cChar
= cUnicode
;
102 if (cUnicode
& 0xff80)
104 write( OString(&cUnicode
, 1, RTL_TEXTENCODING_UTF8
) );
106 else if(bEscape
) switch( cChar
)
108 case '<': writeBytes( "<", 4 ); break;
109 case '>': writeBytes( ">", 4 ); break;
110 case '&': writeBytes( "&", 5 ); break;
111 case '\'': writeBytes( "'", 6 ); break;
112 case '"': writeBytes( """, 6 ); break;
113 case '\n': writeBytes( " ", 5 ); break;
114 case '\r': writeBytes( " ", 5 ); break;
115 default: writeBytes( &cChar
, 1 ); break;
118 writeBytes( &cChar
, 1 );
122 void FastSaxSerializer::write( const OString
& sOutput
, bool bEscape
)
124 write( sOutput
.getStr(), sOutput
.getLength(), bEscape
);
127 void FastSaxSerializer::write( const char* pStr
, sal_Int32 nLen
, bool bEscape
)
130 nLen
= pStr
? strlen(pStr
) : 0;
134 writeBytes( pStr
, nLen
);
138 for (sal_Int32 i
= 0; i
< nLen
; ++i
)
143 case '<': writeBytes( "<", 4 ); break;
144 case '>': writeBytes( ">", 4 ); break;
145 case '&': writeBytes( "&", 5 ); break;
146 case '\'': writeBytes( "'", 6 ); break;
147 case '"': writeBytes( """, 6 ); break;
148 case '\n': writeBytes( " ", 5 ); break;
149 case '\r': writeBytes( " ", 5 ); break;
150 default: writeBytes( &c
, 1 ); break;
155 void FastSaxSerializer::endDocument()
157 assert(mbMarkStackEmpty
&& maMarkStack
.empty());
158 maCachedOutputStream
.flush();
161 void FastSaxSerializer::writeId( ::sal_Int32 nElement
)
163 if( HAS_NAMESPACE( nElement
) ) {
164 writeBytes(mxFastTokenHandler
->getUTF8Identifier(NAMESPACE(nElement
)));
165 writeBytes(sColon
, N_CHARS(sColon
));
166 writeBytes(mxFastTokenHandler
->getUTF8Identifier(TOKEN(nElement
)));
168 writeBytes(mxFastTokenHandler
->getUTF8Identifier(nElement
));
// NOTE(review): the DBG_UTIL guard is restored from a dropped short
// preprocessor line - confirm it matches the declaration in the header.
#ifdef DBG_UTIL
/**
 * Returns the textual name of a token, formatted like writeId() writes it
 * ("namespace:token" or just the identifier). Used by the duplicate
 * attribute checks.
 *
 * @param nElement  combined namespace/token id
 * @return the name as a byte string
 */
OString FastSaxSerializer::getId( ::sal_Int32 nElement )
{
    if (HAS_NAMESPACE(nElement))
    {
        Sequence<sal_Int8> const ns(
            mxFastTokenHandler->getUTF8Identifier(NAMESPACE(nElement)));
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(TOKEN(nElement)));
        return OString(reinterpret_cast<sal_Char const*>(ns.getConstArray()), ns.getLength())
             + OString(sColon, N_CHARS(sColon))
             + OString(reinterpret_cast<sal_Char const*>(name.getConstArray()), name.getLength());
    }
    else
    {
        Sequence<sal_Int8> const name(
            mxFastTokenHandler->getUTF8Identifier(nElement));
        return OString(reinterpret_cast<sal_Char const*>(name.getConstArray()), name.getLength());
    }
}
#endif

190 void FastSaxSerializer::startFastElement( ::sal_Int32 Element
, FastAttributeList
* pAttrList
)
192 if ( !mbMarkStackEmpty
)
194 maCachedOutputStream
.flush();
195 maMarkStack
.top()->setCurrentElement( Element
);
199 m_DebugStartedElements
.push(Element
);
202 writeBytes(sOpeningBracket
, N_CHARS(sOpeningBracket
));
206 writeFastAttributeList(*pAttrList
);
208 writeTokenValueList();
210 writeBytes(sClosingBracket
, N_CHARS(sClosingBracket
));
213 void FastSaxSerializer::endFastElement( ::sal_Int32 Element
)
216 assert(!m_DebugStartedElements
.empty());
217 // Well-formedness constraint: Element Type Match
218 assert(Element
== m_DebugStartedElements
.top());
219 m_DebugStartedElements
.pop();
222 writeBytes(sOpeningBracketAndSlash
, N_CHARS(sOpeningBracketAndSlash
));
226 writeBytes(sClosingBracket
, N_CHARS(sClosingBracket
));
229 void FastSaxSerializer::singleFastElement( ::sal_Int32 Element
, FastAttributeList
* pAttrList
)
231 if ( !mbMarkStackEmpty
)
233 maCachedOutputStream
.flush();
234 maMarkStack
.top()->setCurrentElement( Element
);
237 writeBytes(sOpeningBracket
, N_CHARS(sOpeningBracket
));
241 writeFastAttributeList(*pAttrList
);
243 writeTokenValueList();
245 writeBytes(sSlashAndClosingBracket
, N_CHARS(sSlashAndClosingBracket
));
248 ::com::sun::star::uno::Reference
< ::com::sun::star::io::XOutputStream
> FastSaxSerializer::getOutputStream()
250 return maCachedOutputStream
.getOutputStream();
253 void FastSaxSerializer::writeTokenValueList()
256 ::std::set
<OString
> DebugAttributes
;
258 for (size_t j
= 0; j
< maTokenValues
.size(); j
++)
260 writeBytes(sSpace
, N_CHARS(sSpace
));
262 sal_Int32 nToken
= maTokenValues
[j
].nToken
;
266 // Well-formedness constraint: Unique Att Spec
267 OString
const nameId(getId(nToken
));
268 assert(DebugAttributes
.find(nameId
) == DebugAttributes
.end());
269 DebugAttributes
.insert(nameId
);
272 writeBytes(sEqualSignAndQuote
, N_CHARS(sEqualSignAndQuote
));
274 write(maTokenValues
[j
].pValue
, -1, true);
276 writeBytes(sQuote
, N_CHARS(sQuote
));
278 maTokenValues
.clear();
281 void FastSaxSerializer::writeFastAttributeList(FastAttributeList
& rAttrList
)
284 ::std::set
<OString
> DebugAttributes
;
286 const std::vector
< sal_Int32
>& Tokens
= rAttrList
.getFastAttributeTokens();
287 for (size_t j
= 0; j
< Tokens
.size(); j
++)
289 writeBytes(sSpace
, N_CHARS(sSpace
));
291 sal_Int32 nToken
= Tokens
[j
];
295 // Well-formedness constraint: Unique Att Spec
296 OString
const nameId(getId(nToken
));
297 SAL_WARN_IF(DebugAttributes
.find(nameId
) != DebugAttributes
.end(), "sax", "Duplicate attribute: " << nameId
);
298 assert(DebugAttributes
.find(nameId
) == DebugAttributes
.end());
299 DebugAttributes
.insert(nameId
);
302 writeBytes(sEqualSignAndQuote
, N_CHARS(sEqualSignAndQuote
));
304 write(rAttrList
.getFastAttributeValue(j
), rAttrList
.AttributeValueLength(j
), true);
306 writeBytes(sQuote
, N_CHARS(sQuote
));
310 void FastSaxSerializer::mark( const Int32Sequence
& aOrder
)
312 if ( aOrder
.hasElements() )
314 boost::shared_ptr
< ForMerge
> pSort( new ForSort( aOrder
) );
315 maMarkStack
.push( pSort
);
316 maCachedOutputStream
.setOutput( pSort
);
320 boost::shared_ptr
< ForMerge
> pMerge( new ForMerge( ) );
321 maMarkStack
.push( pMerge
);
322 maCachedOutputStream
.setOutput( pMerge
);
324 mbMarkStackEmpty
= false;
327 void FastSaxSerializer::mergeTopMarks( sax_fastparser::MergeMarksEnum eMergeType
)
329 SAL_WARN_IF(mbMarkStackEmpty
, "sax", "Empty mark stack - nothing to merge");
330 if ( mbMarkStackEmpty
)
333 // flush, so that we get everything in getData()
334 maCachedOutputStream
.flush();
336 if ( maMarkStack
.size() == 1 && eMergeType
!= MERGE_MARKS_IGNORE
)
338 Sequence
<sal_Int8
> aSeq( maMarkStack
.top()->getData() );
340 mbMarkStackEmpty
= true;
341 maCachedOutputStream
.resetOutputToStream();
342 maCachedOutputStream
.writeBytes( aSeq
.getConstArray(), aSeq
.getLength() );
346 const Int8Sequence
aMerge( maMarkStack
.top()->getData() );
348 if (maMarkStack
.empty())
350 mbMarkStackEmpty
= true;
351 maCachedOutputStream
.resetOutputToStream();
355 maCachedOutputStream
.setOutput( maMarkStack
.top() );
358 switch ( eMergeType
)
360 case MERGE_MARKS_APPEND
: maMarkStack
.top()->append( aMerge
); break;
361 case MERGE_MARKS_PREPEND
: maMarkStack
.top()->prepend( aMerge
); break;
362 case MERGE_MARKS_POSTPONE
: maMarkStack
.top()->postpone( aMerge
); break;
363 case MERGE_MARKS_IGNORE
: break;
368 void FastSaxSerializer::writeBytes( const Sequence
< sal_Int8
>& rData
)
370 maCachedOutputStream
.writeBytes( rData
.getConstArray(), rData
.getLength() );
373 void FastSaxSerializer::writeBytes( const char* pStr
, size_t nLen
)
375 maCachedOutputStream
.writeBytes( reinterpret_cast<const sal_Int8
*>(pStr
), nLen
);
378 FastSaxSerializer::Int8Sequence
& FastSaxSerializer::ForMerge::getData()
380 merge( maData
, maPostponed
, true );
381 maPostponed
.realloc( 0 );
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps the collected and the postponed bytes to std::cerr. */
void FastSaxSerializer::ForMerge::print( )
{
    std::cerr << "Data: ";
    for ( sal_Int32 i=0, len=maData.getLength(); i < len; i++ )
    {
        std::cerr << maData[i];
    }

    std::cerr << "\nPostponed: ";
    for ( sal_Int32 i=0, len=maPostponed.getLength(); i < len; i++ )
    {
        std::cerr << maPostponed[i];
    }

    // NOTE(review): trailing newline restored from a dropped line - confirm.
    std::cerr << "\n";
}
#endif

405 void FastSaxSerializer::ForMerge::prepend( const Int8Sequence
&rWhat
)
407 merge( maData
, rWhat
, false );
410 void FastSaxSerializer::ForMerge::append( const Int8Sequence
&rWhat
)
412 merge( maData
, rWhat
, true );
415 void FastSaxSerializer::ForMerge::postpone( const Int8Sequence
&rWhat
)
417 merge( maPostponed
, rWhat
, true );
420 void FastSaxSerializer::ForMerge::merge( Int8Sequence
&rTop
, const Int8Sequence
&rMerge
, bool bAppend
)
422 sal_Int32 nMergeLen
= rMerge
.getLength();
425 sal_Int32 nTopLen
= rTop
.getLength();
427 rTop
.realloc( nTopLen
+ nMergeLen
);
430 // append the rMerge to the rTop
431 memcpy( rTop
.getArray() + nTopLen
, rMerge
.getConstArray(), nMergeLen
);
435 // prepend the rMerge to the rTop
436 memmove( rTop
.getArray() + nMergeLen
, rTop
.getConstArray(), nTopLen
);
437 memcpy( rTop
.getArray(), rMerge
.getConstArray(), nMergeLen
);
442 void FastSaxSerializer::ForMerge::resetData( )
444 maData
= Int8Sequence();
447 void FastSaxSerializer::ForSort::setCurrentElement( sal_Int32 nElement
)
449 vector
< sal_Int32
> aOrder( comphelper::sequenceToContainer
<vector
<sal_Int32
> >(maOrder
) );
450 if( std::find( aOrder
.begin(), aOrder
.end(), nElement
) != aOrder
.end() )
452 mnCurrentElement
= nElement
;
453 if ( maData
.find( nElement
) == maData
.end() )
454 maData
[ nElement
] = Int8Sequence();
458 void FastSaxSerializer::ForSort::prepend( const Int8Sequence
&rWhat
)
463 void FastSaxSerializer::ForSort::append( const Int8Sequence
&rWhat
)
465 merge( maData
[mnCurrentElement
], rWhat
, true );
468 void FastSaxSerializer::ForSort::sort()
470 // Clear the ForMerge data to avoid duplicate items
474 std::map
< sal_Int32
, Int8Sequence
>::iterator iter
;
475 for ( sal_Int32 i
=0, len
=maOrder
.getLength(); i
< len
; i
++ )
477 iter
= maData
.find( maOrder
[i
] );
478 if ( iter
!= maData
.end() )
479 ForMerge::append( iter
->second
);
483 FastSaxSerializer::Int8Sequence
& FastSaxSerializer::ForSort::getData()
486 return ForMerge::getData();
#if OSL_DEBUG_LEVEL > 0
/** Debug helper: dumps every per-element buffer to std::cerr. */
void FastSaxSerializer::ForSort::print( )
{
    std::map< sal_Int32, Int8Sequence >::iterator iter = maData.begin();
    while ( iter != maData.end( ) )
    {
        std::cerr << "pair: " << iter->first;
        for ( sal_Int32 i=0, len=iter->second.getLength(); i < len; ++i )
            std::cerr << iter->second[i];
        std::cerr << "\n";
        ++iter;
    }

    // NOTE(review): the tail of this function was lost from the listing;
    // sorting and printing the merged data is a reconstruction - confirm.
    sort( );
    ForMerge::print();
}
#endif

} // namespace sax_fastparser

/* vim:set shiftwidth=4 softtabstop=4 expandtab: */