1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: saxwriter.cxx,v $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
32 #include <com/sun/star/lang/XServiceInfo.hpp>
33 #include <com/sun/star/util/XCloneable.hpp>
34 #include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
35 #include <com/sun/star/xml/sax/XParser.hpp>
36 #include <com/sun/star/xml/sax/SAXParseException.hpp>
37 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
39 #include <com/sun/star/io/XActiveDataSource.hpp>
41 #include <cppuhelper/factory.hxx>
42 #include <cppuhelper/weak.hxx>
43 #include <cppuhelper/implbase3.hxx>
45 #include <rtl/strbuf.hxx>
46 #include <rtl/byteseq.hxx>
47 #include <rtl/ustrbuf.hxx>
49 using namespace ::rtl
;
50 using namespace ::std
;
51 using namespace ::osl
;
52 using namespace ::cppu
;
53 using namespace ::com::sun::star::uno
;
54 using namespace ::com::sun::star::lang
;
55 using namespace ::com::sun::star::registry
;
56 using namespace ::com::sun::star::xml::sax
;
57 using namespace ::com::sun::star::util
;
58 using namespace ::com::sun::star::io
;
60 #include "factory.hxx"
61 #include "xml2utf.hxx"
64 #define SEQUENCESIZE 1024
65 #define MAXCOLUMNCOUNT 72
70 * Character conversion functions
75 namespace sax_expatwrap
{
78 * Calculates the length of the sequence after conversion, but the conversion is not done.
79 * .g. &<>"' plus some more are
80 * special characters in XML that need to be transformed
82 * @param bConvertAll For Attributes it is necessary to convert every symbol (including line feed and tab)
83 * Set this to true, if you want to perform this special conversion
84 * @return The returned value is equal to the length of the incoming sequence, when no
85 + conversion is necessary, otherwise it is larger than the length of the sequence.
87 // inline sal_Int32 CalcXMLLen( const Sequence<sal_Int8> & seq , sal_Bool bConvertAll ) throw()
89 // sal_Int32 nLen = 0;
90 // const sal_Int8 *pArray = seq.getConstArray();
92 // for( int i = 0 ; i < seq.getLength() ; i ++ ) {
94 // sal_Int8 c = pArray[i];
97 // case '&': // resemble to &
104 // case 39: // 39 == ''', '
105 // case '"': // "
106 // case 13: // 
110 // case 10: // 

/** Result of writing a start tag, as reported by SaxWriterHelper::startElement().

    The enumerators are only ever compared for equality in this file.
 */
enum SaxInvalidCharacterError
{
    SAX_NONE,       ///< everything was written without an invalid character
    SAX_WARNING,    ///< an attribute value contained an invalid character
    SAX_ERROR       ///< the element name or an attribute name contained an invalid character
};
132 class SaxWriterHelper
134 Reference
< XOutputStream
> m_out
;
135 Sequence
< sal_Int8
> m_Sequence
;
136 sal_Int8
* mp_Sequence
;
138 sal_Int32 nLastLineFeedPos
; // is negative after writing a sequence
139 sal_uInt32 nCurrentPos
;
140 sal_Bool m_bStartElementFinished
;
143 inline sal_uInt32
writeSequence() throw( SAXException
);
145 // use only if to insert the bytes more space in the sequence is needed and
146 // so the sequence has to write out and reset rPos to 0
147 // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE)
148 inline void AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
149 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
);
150 inline sal_Bool
convertToXML(const sal_Unicode
* pStr
,
152 sal_Bool bDoNormalization
,
153 sal_Bool bNormalizeWhitespace
,
155 sal_uInt32
& rPos
) throw( SAXException
);
156 inline void FinishStartElement() throw( SAXException
);
158 SaxWriterHelper(Reference
< XOutputStream
> m_TempOut
) :
160 m_Sequence(SEQUENCESIZE
),
164 m_bStartElementFinished(sal_True
)
166 OSL_ENSURE(SEQUENCESIZE
> 50, "Sequence cache size to small");
167 mp_Sequence
= m_Sequence
.getArray();
171 OSL_ENSURE(!nCurrentPos
, "cached Sequence not written");
172 OSL_ENSURE(m_bStartElementFinished
, "StartElement not complettly written");
175 inline void insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
);
177 // returns whether it works correct or invalid characters were in the string
178 // If there are invalid characters in the string it returns sal_False.
179 // Than the calling method has to throw the needed Exception.
180 inline sal_Bool
writeString(const rtl::OUString
& rWriteOutString
,
181 sal_Bool bDoNormalization
,
182 sal_Bool bNormalizeWhitespace
) throw( SAXException
);
184 sal_uInt32
GetLastColumnCount() { return (sal_uInt32
)(nCurrentPos
- nLastLineFeedPos
); }
186 inline void startDocument() throw( SAXException
);
188 // returns whether it works correct or invalid characters were in the strings
189 // If there are invalid characters in one of the strings it returns sal_False.
190 // Than the calling method has to throw the needed Exception.
191 inline SaxInvalidCharacterError
startElement(const rtl::OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
);
192 inline sal_Bool
FinishEmptyElement() throw( SAXException
);
194 // returns whether it works correct or invalid characters were in the string
195 // If there are invalid characters in the string it returns sal_False.
196 // Than the calling method has to throw the needed Exception.
197 inline sal_Bool
endElement(const rtl::OUString
& rName
) throw( SAXException
);
198 inline void endDocument() throw( SAXException
);
200 // returns whether it works correct or invalid characters were in the strings
201 // If there are invalid characters in the string it returns sal_False.
202 // Than the calling method has to throw the needed Exception.
203 inline sal_Bool
processingInstruction(const rtl::OUString
& rTarget
, const rtl::OUString
& rData
) throw( SAXException
);
204 inline void startCDATA() throw( SAXException
);
205 inline void endCDATA() throw( SAXException
);
207 // returns whether it works correct or invalid characters were in the strings
208 // If there are invalid characters in the string it returns sal_False.
209 // Than the calling method has to throw the needed Exception.
210 inline sal_Bool
comment(const rtl::OUString
& rComment
) throw( SAXException
);
212 inline void clearBuffer() throw( SAXException
);
215 const sal_Bool g_bValidCharsBelow32
[32] =
220 0,0,0,0,0,0,0,0, //16
224 inline sal_Bool
IsInvalidChar(const sal_Unicode aChar
)
226 sal_Bool
bRet(sal_False
);
227 // check first for the most common characters
228 if( aChar
< 32 || aChar
>= 0xd800 )
229 bRet
= ( (aChar
< 32 && ! g_bValidCharsBelow32
[aChar
]) ||
236 * write through to the output stream
239 inline sal_uInt32
SaxWriterHelper::writeSequence() throw( SAXException
)
243 m_out
->writeBytes( m_Sequence
);
245 catch( IOException
& e
)
250 OUString::createFromAscii( "io exception during writing" ),
251 Reference
< XInterface
> (),
254 nLastLineFeedPos
-= SEQUENCESIZE
;
258 inline void SaxWriterHelper::AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
259 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
)
261 OSL_ENSURE((rPos
+ nBytesCount
) > SEQUENCESIZE
, "wrong use of AddBytesMethod");
262 sal_uInt32
nCount(SEQUENCESIZE
- rPos
);
263 memcpy( &(pTarget
[rPos
]) , pBytes
, nCount
);
265 OSL_ENSURE(rPos
+ nCount
== SEQUENCESIZE
, "the position should be the at the end");
267 rPos
= writeSequence();
268 sal_uInt32
nRestCount(nBytesCount
- nCount
);
269 if ((rPos
+ nRestCount
) <= SEQUENCESIZE
)
271 memcpy( &(pTarget
[rPos
]), &pBytes
[nCount
], nRestCount
);
275 AddBytes(pTarget
, rPos
, &pBytes
[nCount
], nRestCount
);
278 /** Converts an UTF16 string to UTF8 and does XML normalization
281 Pointer to a piece of memory, to where the output should be written. The caller
282 must call calcXMLByteLength on the same string, to ensure,
283 that there is enough memory for converting.
285 inline sal_Bool
SaxWriterHelper::convertToXML( const sal_Unicode
* pStr
,
287 sal_Bool bDoNormalization
,
288 sal_Bool bNormalizeWhitespace
,
290 sal_uInt32
& rPos
) throw( SAXException
)
292 sal_Bool
bRet(sal_True
);
293 sal_uInt32 nSurrogate
= 0;
295 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
297 sal_uInt16 c
= pStr
[i
];
298 if (IsInvalidChar(c
))
300 else if( (c
>= 0x0001) && (c
<= 0x007F) )
302 if( bDoNormalization
)
306 case '&': // resemble to &
308 if ((rPos
+ 5) > SEQUENCESIZE
)
309 AddBytes(pTarget
, rPos
, (sal_Int8
*)"&", 5);
312 memcpy( &(pTarget
[rPos
]) , "&", 5 );
319 if ((rPos
+ 4) > SEQUENCESIZE
)
320 AddBytes(pTarget
, rPos
, (sal_Int8
*)"<", 4);
323 memcpy( &(pTarget
[rPos
]) , "<" , 4 );
330 if ((rPos
+ 4) > SEQUENCESIZE
)
331 AddBytes(pTarget
, rPos
, (sal_Int8
*)">", 4);
334 memcpy( &(pTarget
[rPos
]) , ">" , 4 );
339 case 39: // 39 == '''
341 if ((rPos
+ 6) > SEQUENCESIZE
)
342 AddBytes(pTarget
, rPos
, (sal_Int8
*)"'", 6);
345 memcpy( &(pTarget
[rPos
]) , "'" , 6 );
352 if ((rPos
+ 6) > SEQUENCESIZE
)
353 AddBytes(pTarget
, rPos
, (sal_Int8
*)""", 6);
356 memcpy( &(pTarget
[rPos
]) , """ , 6 );
363 if ((rPos
+ 6) > SEQUENCESIZE
)
364 AddBytes(pTarget
, rPos
, (sal_Int8
*)"
", 6);
367 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
374 if( bNormalizeWhitespace
)
376 if ((rPos
+ 6) > SEQUENCESIZE
)
377 AddBytes(pTarget
, rPos
, (sal_Int8
*)"
" , 6);
380 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
386 pTarget
[rPos
] = LINEFEED
;
387 nLastLineFeedPos
= rPos
;
394 if( bNormalizeWhitespace
)
396 if ((rPos
+ 6) > SEQUENCESIZE
)
397 AddBytes(pTarget
, rPos
, (sal_Int8
*)"	" , 6);
400 memcpy( &(pTarget
[rPos
]) , "	" , 6 );
413 pTarget
[rPos
] = (sal_Int8
)c
;
421 pTarget
[rPos
] = (sal_Int8
)c
;
422 if ((sal_Int8
)c
== LINEFEED
)
423 nLastLineFeedPos
= rPos
;
427 else if( c
>= 0xd800 && c
< 0xdc00 )
429 // 1. surrogate: save (until 2. surrogate)
430 OSL_ENSURE( nSurrogate
== 0, "left-over Unicode surrogate" );
431 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
433 else if( c
>= 0xdc00 && c
< 0xe000 )
435 // 2. surrogate: write as UTF-8
436 OSL_ENSURE( nSurrogate
!= 0, "lone 2nd Unicode surrogate" );
438 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
439 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
441 sal_Int8 aBytes
[] = { sal_Int8(0xF0 | ((nSurrogate
>> 18) & 0x0F)),
442 sal_Int8(0x80 | ((nSurrogate
>> 12) & 0x3F)),
443 sal_Int8(0x80 | ((nSurrogate
>> 6) & 0x3F)),
444 sal_Int8(0x80 | ((nSurrogate
>> 0) & 0x3F)) };
445 if ((rPos
+ 4) > SEQUENCESIZE
)
446 AddBytes(pTarget
, rPos
, aBytes
, 4);
449 pTarget
[rPos
] = aBytes
[0];
451 pTarget
[rPos
] = aBytes
[1];
453 pTarget
[rPos
] = aBytes
[2];
455 pTarget
[rPos
] = aBytes
[3];
461 OSL_ENSURE( false, "illegal Unicode character" );
468 else if( c
> 0x07FF )
470 sal_Int8 aBytes
[] = { sal_Int8(0xE0 | ((c
>> 12) & 0x0F)),
471 sal_Int8(0x80 | ((c
>> 6) & 0x3F)),
472 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
473 if ((rPos
+ 3) > SEQUENCESIZE
)
474 AddBytes(pTarget
, rPos
, aBytes
, 3);
477 pTarget
[rPos
] = aBytes
[0];
479 pTarget
[rPos
] = aBytes
[1];
481 pTarget
[rPos
] = aBytes
[2];
487 sal_Int8 aBytes
[] = { sal_Int8(0xC0 | ((c
>> 6) & 0x1F)),
488 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
489 if ((rPos
+ 2) > SEQUENCESIZE
)
490 AddBytes(pTarget
, rPos
, aBytes
, 2);
493 pTarget
[rPos
] = aBytes
[0];
495 pTarget
[rPos
] = aBytes
[1];
499 OSL_ENSURE(rPos
<= SEQUENCESIZE
, "not reset current position");
500 if (rPos
== SEQUENCESIZE
)
501 rPos
= writeSequence();
503 // reset left-over surrogate
504 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
506 OSL_ENSURE( nSurrogate
!= 0, "left-over Unicode surrogate" );
514 inline void SaxWriterHelper::FinishStartElement() throw( SAXException
)
516 if (!m_bStartElementFinished
)
518 mp_Sequence
[nCurrentPos
] = '>';
520 if (nCurrentPos
== SEQUENCESIZE
)
521 nCurrentPos
= writeSequence();
522 m_bStartElementFinished
= sal_True
;
526 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
)
528 FinishStartElement();
531 if ((nCurrentPos
+ m_nLevel
+ 1) <= SEQUENCESIZE
)
533 mp_Sequence
[nCurrentPos
] = LINEFEED
;
534 nLastLineFeedPos
= nCurrentPos
;
536 memset( &(mp_Sequence
[nCurrentPos
]) , 32 , m_nLevel
);
537 nCurrentPos
+= m_nLevel
;
538 if (nCurrentPos
== SEQUENCESIZE
)
539 nCurrentPos
= writeSequence();
543 sal_uInt32
nCount(m_nLevel
+ 1);
544 sal_Int8
* pBytes
= new sal_Int8
[nCount
];
545 pBytes
[0] = LINEFEED
;
546 memset( &(pBytes
[1]), 32, m_nLevel
);
547 AddBytes(mp_Sequence
, nCurrentPos
, pBytes
, nCount
);
549 nLastLineFeedPos
= nCurrentPos
- nCount
;
550 if (nCurrentPos
== SEQUENCESIZE
)
551 nCurrentPos
= writeSequence();
556 mp_Sequence
[nCurrentPos
] = LINEFEED
;
557 nLastLineFeedPos
= nCurrentPos
;
559 if (nCurrentPos
== SEQUENCESIZE
)
560 nCurrentPos
= writeSequence();
564 inline sal_Bool
SaxWriterHelper::writeString( const rtl::OUString
& rWriteOutString
,
565 sal_Bool bDoNormalization
,
566 sal_Bool bNormalizeWhitespace
) throw( SAXException
)
568 FinishStartElement();
569 return convertToXML(rWriteOutString
.getStr(),
570 rWriteOutString
.getLength(),
572 bNormalizeWhitespace
,
577 inline void SaxWriterHelper::startDocument() throw( SAXException
)
579 const char pc
[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
580 const int nLen
= strlen( pc
);
581 if ((nCurrentPos
+ nLen
) <= SEQUENCESIZE
)
583 memcpy( mp_Sequence
, pc
, nLen
);
588 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)pc
, nLen
);
590 OSL_ENSURE(nCurrentPos
<= SEQUENCESIZE
, "not reset current position");
591 if (nCurrentPos
== SEQUENCESIZE
)
592 nCurrentPos
= writeSequence();
593 mp_Sequence
[nCurrentPos
] = LINEFEED
;
595 if (nCurrentPos
== SEQUENCESIZE
)
596 nCurrentPos
= writeSequence();
599 inline SaxInvalidCharacterError
SaxWriterHelper::startElement(const rtl::OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
)
601 FinishStartElement();
602 mp_Sequence
[nCurrentPos
] = '<';
604 if (nCurrentPos
== SEQUENCESIZE
)
605 nCurrentPos
= writeSequence();
607 SaxInvalidCharacterError
eRet(SAX_NONE
);
608 if (!writeString(rName
, sal_False
, sal_False
))
611 sal_Int16 nAttribCount
= xAttribs
.is() ? static_cast<sal_Int16
>(xAttribs
->getLength()) : 0;
612 for(sal_Int16 i
= 0 ; i
< nAttribCount
; i
++ )
614 mp_Sequence
[nCurrentPos
] = ' ';
616 if (nCurrentPos
== SEQUENCESIZE
)
617 nCurrentPos
= writeSequence();
619 if (!writeString(xAttribs
->getNameByIndex( i
), sal_False
, sal_False
))
622 mp_Sequence
[nCurrentPos
] = '=';
624 if (nCurrentPos
== SEQUENCESIZE
)
625 nCurrentPos
= writeSequence();
626 mp_Sequence
[nCurrentPos
] = '"';
628 if (nCurrentPos
== SEQUENCESIZE
)
629 nCurrentPos
= writeSequence();
631 if (!writeString(xAttribs
->getValueByIndex( i
), sal_True
, sal_True
) &&
632 !(eRet
== SAX_ERROR
))
635 mp_Sequence
[nCurrentPos
] = '"';
637 if (nCurrentPos
== SEQUENCESIZE
)
638 nCurrentPos
= writeSequence();
641 m_bStartElementFinished
= sal_False
; // because the '>' character is not added,
642 // because it is possible, that the "/>"
643 // characters have to add
647 inline sal_Bool
SaxWriterHelper::FinishEmptyElement() throw( SAXException
)
649 if (m_bStartElementFinished
)
652 mp_Sequence
[nCurrentPos
] = '/';
654 if (nCurrentPos
== SEQUENCESIZE
)
655 nCurrentPos
= writeSequence();
656 mp_Sequence
[nCurrentPos
] = '>';
658 if (nCurrentPos
== SEQUENCESIZE
)
659 nCurrentPos
= writeSequence();
661 m_bStartElementFinished
= sal_True
;
666 inline sal_Bool
SaxWriterHelper::endElement(const rtl::OUString
& rName
) throw( SAXException
)
668 FinishStartElement();
669 mp_Sequence
[nCurrentPos
] = '<';
671 if (nCurrentPos
== SEQUENCESIZE
)
672 nCurrentPos
= writeSequence();
673 mp_Sequence
[nCurrentPos
] = '/';
675 if (nCurrentPos
== SEQUENCESIZE
)
676 nCurrentPos
= writeSequence();
678 sal_Bool
bRet(writeString( rName
, sal_False
, sal_False
));
680 mp_Sequence
[nCurrentPos
] = '>';
682 if (nCurrentPos
== SEQUENCESIZE
)
683 nCurrentPos
= writeSequence();
688 inline void SaxWriterHelper::endDocument() throw( SAXException
)
692 m_Sequence
.realloc(nCurrentPos
);
693 nCurrentPos
= writeSequence();
694 //m_Sequence.realloc(SEQUENCESIZE);
698 inline void SaxWriterHelper::clearBuffer() throw( SAXException
)
700 FinishStartElement();
703 m_Sequence
.realloc(nCurrentPos
);
704 nCurrentPos
= writeSequence();
705 m_Sequence
.realloc(SEQUENCESIZE
);
709 inline sal_Bool
SaxWriterHelper::processingInstruction(const rtl::OUString
& rTarget
, const rtl::OUString
& rData
) throw( SAXException
)
711 FinishStartElement();
712 mp_Sequence
[nCurrentPos
] = '<';
714 if (nCurrentPos
== SEQUENCESIZE
)
715 nCurrentPos
= writeSequence();
716 mp_Sequence
[nCurrentPos
] = '?';
718 if (nCurrentPos
== SEQUENCESIZE
)
719 nCurrentPos
= writeSequence();
721 sal_Bool
bRet(writeString( rTarget
, sal_False
, sal_False
));
723 mp_Sequence
[nCurrentPos
] = ' ';
725 if (nCurrentPos
== SEQUENCESIZE
)
726 nCurrentPos
= writeSequence();
728 if (!writeString( rData
, sal_False
, sal_False
))
731 mp_Sequence
[nCurrentPos
] = '?';
733 if (nCurrentPos
== SEQUENCESIZE
)
734 nCurrentPos
= writeSequence();
735 mp_Sequence
[nCurrentPos
] = '>';
737 if (nCurrentPos
== SEQUENCESIZE
)
738 nCurrentPos
= writeSequence();
743 inline void SaxWriterHelper::startCDATA() throw( SAXException
)
745 FinishStartElement();
746 if ((nCurrentPos
+ 9) <= SEQUENCESIZE
)
748 memcpy( &(mp_Sequence
[nCurrentPos
]), "<![CDATA[" , 9 );
752 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)"<![CDATA[" , 9);
753 if (nCurrentPos
== SEQUENCESIZE
)
754 nCurrentPos
= writeSequence();
757 inline void SaxWriterHelper::endCDATA() throw( SAXException
)
759 FinishStartElement();
760 if ((nCurrentPos
+ 3) <= SEQUENCESIZE
)
762 memcpy( &(mp_Sequence
[nCurrentPos
]), "]]>" , 3 );
766 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)"]]>" , 3);
767 if (nCurrentPos
== SEQUENCESIZE
)
768 nCurrentPos
= writeSequence();
771 inline sal_Bool
SaxWriterHelper::comment(const rtl::OUString
& rComment
) throw( SAXException
)
773 FinishStartElement();
774 mp_Sequence
[nCurrentPos
] = '<';
776 if (nCurrentPos
== SEQUENCESIZE
)
777 nCurrentPos
= writeSequence();
778 mp_Sequence
[nCurrentPos
] = '!';
780 if (nCurrentPos
== SEQUENCESIZE
)
781 nCurrentPos
= writeSequence();
782 mp_Sequence
[nCurrentPos
] = '-';
784 if (nCurrentPos
== SEQUENCESIZE
)
785 nCurrentPos
= writeSequence();
786 mp_Sequence
[nCurrentPos
] = '-';
788 if (nCurrentPos
== SEQUENCESIZE
)
789 nCurrentPos
= writeSequence();
791 sal_Bool
bRet(writeString( rComment
, sal_False
, sal_False
));
793 mp_Sequence
[nCurrentPos
] = '-';
795 if (nCurrentPos
== SEQUENCESIZE
)
796 nCurrentPos
= writeSequence();
797 mp_Sequence
[nCurrentPos
] = '-';
799 if (nCurrentPos
== SEQUENCESIZE
)
800 nCurrentPos
= writeSequence();
801 mp_Sequence
[nCurrentPos
] = '>';
803 if (nCurrentPos
== SEQUENCESIZE
)
804 nCurrentPos
= writeSequence();
809 inline sal_Int32
calcXMLByteLength( const sal_Unicode
*pStr
, sal_Int32 nStrLen
,
810 sal_Bool bDoNormalization
,
811 sal_Bool bNormalizeWhitespace
)
813 sal_Int32 nOutputLength
= 0;
814 sal_uInt32 nSurrogate
= 0;
816 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
818 sal_uInt16 c
= pStr
[i
];
819 if( !IsInvalidChar(c
) && (c
>= 0x0001) && (c
<= 0x007F) )
821 if( bDoNormalization
)
825 case '&': // resemble to &
832 case 39: // 39 == ''', '
840 if( bNormalizeWhitespace
)
842 nOutputLength
+= 6; //
858 else if( c
>= 0xd800 && c
< 0xdc00 )
861 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
863 else if( c
>= 0xdc00 && c
< 0xe000 )
865 // 2. surrogate: write as UTF-8 (if range is OK
866 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
867 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
871 else if( c
> 0x07FF )
880 // surrogate processing
881 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
885 return nOutputLength
;
888 /** returns position of first ascii 10 within the string, -1 when no 10 in string.
890 static inline sal_Int32
getFirstLineBreak( const OUString
& str
) throw ()
892 const sal_Unicode
*pSource
= str
.getStr();
893 sal_Int32 nLen
= str
.getLength();
895 for( int n
= 0; n
< nLen
; n
++ )
897 if( LINEFEED
== pSource
[n
] ) {
904 /** returns position of last ascii 10 within sequence, -1 when no 10 in string.
906 static inline sal_Int32
getLastLineBreak( const Sequence
<sal_Int8
> & seq
) throw ()
908 const sal_Int8
*pSource
= seq
.getConstArray();
909 sal_Int32 nLen
= seq
.getLength();
911 for( int n
= nLen
-1; n
>= 0 ; n
-- )
913 if( LINEFEED
== pSource
[n
] ) {
922 public WeakImplHelper3
<
924 XExtendedDocumentHandler
,
930 mp_SaxWriterHelper( NULL
),
931 m_bForceLineBreak(sal_False
),
932 m_bAllowLineBreak(sal_False
)
936 delete mp_SaxWriterHelper
;
939 public: // XActiveDataSource
940 virtual void SAL_CALL
setOutputStream(const Reference
< XOutputStream
> & aStream
)
941 throw (RuntimeException
)
943 // temporary: set same stream again to clear buffer
944 if ( m_out
== aStream
&& mp_SaxWriterHelper
&& m_bDocStarted
)
945 mp_SaxWriterHelper
->clearBuffer();
950 delete mp_SaxWriterHelper
;
951 mp_SaxWriterHelper
= new SaxWriterHelper(m_out
);
952 m_bDocStarted
= sal_False
;
954 m_bIsCDATA
= sal_False
;
958 virtual Reference
< XOutputStream
> SAL_CALL
getOutputStream(void)
959 throw(RuntimeException
)
962 public: // XDocumentHandler
963 virtual void SAL_CALL
startDocument(void)
964 throw(SAXException
, RuntimeException
);
966 virtual void SAL_CALL
endDocument(void)
967 throw(SAXException
, RuntimeException
);
969 virtual void SAL_CALL
startElement(const OUString
& aName
,
970 const Reference
< XAttributeList
> & xAttribs
)
971 throw (SAXException
, RuntimeException
);
973 virtual void SAL_CALL
endElement(const OUString
& aName
)
974 throw(SAXException
, RuntimeException
);
976 virtual void SAL_CALL
characters(const OUString
& aChars
)
977 throw(SAXException
, RuntimeException
);
979 virtual void SAL_CALL
ignorableWhitespace(const OUString
& aWhitespaces
)
980 throw(SAXException
, RuntimeException
);
981 virtual void SAL_CALL
processingInstruction(const OUString
& aTarget
,
982 const OUString
& aData
)
983 throw(SAXException
, RuntimeException
);
984 virtual void SAL_CALL
setDocumentLocator(const Reference
< XLocator
> & xLocator
)
985 throw(SAXException
, RuntimeException
);
987 public: // XExtendedDocumentHandler
988 virtual void SAL_CALL
startCDATA(void) throw(SAXException
, RuntimeException
);
989 virtual void SAL_CALL
endCDATA(void) throw(RuntimeException
);
990 virtual void SAL_CALL
comment(const OUString
& sComment
)
991 throw(SAXException
, RuntimeException
);
992 virtual void SAL_CALL
unknown(const OUString
& sString
)
993 throw(SAXException
, RuntimeException
);
994 virtual void SAL_CALL
allowLineBreak(void)
995 throw(SAXException
,RuntimeException
);
997 public: // XServiceInfo
998 OUString SAL_CALL
getImplementationName() throw();
999 Sequence
< OUString
> SAL_CALL
getSupportedServiceNames(void) throw();
1000 sal_Bool SAL_CALL
supportsService(const OUString
& ServiceName
) throw();
1004 void writeSequence( const Sequence
<sal_Int8
> & seq
);
1005 sal_Int32
getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence
) throw();
1007 Reference
< XOutputStream
> m_out
;
1008 Sequence
< sal_Int8
> m_seqStartElement
;
1009 SaxWriterHelper
* mp_SaxWriterHelper
;
1011 // Status information
1012 sal_Bool m_bDocStarted
: 1;
1013 sal_Bool m_bIsCDATA
: 1;
1014 sal_Bool m_bForceLineBreak
: 1;
1015 sal_Bool m_bAllowLineBreak
: 1;
1020 //--------------------------------------
1021 // the extern interface
1022 //---------------------------------------
1023 Reference
< XInterface
> SAL_CALL
SaxWriter_CreateInstance(
1024 const Reference
< XMultiServiceFactory
> & )
1027 SAXWriter
*p
= new SAXWriter
;
1028 return Reference
< XInterface
> ( SAL_STATIC_CAST(OWeakObject
*, p
) );
1031 OUString
SaxWriter_getServiceName() throw()
1033 return OUString::createFromAscii( "com.sun.star.xml.sax.Writer" );
1036 OUString
SaxWriter_getImplementationName() throw()
1038 return OUString::createFromAscii( "com.sun.star.extensions.xml.sax.Writer" );
1041 Sequence
< OUString
> SaxWriter_getSupportedServiceNames(void) throw()
1043 Sequence
<OUString
> aRet(1);
1044 aRet
.getArray()[0] = SaxWriter_getServiceName();
1049 sal_Int32
SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence
) throw()
1051 sal_Int32 nLength
=-1;
1052 if (mp_SaxWriterHelper
)
1054 if ( m_bForceLineBreak
||
1055 (m_bAllowLineBreak
&&
1056 ((nFirstLineBreakOccurence
+ mp_SaxWriterHelper
->GetLastColumnCount()) > MAXCOLUMNCOUNT
)) )
1059 m_bForceLineBreak
= sal_False
;
1060 m_bAllowLineBreak
= sal_False
;
1064 static inline sal_Bool
isFirstCharWhitespace( const sal_Unicode
*p
) throw()
1071 OUString
SAXWriter::getImplementationName() throw()
1073 return SaxWriter_getImplementationName();
1077 sal_Bool
SAXWriter::supportsService(const OUString
& ServiceName
) throw()
1079 Sequence
< OUString
> aSNL
= getSupportedServiceNames();
1080 const OUString
* pArray
= aSNL
.getConstArray();
1082 for( sal_Int32 i
= 0; i
< aSNL
.getLength(); i
++ )
1083 if( pArray
[i
] == ServiceName
)
1090 Sequence
< OUString
> SAXWriter::getSupportedServiceNames(void) throw ()
1092 Sequence
<OUString
> seq(1);
1093 seq
.getArray()[0] = SaxWriter_getServiceName();
1099 void SAXWriter::startDocument() throw(SAXException
, RuntimeException
)
1101 if( m_bDocStarted
|| ! m_out
.is() || !mp_SaxWriterHelper
) {
1102 throw SAXException();
1104 m_bDocStarted
= sal_True
;
1105 mp_SaxWriterHelper
->startDocument();
1109 void SAXWriter::endDocument(void) throw(SAXException
, RuntimeException
)
1111 if( ! m_bDocStarted
)
1114 OUString::createFromAscii( "endDocument called before startDocument" ),
1115 Reference
< XInterface
>() , Any() );
1119 OUString::createFromAscii( "unexpected end of document" ),
1120 Reference
< XInterface
>() , Any() );
1122 mp_SaxWriterHelper
->endDocument();
1125 m_out
->closeOutput();
1127 catch( IOException
& e
)
1132 OUString::createFromAscii( "IO exception during closing the IO Stream" ),
1133 Reference
< XInterface
> (),
1139 void SAXWriter::startElement(const OUString
& aName
, const Reference
< XAttributeList
>& xAttribs
)
1140 throw(SAXException
, RuntimeException
)
1142 if( ! m_bDocStarted
)
1144 SAXException except
;
1145 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement called before startDocument" ));
1150 SAXException except
;
1151 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement call not allowed with CDATA sections" ));
1155 sal_Int32
nLength(0);
1156 if (m_bAllowLineBreak
)
1158 sal_Int32 nAttribCount
= xAttribs
.is() ? xAttribs
->getLength() : 0;
1161 nLength
+= calcXMLByteLength( aName
.getStr() , aName
.getLength(),
1162 sal_False
, sal_False
); // the tag name
1165 for( n
= 0 ; n
< static_cast<sal_Int16
>(nAttribCount
) ; n
++ ) {
1167 OUString tmp
= xAttribs
->getNameByIndex( n
);
1169 nLength
+= calcXMLByteLength( tmp
.getStr() , tmp
.getLength() , sal_False
, sal_False
);
1173 tmp
= xAttribs
->getValueByIndex( n
);
1175 nLength
+= calcXMLByteLength( tmp
.getStr(), tmp
.getLength(), sal_True
, sal_True
);
1183 // Is there a new indentation necesarry ?
1184 sal_Int32
nPrefix(getIndentPrefixLength( nLength
));
1186 // write into sequence
1188 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1190 SaxInvalidCharacterError
eRet(mp_SaxWriterHelper
->startElement(aName
, xAttribs
));
1194 if (eRet
== SAX_WARNING
)
1196 SAXInvalidCharacterException except
;
1197 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export in a attribute value" ) );
1200 else if (eRet
== SAX_ERROR
)
1202 SAXException except
;
1203 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1208 void SAXWriter::endElement(const OUString
& aName
) throw (SAXException
, RuntimeException
)
1210 if( ! m_bDocStarted
) {
1211 throw SAXException ();
1215 if( m_nLevel
< 0 ) {
1216 throw SAXException();
1218 sal_Bool
bRet(sal_True
);
1220 if( mp_SaxWriterHelper
->FinishEmptyElement() )
1221 m_bForceLineBreak
= sal_False
;
1224 // only ascii chars allowed
1225 sal_Int32
nLength(0);
1226 if (m_bAllowLineBreak
)
1227 nLength
= 3 + calcXMLByteLength( aName
.getStr(), aName
.getLength(), sal_False
, sal_False
);
1228 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1231 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1233 bRet
= mp_SaxWriterHelper
->endElement(aName
);
1238 SAXException except
;
1239 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1244 void SAXWriter::characters(const OUString
& aChars
) throw(SAXException
, RuntimeException
)
1246 if( ! m_bDocStarted
)
1248 SAXException except
;
1249 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "characters method called before startDocument" ) );
1253 sal_Bool
bThrowException(sal_False
);
1254 if( aChars
.getLength() )
1257 bThrowException
= !mp_SaxWriterHelper
->writeString( aChars
, sal_False
, sal_False
);
1260 // Note : nFirstLineBreakOccurence is not exact, because we don't know, how
1261 // many 2 and 3 byte chars are inbetween. However this whole stuff
1262 // is eitherway for pretty printing only, so it does not need to be exact.
1263 sal_Int32
nLength(0);
1264 sal_Int32
nIndentPrefix(-1);
1265 if (m_bAllowLineBreak
)
1267 sal_Int32 nFirstLineBreakOccurence
= getFirstLineBreak( aChars
);
1269 nLength
= calcXMLByteLength( aChars
.getStr(), aChars
.getLength(),
1270 ! m_bIsCDATA
, sal_False
);
1271 nIndentPrefix
= getIndentPrefixLength(
1272 nFirstLineBreakOccurence
>= 0 ? nFirstLineBreakOccurence
: nLength
);
1275 nIndentPrefix
= getIndentPrefixLength(nLength
);
1277 // insert indentation
1278 if( nIndentPrefix
>= 0 )
1280 if( isFirstCharWhitespace( aChars
.getStr() ) )
1281 mp_SaxWriterHelper
->insertIndentation( nIndentPrefix
- 1 );
1283 mp_SaxWriterHelper
->insertIndentation( nIndentPrefix
);
1285 bThrowException
= !mp_SaxWriterHelper
->writeString(aChars
, sal_True
, sal_False
);
1288 if (bThrowException
)
1290 SAXInvalidCharacterException except
;
1291 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1297 void SAXWriter::ignorableWhitespace(const OUString
&) throw(SAXException
, RuntimeException
)
1299 if( ! m_bDocStarted
)
1301 throw SAXException ();
1304 m_bForceLineBreak
= sal_True
;
1307 void SAXWriter::processingInstruction(const OUString
& aTarget
, const OUString
& aData
)
1308 throw (SAXException
, RuntimeException
)
1310 if( ! m_bDocStarted
|| m_bIsCDATA
)
1312 throw SAXException();
1315 sal_Int32
nLength(0);
1316 if (m_bAllowLineBreak
)
1318 nLength
= 2; // "<?"
1319 nLength
+= calcXMLByteLength( aTarget
.getStr(), aTarget
.getLength(), sal_False
, sal_False
);
1321 nLength
+= 1; // " "
1323 nLength
+= calcXMLByteLength( aData
.getStr(), aData
.getLength(), sal_False
, sal_False
);
1325 nLength
+= 2; // "?>"
1328 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1331 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1333 if (!mp_SaxWriterHelper
->processingInstruction(aTarget
, aData
))
1335 SAXException except
;
1336 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1342 void SAXWriter::setDocumentLocator(const Reference
< XLocator
>&)
1343 throw (SAXException
, RuntimeException
)
1348 void SAXWriter::startCDATA(void) throw(SAXException
, RuntimeException
)
1350 if( ! m_bDocStarted
|| m_bIsCDATA
)
1352 throw SAXException ();
1355 sal_Int32 nLength
= 9;
1356 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1358 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1360 mp_SaxWriterHelper
->startCDATA();
1362 m_bIsCDATA
= sal_True
;
1365 void SAXWriter::endCDATA(void) throw (RuntimeException
)
1367 if( ! m_bDocStarted
| ! m_bIsCDATA
)
1369 SAXException except
;
1370 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "endCDATA was called without startCDATA" ) );
1374 sal_Int32 nLength
= 3;
1375 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1377 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1379 mp_SaxWriterHelper
->endCDATA();
1381 m_bIsCDATA
= sal_False
;
1385 void SAXWriter::comment(const OUString
& sComment
) throw(SAXException
, RuntimeException
)
1387 if( ! m_bDocStarted
|| m_bIsCDATA
)
1389 throw SAXException();
1392 sal_Int32
nLength(0);
1393 if (m_bAllowLineBreak
)
1395 nLength
= 4; // "<!--"
1396 nLength
+= calcXMLByteLength( sComment
.getStr(), sComment
.getLength(), sal_False
, sal_False
);
1401 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1403 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1405 if (!mp_SaxWriterHelper
->comment(sComment
))
1407 SAXException except
;
1408 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1414 void SAXWriter::allowLineBreak( ) throw ( SAXException
, RuntimeException
)
1416 if( ! m_bDocStarted
|| m_bAllowLineBreak
) {
1417 throw SAXException();
1420 m_bAllowLineBreak
= sal_True
;
1423 void SAXWriter::unknown(const OUString
& sString
) throw (SAXException
, RuntimeException
)
1426 if( ! m_bDocStarted
)
1428 throw SAXException ();
1432 throw SAXException();
1435 if( sString
.matchAsciiL( "<?xml", 5 ) )
1438 sal_Int32
nLength(0);
1439 if (m_bAllowLineBreak
)
1440 nLength
= calcXMLByteLength( sString
.getStr(), sString
.getLength(), sal_False
, sal_False
);
1442 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1444 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1446 if (!mp_SaxWriterHelper
->writeString( sString
, sal_False
, sal_False
))
1448 SAXException except
;
1449 except
.Message
= OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );