1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
30 #include <com/sun/star/lang/XServiceInfo.hpp>
31 #include <com/sun/star/util/XCloneable.hpp>
32 #include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
33 #include <com/sun/star/xml/sax/XParser.hpp>
34 #include <com/sun/star/xml/sax/SAXParseException.hpp>
35 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
37 #include <com/sun/star/io/XActiveDataSource.hpp>
39 #include <cppuhelper/factory.hxx>
40 #include <cppuhelper/weak.hxx>
41 #include <cppuhelper/implbase3.hxx>
43 #include <rtl/strbuf.hxx>
44 #include <rtl/byteseq.hxx>
45 #include <rtl/ustrbuf.hxx>
47 using namespace ::rtl
;
48 using namespace ::std
;
49 using namespace ::osl
;
50 using namespace ::cppu
;
51 using namespace ::com::sun::star::uno
;
52 using namespace ::com::sun::star::lang
;
53 using namespace ::com::sun::star::registry
;
54 using namespace ::com::sun::star::xml::sax
;
55 using namespace ::com::sun::star::util
;
56 using namespace ::com::sun::star::io
;
58 #include "factory.hxx"
59 #include "xml2utf.hxx"
62 #define SEQUENCESIZE 1024
63 #define MAXCOLUMNCOUNT 72
68 * Character conversion functions
73 namespace sax_expatwrap
{
75 enum SaxInvalidCharacterError
84 Reference
< XOutputStream
> m_out
;
85 Sequence
< sal_Int8
> m_Sequence
;
86 sal_Int8
* mp_Sequence
;
88 sal_Int32 nLastLineFeedPos
; // is negative after writing a sequence
89 sal_uInt32 nCurrentPos
;
90 sal_Bool m_bStartElementFinished
;
93 inline sal_uInt32
writeSequence() throw( SAXException
);
95 // Use only when inserting the bytes needs more space than is left in the sequence,
96 // so that the sequence has to be written out and rPos reset to 0.
97 // Writes the sequence only on overflow; the sequence may be full at the end (rPos == SEQUENCESIZE).
98 inline void AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
99 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
);
100 inline sal_Bool
convertToXML(const sal_Unicode
* pStr
,
102 sal_Bool bDoNormalization
,
103 sal_Bool bNormalizeWhitespace
,
105 sal_uInt32
& rPos
) throw( SAXException
);
106 inline void FinishStartElement() throw( SAXException
);
108 SaxWriterHelper(Reference
< XOutputStream
> m_TempOut
) :
110 m_Sequence(SEQUENCESIZE
),
114 m_bStartElementFinished(sal_True
)
116 OSL_ENSURE(SEQUENCESIZE
> 50, "Sequence cache size to small");
117 mp_Sequence
= m_Sequence
.getArray();
121 OSL_ENSURE(!nCurrentPos
, "cached Sequence not written");
122 OSL_ENSURE(m_bStartElementFinished
, "StartElement not complettly written");
125 inline void insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
);
127 // Returns whether writing worked correctly or invalid characters were in the string.
128 // If there are invalid characters in the string, it returns sal_False.
129 // Then the calling method has to throw the needed exception.
130 inline sal_Bool
writeString(const rtl::OUString
& rWriteOutString
,
131 sal_Bool bDoNormalization
,
132 sal_Bool bNormalizeWhitespace
) throw( SAXException
);
134 sal_uInt32
GetLastColumnCount() const
135 { return (sal_uInt32
)(nCurrentPos
- nLastLineFeedPos
); }
137 inline void startDocument() throw( SAXException
);
139 // Returns whether writing worked correctly or invalid characters were in the strings.
140 // If there are invalid characters in one of the strings, the returned SaxInvalidCharacterError reports it.
141 // Then the calling method has to throw the needed exception.
142 inline SaxInvalidCharacterError
startElement(const rtl::OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
);
143 inline sal_Bool
FinishEmptyElement() throw( SAXException
);
145 // Returns whether writing worked correctly or invalid characters were in the string.
146 // If there are invalid characters in the string, it returns sal_False.
147 // Then the calling method has to throw the needed exception.
148 inline sal_Bool
endElement(const rtl::OUString
& rName
) throw( SAXException
);
149 inline void endDocument() throw( SAXException
);
151 // Returns whether writing worked correctly or invalid characters were in the strings.
152 // If there are invalid characters in one of the strings, it returns sal_False.
153 // Then the calling method has to throw the needed exception.
154 inline sal_Bool
processingInstruction(const rtl::OUString
& rTarget
, const rtl::OUString
& rData
) throw( SAXException
);
155 inline void startCDATA() throw( SAXException
);
156 inline void endCDATA() throw( SAXException
);
158 // Returns whether writing worked correctly or invalid characters were in the string.
159 // If there are invalid characters in the string, it returns sal_False.
160 // Then the calling method has to throw the needed exception.
161 inline sal_Bool
comment(const rtl::OUString
& rComment
) throw( SAXException
);
163 inline void clearBuffer() throw( SAXException
);
166 const sal_Bool g_bValidCharsBelow32
[32] =
171 0,0,0,0,0,0,0,0, //16
175 inline sal_Bool
IsInvalidChar(const sal_Unicode aChar
)
177 sal_Bool
bRet(sal_False
);
178 // check first for the most common characters
179 if( aChar
< 32 || aChar
>= 0xd800 )
180 bRet
= ( (aChar
< 32 && ! g_bValidCharsBelow32
[aChar
]) ||
187 * write through to the output stream
190 inline sal_uInt32
SaxWriterHelper::writeSequence() throw( SAXException
)
194 m_out
->writeBytes( m_Sequence
);
196 catch (const IOException
& e
)
201 OUString("io exception during writing"),
202 Reference
< XInterface
> (),
205 nLastLineFeedPos
-= SEQUENCESIZE
;
209 inline void SaxWriterHelper::AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
210 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
)
212 OSL_ENSURE((rPos
+ nBytesCount
) > SEQUENCESIZE
, "wrong use of AddBytesMethod");
213 sal_uInt32
nCount(SEQUENCESIZE
- rPos
);
214 memcpy( &(pTarget
[rPos
]) , pBytes
, nCount
);
216 OSL_ENSURE(rPos
+ nCount
== SEQUENCESIZE
, "the position should be the at the end");
218 rPos
= writeSequence();
219 sal_uInt32
nRestCount(nBytesCount
- nCount
);
220 if ((rPos
+ nRestCount
) <= SEQUENCESIZE
)
222 memcpy( &(pTarget
[rPos
]), &pBytes
[nCount
], nRestCount
);
226 AddBytes(pTarget
, rPos
, &pBytes
[nCount
], nRestCount
);
229 /** Converts an UTF16 string to UTF8 and does XML normalization
232 Pointer to the piece of memory to which the output should be written. The caller
233 must call calcXMLByteLength on the same string to ensure
234 that there is enough memory for the conversion.
236 inline sal_Bool
SaxWriterHelper::convertToXML( const sal_Unicode
* pStr
,
238 sal_Bool bDoNormalization
,
239 sal_Bool bNormalizeWhitespace
,
241 sal_uInt32
& rPos
) throw( SAXException
)
243 sal_Bool
bRet(sal_True
);
244 sal_uInt32 nSurrogate
= 0;
246 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
248 sal_uInt16 c
= pStr
[i
];
249 if (IsInvalidChar(c
))
251 else if( (c
>= 0x0001) && (c
<= 0x007F) )
253 if( bDoNormalization
)
257 case '&': // resemble to &
259 if ((rPos
+ 5) > SEQUENCESIZE
)
260 AddBytes(pTarget
, rPos
, (sal_Int8
*)"&", 5);
263 memcpy( &(pTarget
[rPos
]) , "&", 5 );
270 if ((rPos
+ 4) > SEQUENCESIZE
)
271 AddBytes(pTarget
, rPos
, (sal_Int8
*)"<", 4);
274 memcpy( &(pTarget
[rPos
]) , "<" , 4 );
281 if ((rPos
+ 4) > SEQUENCESIZE
)
282 AddBytes(pTarget
, rPos
, (sal_Int8
*)">", 4);
285 memcpy( &(pTarget
[rPos
]) , ">" , 4 );
290 case 39: // 39 == '''
292 if ((rPos
+ 6) > SEQUENCESIZE
)
293 AddBytes(pTarget
, rPos
, (sal_Int8
*)"'", 6);
296 memcpy( &(pTarget
[rPos
]) , "'" , 6 );
303 if ((rPos
+ 6) > SEQUENCESIZE
)
304 AddBytes(pTarget
, rPos
, (sal_Int8
*)""", 6);
307 memcpy( &(pTarget
[rPos
]) , """ , 6 );
314 if ((rPos
+ 6) > SEQUENCESIZE
)
315 AddBytes(pTarget
, rPos
, (sal_Int8
*)"
", 6);
318 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
325 if( bNormalizeWhitespace
)
327 if ((rPos
+ 6) > SEQUENCESIZE
)
328 AddBytes(pTarget
, rPos
, (sal_Int8
*)"
" , 6);
331 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
337 pTarget
[rPos
] = LINEFEED
;
338 nLastLineFeedPos
= rPos
;
345 if( bNormalizeWhitespace
)
347 if ((rPos
+ 6) > SEQUENCESIZE
)
348 AddBytes(pTarget
, rPos
, (sal_Int8
*)"	" , 6);
351 memcpy( &(pTarget
[rPos
]) , "	" , 6 );
364 pTarget
[rPos
] = (sal_Int8
)c
;
372 pTarget
[rPos
] = (sal_Int8
)c
;
373 if ((sal_Int8
)c
== LINEFEED
)
374 nLastLineFeedPos
= rPos
;
378 else if( c
>= 0xd800 && c
< 0xdc00 )
380 // 1. surrogate: save (until 2. surrogate)
381 OSL_ENSURE( nSurrogate
== 0, "left-over Unicode surrogate" );
382 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
384 else if( c
>= 0xdc00 && c
< 0xe000 )
386 // 2. surrogate: write as UTF-8
387 OSL_ENSURE( nSurrogate
!= 0, "lone 2nd Unicode surrogate" );
389 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
390 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
392 sal_Int8 aBytes
[] = { sal_Int8(0xF0 | ((nSurrogate
>> 18) & 0x0F)),
393 sal_Int8(0x80 | ((nSurrogate
>> 12) & 0x3F)),
394 sal_Int8(0x80 | ((nSurrogate
>> 6) & 0x3F)),
395 sal_Int8(0x80 | ((nSurrogate
>> 0) & 0x3F)) };
396 if ((rPos
+ 4) > SEQUENCESIZE
)
397 AddBytes(pTarget
, rPos
, aBytes
, 4);
400 pTarget
[rPos
] = aBytes
[0];
402 pTarget
[rPos
] = aBytes
[1];
404 pTarget
[rPos
] = aBytes
[2];
406 pTarget
[rPos
] = aBytes
[3];
412 OSL_FAIL( "illegal Unicode character" );
419 else if( c
> 0x07FF )
421 sal_Int8 aBytes
[] = { sal_Int8(0xE0 | ((c
>> 12) & 0x0F)),
422 sal_Int8(0x80 | ((c
>> 6) & 0x3F)),
423 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
424 if ((rPos
+ 3) > SEQUENCESIZE
)
425 AddBytes(pTarget
, rPos
, aBytes
, 3);
428 pTarget
[rPos
] = aBytes
[0];
430 pTarget
[rPos
] = aBytes
[1];
432 pTarget
[rPos
] = aBytes
[2];
438 sal_Int8 aBytes
[] = { sal_Int8(0xC0 | ((c
>> 6) & 0x1F)),
439 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
440 if ((rPos
+ 2) > SEQUENCESIZE
)
441 AddBytes(pTarget
, rPos
, aBytes
, 2);
444 pTarget
[rPos
] = aBytes
[0];
446 pTarget
[rPos
] = aBytes
[1];
450 OSL_ENSURE(rPos
<= SEQUENCESIZE
, "not reset current position");
451 if (rPos
== SEQUENCESIZE
)
452 rPos
= writeSequence();
454 // reset left-over surrogate
455 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
457 OSL_ENSURE( nSurrogate
!= 0, "left-over Unicode surrogate" );
465 inline void SaxWriterHelper::FinishStartElement() throw( SAXException
)
467 if (!m_bStartElementFinished
)
469 mp_Sequence
[nCurrentPos
] = '>';
471 if (nCurrentPos
== SEQUENCESIZE
)
472 nCurrentPos
= writeSequence();
473 m_bStartElementFinished
= sal_True
;
477 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
)
479 FinishStartElement();
482 if ((nCurrentPos
+ m_nLevel
+ 1) <= SEQUENCESIZE
)
484 mp_Sequence
[nCurrentPos
] = LINEFEED
;
485 nLastLineFeedPos
= nCurrentPos
;
487 memset( &(mp_Sequence
[nCurrentPos
]) , 32 , m_nLevel
);
488 nCurrentPos
+= m_nLevel
;
489 if (nCurrentPos
== SEQUENCESIZE
)
490 nCurrentPos
= writeSequence();
494 sal_uInt32
nCount(m_nLevel
+ 1);
495 sal_Int8
* pBytes
= new sal_Int8
[nCount
];
496 pBytes
[0] = LINEFEED
;
497 memset( &(pBytes
[1]), 32, m_nLevel
);
498 AddBytes(mp_Sequence
, nCurrentPos
, pBytes
, nCount
);
500 nLastLineFeedPos
= nCurrentPos
- nCount
;
501 if (nCurrentPos
== SEQUENCESIZE
)
502 nCurrentPos
= writeSequence();
507 mp_Sequence
[nCurrentPos
] = LINEFEED
;
508 nLastLineFeedPos
= nCurrentPos
;
510 if (nCurrentPos
== SEQUENCESIZE
)
511 nCurrentPos
= writeSequence();
515 inline sal_Bool
SaxWriterHelper::writeString( const rtl::OUString
& rWriteOutString
,
516 sal_Bool bDoNormalization
,
517 sal_Bool bNormalizeWhitespace
) throw( SAXException
)
519 FinishStartElement();
520 return convertToXML(rWriteOutString
.getStr(),
521 rWriteOutString
.getLength(),
523 bNormalizeWhitespace
,
528 inline void SaxWriterHelper::startDocument() throw( SAXException
)
530 const char pc
[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
531 const int nLen
= strlen( pc
);
532 if ((nCurrentPos
+ nLen
) <= SEQUENCESIZE
)
534 memcpy( mp_Sequence
, pc
, nLen
);
539 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)pc
, nLen
);
541 OSL_ENSURE(nCurrentPos
<= SEQUENCESIZE
, "not reset current position");
542 if (nCurrentPos
== SEQUENCESIZE
)
543 nCurrentPos
= writeSequence();
544 mp_Sequence
[nCurrentPos
] = LINEFEED
;
546 if (nCurrentPos
== SEQUENCESIZE
)
547 nCurrentPos
= writeSequence();
550 inline SaxInvalidCharacterError
SaxWriterHelper::startElement(const rtl::OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
)
552 FinishStartElement();
553 mp_Sequence
[nCurrentPos
] = '<';
555 if (nCurrentPos
== SEQUENCESIZE
)
556 nCurrentPos
= writeSequence();
558 SaxInvalidCharacterError
eRet(SAX_NONE
);
559 if (!writeString(rName
, sal_False
, sal_False
))
562 sal_Int16 nAttribCount
= xAttribs
.is() ? static_cast<sal_Int16
>(xAttribs
->getLength()) : 0;
563 for(sal_Int16 i
= 0 ; i
< nAttribCount
; i
++ )
565 mp_Sequence
[nCurrentPos
] = ' ';
567 if (nCurrentPos
== SEQUENCESIZE
)
568 nCurrentPos
= writeSequence();
570 if (!writeString(xAttribs
->getNameByIndex( i
), sal_False
, sal_False
))
573 mp_Sequence
[nCurrentPos
] = '=';
575 if (nCurrentPos
== SEQUENCESIZE
)
576 nCurrentPos
= writeSequence();
577 mp_Sequence
[nCurrentPos
] = '"';
579 if (nCurrentPos
== SEQUENCESIZE
)
580 nCurrentPos
= writeSequence();
582 if (!writeString(xAttribs
->getValueByIndex( i
), sal_True
, sal_True
) &&
583 !(eRet
== SAX_ERROR
))
586 mp_Sequence
[nCurrentPos
] = '"';
588 if (nCurrentPos
== SEQUENCESIZE
)
589 nCurrentPos
= writeSequence();
592 m_bStartElementFinished
= sal_False
; // because the '>' character is not added,
593 // because it is possible, that the "/>"
594 // characters have to add
598 inline sal_Bool
SaxWriterHelper::FinishEmptyElement() throw( SAXException
)
600 if (m_bStartElementFinished
)
603 mp_Sequence
[nCurrentPos
] = '/';
605 if (nCurrentPos
== SEQUENCESIZE
)
606 nCurrentPos
= writeSequence();
607 mp_Sequence
[nCurrentPos
] = '>';
609 if (nCurrentPos
== SEQUENCESIZE
)
610 nCurrentPos
= writeSequence();
612 m_bStartElementFinished
= sal_True
;
617 inline sal_Bool
SaxWriterHelper::endElement(const rtl::OUString
& rName
) throw( SAXException
)
619 FinishStartElement();
620 mp_Sequence
[nCurrentPos
] = '<';
622 if (nCurrentPos
== SEQUENCESIZE
)
623 nCurrentPos
= writeSequence();
624 mp_Sequence
[nCurrentPos
] = '/';
626 if (nCurrentPos
== SEQUENCESIZE
)
627 nCurrentPos
= writeSequence();
629 sal_Bool
bRet(writeString( rName
, sal_False
, sal_False
));
631 mp_Sequence
[nCurrentPos
] = '>';
633 if (nCurrentPos
== SEQUENCESIZE
)
634 nCurrentPos
= writeSequence();
639 inline void SaxWriterHelper::endDocument() throw( SAXException
)
643 m_Sequence
.realloc(nCurrentPos
);
644 nCurrentPos
= writeSequence();
645 //m_Sequence.realloc(SEQUENCESIZE);
649 inline void SaxWriterHelper::clearBuffer() throw( SAXException
)
651 FinishStartElement();
654 m_Sequence
.realloc(nCurrentPos
);
655 nCurrentPos
= writeSequence();
656 m_Sequence
.realloc(SEQUENCESIZE
);
657 // Be sure to update the array pointer after the reallocation.
658 mp_Sequence
= m_Sequence
.getArray();
662 inline sal_Bool
SaxWriterHelper::processingInstruction(const rtl::OUString
& rTarget
, const rtl::OUString
& rData
) throw( SAXException
)
664 FinishStartElement();
665 mp_Sequence
[nCurrentPos
] = '<';
667 if (nCurrentPos
== SEQUENCESIZE
)
668 nCurrentPos
= writeSequence();
669 mp_Sequence
[nCurrentPos
] = '?';
671 if (nCurrentPos
== SEQUENCESIZE
)
672 nCurrentPos
= writeSequence();
674 sal_Bool
bRet(writeString( rTarget
, sal_False
, sal_False
));
676 mp_Sequence
[nCurrentPos
] = ' ';
678 if (nCurrentPos
== SEQUENCESIZE
)
679 nCurrentPos
= writeSequence();
681 if (!writeString( rData
, sal_False
, sal_False
))
684 mp_Sequence
[nCurrentPos
] = '?';
686 if (nCurrentPos
== SEQUENCESIZE
)
687 nCurrentPos
= writeSequence();
688 mp_Sequence
[nCurrentPos
] = '>';
690 if (nCurrentPos
== SEQUENCESIZE
)
691 nCurrentPos
= writeSequence();
696 inline void SaxWriterHelper::startCDATA() throw( SAXException
)
698 FinishStartElement();
699 if ((nCurrentPos
+ 9) <= SEQUENCESIZE
)
701 memcpy( &(mp_Sequence
[nCurrentPos
]), "<![CDATA[" , 9 );
705 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)"<![CDATA[" , 9);
706 if (nCurrentPos
== SEQUENCESIZE
)
707 nCurrentPos
= writeSequence();
710 inline void SaxWriterHelper::endCDATA() throw( SAXException
)
712 FinishStartElement();
713 if ((nCurrentPos
+ 3) <= SEQUENCESIZE
)
715 memcpy( &(mp_Sequence
[nCurrentPos
]), "]]>" , 3 );
719 AddBytes(mp_Sequence
, nCurrentPos
, (sal_Int8
*)"]]>" , 3);
720 if (nCurrentPos
== SEQUENCESIZE
)
721 nCurrentPos
= writeSequence();
724 inline sal_Bool
SaxWriterHelper::comment(const rtl::OUString
& rComment
) throw( SAXException
)
726 FinishStartElement();
727 mp_Sequence
[nCurrentPos
] = '<';
729 if (nCurrentPos
== SEQUENCESIZE
)
730 nCurrentPos
= writeSequence();
731 mp_Sequence
[nCurrentPos
] = '!';
733 if (nCurrentPos
== SEQUENCESIZE
)
734 nCurrentPos
= writeSequence();
735 mp_Sequence
[nCurrentPos
] = '-';
737 if (nCurrentPos
== SEQUENCESIZE
)
738 nCurrentPos
= writeSequence();
739 mp_Sequence
[nCurrentPos
] = '-';
741 if (nCurrentPos
== SEQUENCESIZE
)
742 nCurrentPos
= writeSequence();
744 sal_Bool
bRet(writeString( rComment
, sal_False
, sal_False
));
746 mp_Sequence
[nCurrentPos
] = '-';
748 if (nCurrentPos
== SEQUENCESIZE
)
749 nCurrentPos
= writeSequence();
750 mp_Sequence
[nCurrentPos
] = '-';
752 if (nCurrentPos
== SEQUENCESIZE
)
753 nCurrentPos
= writeSequence();
754 mp_Sequence
[nCurrentPos
] = '>';
756 if (nCurrentPos
== SEQUENCESIZE
)
757 nCurrentPos
= writeSequence();
762 inline sal_Int32
calcXMLByteLength( const sal_Unicode
*pStr
, sal_Int32 nStrLen
,
763 sal_Bool bDoNormalization
,
764 sal_Bool bNormalizeWhitespace
)
766 sal_Int32 nOutputLength
= 0;
767 sal_uInt32 nSurrogate
= 0;
769 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
771 sal_uInt16 c
= pStr
[i
];
772 if( !IsInvalidChar(c
) && (c
>= 0x0001) && (c
<= 0x007F) )
774 if( bDoNormalization
)
778 case '&': // resemble to &
785 case 39: // 39 == ''', '
793 if( bNormalizeWhitespace
)
795 nOutputLength
+= 6; //
811 else if( c
>= 0xd800 && c
< 0xdc00 )
814 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
816 else if( c
>= 0xdc00 && c
< 0xe000 )
818 // 2. surrogate: write as UTF-8 (if range is OK)
819 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
820 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
824 else if( c
> 0x07FF )
833 // surrogate processing
834 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
838 return nOutputLength
;
841 /** Returns the position of the first line feed (ASCII 10) within the string, or -1 when the string contains none.
843 static inline sal_Int32
getFirstLineBreak( const OUString
& str
) throw ()
845 const sal_Unicode
*pSource
= str
.getStr();
846 sal_Int32 nLen
= str
.getLength();
848 for( int n
= 0; n
< nLen
; n
++ )
850 if( LINEFEED
== pSource
[n
] ) {
857 /** Returns the position of the last line feed (ASCII 10) within the sequence, or -1 when the sequence contains none.
859 static inline sal_Int32
getLastLineBreak( const Sequence
<sal_Int8
> & seq
) throw ()
861 const sal_Int8
*pSource
= seq
.getConstArray();
862 sal_Int32 nLen
= seq
.getLength();
864 for( int n
= nLen
-1; n
>= 0 ; n
-- )
866 if( LINEFEED
== pSource
[n
] ) {
875 public WeakImplHelper3
<
877 XExtendedDocumentHandler
,
883 mp_SaxWriterHelper( NULL
),
884 m_bForceLineBreak(sal_False
),
885 m_bAllowLineBreak(sal_False
)
889 delete mp_SaxWriterHelper
;
892 public: // XActiveDataSource
893 virtual void SAL_CALL
setOutputStream(const Reference
< XOutputStream
> & aStream
)
894 throw (RuntimeException
)
896 // temporary: set same stream again to clear buffer
897 if ( m_out
== aStream
&& mp_SaxWriterHelper
&& m_bDocStarted
)
898 mp_SaxWriterHelper
->clearBuffer();
903 delete mp_SaxWriterHelper
;
904 mp_SaxWriterHelper
= new SaxWriterHelper(m_out
);
905 m_bDocStarted
= sal_False
;
907 m_bIsCDATA
= sal_False
;
911 virtual Reference
< XOutputStream
> SAL_CALL
getOutputStream(void)
912 throw(RuntimeException
)
915 public: // XDocumentHandler
916 virtual void SAL_CALL
startDocument(void)
917 throw(SAXException
, RuntimeException
);
919 virtual void SAL_CALL
endDocument(void)
920 throw(SAXException
, RuntimeException
);
922 virtual void SAL_CALL
startElement(const OUString
& aName
,
923 const Reference
< XAttributeList
> & xAttribs
)
924 throw (SAXException
, RuntimeException
);
926 virtual void SAL_CALL
endElement(const OUString
& aName
)
927 throw(SAXException
, RuntimeException
);
929 virtual void SAL_CALL
characters(const OUString
& aChars
)
930 throw(SAXException
, RuntimeException
);
932 virtual void SAL_CALL
ignorableWhitespace(const OUString
& aWhitespaces
)
933 throw(SAXException
, RuntimeException
);
934 virtual void SAL_CALL
processingInstruction(const OUString
& aTarget
,
935 const OUString
& aData
)
936 throw(SAXException
, RuntimeException
);
937 virtual void SAL_CALL
setDocumentLocator(const Reference
< XLocator
> & xLocator
)
938 throw(SAXException
, RuntimeException
);
940 public: // XExtendedDocumentHandler
941 virtual void SAL_CALL
startCDATA(void) throw(SAXException
, RuntimeException
);
942 virtual void SAL_CALL
endCDATA(void) throw(RuntimeException
);
943 virtual void SAL_CALL
comment(const OUString
& sComment
)
944 throw(SAXException
, RuntimeException
);
945 virtual void SAL_CALL
unknown(const OUString
& sString
)
946 throw(SAXException
, RuntimeException
);
947 virtual void SAL_CALL
allowLineBreak(void)
948 throw(SAXException
,RuntimeException
);
950 public: // XServiceInfo
951 OUString SAL_CALL
getImplementationName() throw();
952 Sequence
< OUString
> SAL_CALL
getSupportedServiceNames(void) throw();
953 sal_Bool SAL_CALL
supportsService(const OUString
& ServiceName
) throw();
957 void writeSequence( const Sequence
<sal_Int8
> & seq
);
958 sal_Int32
getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence
) throw();
960 Reference
< XOutputStream
> m_out
;
961 Sequence
< sal_Int8
> m_seqStartElement
;
962 SaxWriterHelper
* mp_SaxWriterHelper
;
964 // Status information
965 sal_Bool m_bDocStarted
: 1;
966 sal_Bool m_bIsCDATA
: 1;
967 sal_Bool m_bForceLineBreak
: 1;
968 sal_Bool m_bAllowLineBreak
: 1;
973 //--------------------------------------
974 // the extern interface
975 //---------------------------------------
976 Reference
< XInterface
> SAL_CALL
SaxWriter_CreateInstance(
977 SAL_UNUSED_PARAMETER
const Reference
< XMultiServiceFactory
> & )
980 SAXWriter
*p
= new SAXWriter
;
981 return Reference
< XInterface
> ( (static_cast< OWeakObject
* >(p
)) );
984 OUString
SaxWriter_getServiceName() throw()
986 return OUString("com.sun.star.xml.sax.Writer");
989 OUString
SaxWriter_getImplementationName() throw()
991 return OUString("com.sun.star.extensions.xml.sax.Writer");
994 Sequence
< OUString
> SaxWriter_getSupportedServiceNames(void) throw()
996 Sequence
<OUString
> aRet(1);
997 aRet
.getArray()[0] = SaxWriter_getServiceName();
1002 sal_Int32
SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence
) throw()
1004 sal_Int32 nLength
=-1;
1005 if (mp_SaxWriterHelper
)
1007 if ( m_bForceLineBreak
||
1008 (m_bAllowLineBreak
&&
1009 ((nFirstLineBreakOccurrence
+ mp_SaxWriterHelper
->GetLastColumnCount()) > MAXCOLUMNCOUNT
)) )
1012 m_bForceLineBreak
= sal_False
;
1013 m_bAllowLineBreak
= sal_False
;
1017 static inline sal_Bool
isFirstCharWhitespace( const sal_Unicode
*p
) throw()
1024 OUString
SAXWriter::getImplementationName() throw()
1026 return SaxWriter_getImplementationName();
1030 sal_Bool
SAXWriter::supportsService(const OUString
& ServiceName
) throw()
1032 Sequence
< OUString
> aSNL
= getSupportedServiceNames();
1033 const OUString
* pArray
= aSNL
.getConstArray();
1035 for( sal_Int32 i
= 0; i
< aSNL
.getLength(); i
++ )
1036 if( pArray
[i
] == ServiceName
)
1043 Sequence
< OUString
> SAXWriter::getSupportedServiceNames(void) throw ()
1045 Sequence
<OUString
> seq(1);
1046 seq
.getArray()[0] = SaxWriter_getServiceName();
1052 void SAXWriter::startDocument() throw(SAXException
, RuntimeException
)
1054 if( m_bDocStarted
|| ! m_out
.is() || !mp_SaxWriterHelper
) {
1055 throw SAXException();
1057 m_bDocStarted
= sal_True
;
1058 mp_SaxWriterHelper
->startDocument();
1062 void SAXWriter::endDocument(void) throw(SAXException
, RuntimeException
)
1064 if( ! m_bDocStarted
)
1067 OUString("endDocument called before startDocument"),
1068 Reference
< XInterface
>() , Any() );
1072 OUString("unexpected end of document"),
1073 Reference
< XInterface
>() , Any() );
1075 mp_SaxWriterHelper
->endDocument();
1078 m_out
->closeOutput();
1080 catch (const IOException
& e
)
1085 OUString("IO exception during closing the IO Stream"),
1086 Reference
< XInterface
> (),
1092 void SAXWriter::startElement(const OUString
& aName
, const Reference
< XAttributeList
>& xAttribs
)
1093 throw(SAXException
, RuntimeException
)
1095 if( ! m_bDocStarted
)
1097 SAXException except
;
1098 except
.Message
= OUString( "startElement called before startDocument" );
1103 SAXException except
;
1104 except
.Message
= OUString( "startElement call not allowed with CDATA sections" );
1108 sal_Int32
nLength(0);
1109 if (m_bAllowLineBreak
)
1111 sal_Int32 nAttribCount
= xAttribs
.is() ? xAttribs
->getLength() : 0;
1114 nLength
+= calcXMLByteLength( aName
.getStr() , aName
.getLength(),
1115 sal_False
, sal_False
); // the tag name
1118 for( n
= 0 ; n
< static_cast<sal_Int16
>(nAttribCount
) ; n
++ ) {
1120 OUString tmp
= xAttribs
->getNameByIndex( n
);
1122 nLength
+= calcXMLByteLength( tmp
.getStr() , tmp
.getLength() , sal_False
, sal_False
);
1126 tmp
= xAttribs
->getValueByIndex( n
);
1128 nLength
+= calcXMLByteLength( tmp
.getStr(), tmp
.getLength(), sal_True
, sal_True
);
1136 // Is a new indentation necessary?
1137 sal_Int32
nPrefix(getIndentPrefixLength( nLength
));
1139 // write into sequence
1141 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1143 SaxInvalidCharacterError
eRet(mp_SaxWriterHelper
->startElement(aName
, xAttribs
));
1147 if (eRet
== SAX_WARNING
)
1149 SAXInvalidCharacterException except
;
1150 except
.Message
= OUString( "Invalid charcter during XML-Export in a attribute value" );
1153 else if (eRet
== SAX_ERROR
)
1155 SAXException except
;
1156 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1161 void SAXWriter::endElement(const OUString
& aName
) throw (SAXException
, RuntimeException
)
1163 if( ! m_bDocStarted
) {
1164 throw SAXException ();
1168 if( m_nLevel
< 0 ) {
1169 throw SAXException();
1171 sal_Bool
bRet(sal_True
);
1173 if( mp_SaxWriterHelper
->FinishEmptyElement() )
1174 m_bForceLineBreak
= sal_False
;
1177 // only ascii chars allowed
1178 sal_Int32
nLength(0);
1179 if (m_bAllowLineBreak
)
1180 nLength
= 3 + calcXMLByteLength( aName
.getStr(), aName
.getLength(), sal_False
, sal_False
);
1181 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1184 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1186 bRet
= mp_SaxWriterHelper
->endElement(aName
);
1191 SAXException except
;
1192 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1197 void SAXWriter::characters(const OUString
& aChars
) throw(SAXException
, RuntimeException
)
1199 if( ! m_bDocStarted
)
1201 SAXException except
;
1202 except
.Message
= OUString( "characters method called before startDocument" );
1206 sal_Bool
bThrowException(sal_False
);
1207 if( !aChars
.isEmpty() )
1210 bThrowException
= !mp_SaxWriterHelper
->writeString( aChars
, sal_False
, sal_False
);
1213 // Note: nFirstLineBreakOccurrence is not exact, because we don't know how
1214 // many 2- and 3-byte chars are in between. However, this whole mechanism
1215 // is for pretty-printing only anyway, so it does not need to be exact.
1216 sal_Int32
nLength(0);
1217 sal_Int32
nIndentPrefix(-1);
1218 if (m_bAllowLineBreak
)
1220 sal_Int32 nFirstLineBreakOccurrence
= getFirstLineBreak( aChars
);
1222 nLength
= calcXMLByteLength( aChars
.getStr(), aChars
.getLength(),
1223 ! m_bIsCDATA
, sal_False
);
1224 nIndentPrefix
= getIndentPrefixLength(
1225 nFirstLineBreakOccurrence
>= 0 ? nFirstLineBreakOccurrence
: nLength
);
1228 nIndentPrefix
= getIndentPrefixLength(nLength
);
1230 // insert indentation
1231 if( nIndentPrefix
>= 0 )
1233 if( isFirstCharWhitespace( aChars
.getStr() ) )
1234 mp_SaxWriterHelper
->insertIndentation( nIndentPrefix
- 1 );
1236 mp_SaxWriterHelper
->insertIndentation( nIndentPrefix
);
1238 bThrowException
= !mp_SaxWriterHelper
->writeString(aChars
, sal_True
, sal_False
);
1241 if (bThrowException
)
1243 SAXInvalidCharacterException except
;
1244 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1250 void SAXWriter::ignorableWhitespace(const OUString
&) throw(SAXException
, RuntimeException
)
1252 if( ! m_bDocStarted
)
1254 throw SAXException ();
1257 m_bForceLineBreak
= sal_True
;
1260 void SAXWriter::processingInstruction(const OUString
& aTarget
, const OUString
& aData
)
1261 throw (SAXException
, RuntimeException
)
1263 if( ! m_bDocStarted
|| m_bIsCDATA
)
1265 throw SAXException();
1268 sal_Int32
nLength(0);
1269 if (m_bAllowLineBreak
)
1271 nLength
= 2; // "<?"
1272 nLength
+= calcXMLByteLength( aTarget
.getStr(), aTarget
.getLength(), sal_False
, sal_False
);
1274 nLength
+= 1; // " "
1276 nLength
+= calcXMLByteLength( aData
.getStr(), aData
.getLength(), sal_False
, sal_False
);
1278 nLength
+= 2; // "?>"
1281 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1284 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1286 if (!mp_SaxWriterHelper
->processingInstruction(aTarget
, aData
))
1288 SAXException except
;
1289 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1295 void SAXWriter::setDocumentLocator(const Reference
< XLocator
>&)
1296 throw (SAXException
, RuntimeException
)
1301 void SAXWriter::startCDATA(void) throw(SAXException
, RuntimeException
)
1303 if( ! m_bDocStarted
|| m_bIsCDATA
)
1305 throw SAXException ();
1308 sal_Int32 nLength
= 9;
1309 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1311 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1313 mp_SaxWriterHelper
->startCDATA();
1315 m_bIsCDATA
= sal_True
;
1318 void SAXWriter::endCDATA(void) throw (RuntimeException
)
1320 if( ! m_bDocStarted
| ! m_bIsCDATA
)
1322 SAXException except
;
1323 except
.Message
= OUString( "endCDATA was called without startCDATA" );
1327 sal_Int32 nLength
= 3;
1328 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1330 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1332 mp_SaxWriterHelper
->endCDATA();
1334 m_bIsCDATA
= sal_False
;
1338 void SAXWriter::comment(const OUString
& sComment
) throw(SAXException
, RuntimeException
)
1340 if( ! m_bDocStarted
|| m_bIsCDATA
)
1342 throw SAXException();
1345 sal_Int32
nLength(0);
1346 if (m_bAllowLineBreak
)
1348 nLength
= 4; // "<!--"
1349 nLength
+= calcXMLByteLength( sComment
.getStr(), sComment
.getLength(), sal_False
, sal_False
);
1354 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1356 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1358 if (!mp_SaxWriterHelper
->comment(sComment
))
1360 SAXException except
;
1361 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1367 void SAXWriter::allowLineBreak( ) throw ( SAXException
, RuntimeException
)
1369 if( ! m_bDocStarted
|| m_bAllowLineBreak
) {
1370 throw SAXException();
1373 m_bAllowLineBreak
= sal_True
;
1376 void SAXWriter::unknown(const OUString
& sString
) throw (SAXException
, RuntimeException
)
1379 if( ! m_bDocStarted
)
1381 throw SAXException ();
1385 throw SAXException();
1388 if( sString
.matchAsciiL( "<?xml", 5 ) )
1391 sal_Int32
nLength(0);
1392 if (m_bAllowLineBreak
)
1393 nLength
= calcXMLByteLength( sString
.getStr(), sString
.getLength(), sal_False
, sal_False
);
1395 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1397 mp_SaxWriterHelper
->insertIndentation( nPrefix
);
1399 if (!mp_SaxWriterHelper
->writeString( sString
, sal_False
, sal_False
))
1401 SAXException except
;
1402 except
.Message
= OUString( "Invalid charcter during XML-Export" );
1409 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */