1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
26 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
27 #include <com/sun/star/lang/XServiceInfo.hpp>
28 #include <com/sun/star/uno/XComponentContext.hpp>
29 #include <com/sun/star/util/XCloneable.hpp>
30 #include <com/sun/star/xml/sax/XParser.hpp>
31 #include <com/sun/star/xml/sax/SAXParseException.hpp>
32 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
33 #include <com/sun/star/xml/sax/XWriter.hpp>
35 #include <com/sun/star/io/XActiveDataSource.hpp>
37 #include <cppuhelper/weak.hxx>
38 #include <cppuhelper/implbase2.hxx>
39 #include <cppuhelper/supportsservice.hxx>
41 #include <osl/diagnose.h>
42 #include <rtl/ref.hxx>
43 #include <rtl/ustrbuf.hxx>
45 using namespace ::std
;
46 using namespace ::osl
;
47 using namespace ::cppu
;
48 using namespace ::com::sun::star::uno
;
49 using namespace ::com::sun::star::lang
;
50 using namespace ::com::sun::star::xml::sax
;
51 using namespace ::com::sun::star::util
;
52 using namespace ::com::sun::star::io
;
54 #include "xml2utf.hxx"
55 #include <boost/scoped_array.hpp>
58 #define SEQUENCESIZE 1024
59 #define MAXCOLUMNCOUNT 72
64 * Character conversion functions
71 enum SaxInvalidCharacterError
82 ::std::stack
<OUString
> m_DebugStartedElements
;
86 Reference
< XOutputStream
> m_out
;
87 Sequence
< sal_Int8
> m_Sequence
;
88 sal_Int8
* mp_Sequence
;
90 sal_Int32 nLastLineFeedPos
; // is negative after writing a sequence
91 sal_uInt32 nCurrentPos
;
92 bool m_bStartElementFinished
;
94 inline sal_uInt32
writeSequence() throw( SAXException
);
96 // Use only when inserting the bytes needs more space than the sequence has left,
97 // so that the sequence has to be written out and rPos reset to 0.
98 // Writes the sequence only on overflow; the sequence may be full at the end (rPos == SEQUENCESIZE).
99 inline void AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
100 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
);
101 inline bool convertToXML(const sal_Unicode
* pStr
,
103 bool bDoNormalization
,
104 bool bNormalizeWhitespace
,
106 sal_uInt32
& rPos
) throw( SAXException
);
107 inline void FinishStartElement() throw( SAXException
);
109 explicit SaxWriterHelper(Reference
< XOutputStream
> m_TempOut
)
111 , m_Sequence(SEQUENCESIZE
)
113 , nLastLineFeedPos(0)
115 , m_bStartElementFinished(true)
117 OSL_ENSURE(SEQUENCESIZE
> 50, "Sequence cache size to small");
118 mp_Sequence
= m_Sequence
.getArray();
122 OSL_ENSURE(!nCurrentPos
, "cached Sequence not written");
123 OSL_ENSURE(m_bStartElementFinished
, "StartElement not complettly written");
126 inline void insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
);
128 // Returns whether the string was written correctly, i.e. without invalid characters.
129 // If there are invalid characters in the string, it returns false.
130 // The calling method then has to throw the appropriate exception.
131 inline bool writeString(const OUString
& rWriteOutString
,
132 bool bDoNormalization
,
133 bool bNormalizeWhitespace
) throw( SAXException
);
135 sal_uInt32
GetLastColumnCount() const throw()
136 { return (sal_uInt32
)(nCurrentPos
- nLastLineFeedPos
); }
138 inline void startDocument() throw( SAXException
);
140 // Reports whether the element was written correctly or invalid characters were in the strings.
141 // If there are invalid characters in one of the strings, it returns a value other than SAX_NONE.
142 // The calling method then has to throw the appropriate exception.
143 inline SaxInvalidCharacterError
startElement(const OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
);
144 inline bool FinishEmptyElement() throw( SAXException
);
146 // Returns whether the string was written correctly, i.e. without invalid characters.
147 // If there are invalid characters in the string, it returns false.
148 // The calling method then has to throw the appropriate exception.
149 inline bool endElement(const OUString
& rName
) throw( SAXException
);
150 inline void endDocument() throw( SAXException
);
152 // Returns whether the strings were written correctly, i.e. without invalid characters.
153 // If there are invalid characters in one of the strings, it returns false.
154 // The calling method then has to throw the appropriate exception.
155 inline bool processingInstruction(const OUString
& rTarget
, const OUString
& rData
) throw( SAXException
);
156 inline void startCDATA() throw( SAXException
);
157 inline void endCDATA() throw( SAXException
);
159 // Returns whether the string was written correctly, i.e. without invalid characters.
160 // If there are invalid characters in the string, it returns false.
161 // The calling method then has to throw the appropriate exception.
162 inline bool comment(const OUString
& rComment
) throw( SAXException
);
164 inline void clearBuffer() throw( SAXException
);
167 const sal_Bool g_bValidCharsBelow32
[32] =
172 0,0,0,0,0,0,0,0, //16
176 inline bool IsInvalidChar(const sal_Unicode aChar
)
179 // check first for the most common characters
180 if( aChar
< 32 || aChar
>= 0xd800 )
181 bRet
= ( (aChar
< 32 && ! g_bValidCharsBelow32
[aChar
]) ||
188 * write through to the output stream
191 inline sal_uInt32
SaxWriterHelper::writeSequence() throw( SAXException
)
195 m_out
->writeBytes( m_Sequence
);
197 catch (const IOException
& e
)
202 "IO exception during writing",
203 Reference
< XInterface
> (),
206 nLastLineFeedPos
-= SEQUENCESIZE
;
210 inline void SaxWriterHelper::AddBytes(sal_Int8
* pTarget
, sal_uInt32
& rPos
,
211 const sal_Int8
* pBytes
, sal_uInt32 nBytesCount
) throw( SAXException
)
213 OSL_ENSURE((rPos
+ nBytesCount
) > SEQUENCESIZE
, "wrong use of AddBytesMethod");
214 sal_uInt32
nCount(SEQUENCESIZE
- rPos
);
215 memcpy( &(pTarget
[rPos
]) , pBytes
, nCount
);
217 OSL_ENSURE(rPos
+ nCount
== SEQUENCESIZE
, "the position should be the at the end");
219 rPos
= writeSequence();
220 sal_uInt32
nRestCount(nBytesCount
- nCount
);
221 if ((rPos
+ nRestCount
) <= SEQUENCESIZE
)
223 memcpy( &(pTarget
[rPos
]), &pBytes
[nCount
], nRestCount
);
227 AddBytes(pTarget
, rPos
, &pBytes
[nCount
], nRestCount
);
230 /** Converts an UTF16 string to UTF8 and does XML normalization
233 Pointer to a piece of memory to which the output should be written. The caller
234 must call calcXMLByteLength on the same string to ensure
235 that there is enough memory for the conversion.
237 inline bool SaxWriterHelper::convertToXML( const sal_Unicode
* pStr
,
239 bool bDoNormalization
,
240 bool bNormalizeWhitespace
,
242 sal_uInt32
& rPos
) throw( SAXException
)
245 sal_uInt32 nSurrogate
= 0;
247 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
249 sal_uInt16 c
= pStr
[i
];
250 if (IsInvalidChar(c
))
252 else if( (c
>= 0x0001) && (c
<= 0x007F) )
254 if( bDoNormalization
)
258 case '&': // resemble to &
260 if ((rPos
+ 5) > SEQUENCESIZE
)
261 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("&"), 5);
264 memcpy( &(pTarget
[rPos
]) , "&", 5 );
271 if ((rPos
+ 4) > SEQUENCESIZE
)
272 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("<"), 4);
275 memcpy( &(pTarget
[rPos
]) , "<" , 4 );
282 if ((rPos
+ 4) > SEQUENCESIZE
)
283 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>(">"), 4);
286 memcpy( &(pTarget
[rPos
]) , ">" , 4 );
291 case 39: // 39 == '''
293 if ((rPos
+ 6) > SEQUENCESIZE
)
294 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("'"), 6);
297 memcpy( &(pTarget
[rPos
]) , "'" , 6 );
304 if ((rPos
+ 6) > SEQUENCESIZE
)
305 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("""), 6);
308 memcpy( &(pTarget
[rPos
]) , """ , 6 );
315 if ((rPos
+ 6) > SEQUENCESIZE
)
316 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("
"), 6);
319 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
326 if( bNormalizeWhitespace
)
328 if ((rPos
+ 6) > SEQUENCESIZE
)
329 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("
"), 6);
332 memcpy( &(pTarget
[rPos
]) , "
" , 6 );
338 pTarget
[rPos
] = LINEFEED
;
339 nLastLineFeedPos
= rPos
;
346 if( bNormalizeWhitespace
)
348 if ((rPos
+ 6) > SEQUENCESIZE
)
349 AddBytes(pTarget
, rPos
, reinterpret_cast<sal_Int8
const *>("	"), 6);
352 memcpy( &(pTarget
[rPos
]) , "	" , 6 );
365 pTarget
[rPos
] = (sal_Int8
)c
;
373 pTarget
[rPos
] = (sal_Int8
)c
;
374 if ((sal_Int8
)c
== LINEFEED
)
375 nLastLineFeedPos
= rPos
;
379 else if( c
>= 0xd800 && c
< 0xdc00 )
381 // 1. surrogate: save (until 2. surrogate)
382 OSL_ENSURE( nSurrogate
== 0, "left-over Unicode surrogate" );
383 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
385 else if( c
>= 0xdc00 && c
< 0xe000 )
387 // 2. surrogate: write as UTF-8
388 OSL_ENSURE( nSurrogate
!= 0, "lone 2nd Unicode surrogate" );
390 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
391 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
393 sal_Int8 aBytes
[] = { sal_Int8(0xF0 | ((nSurrogate
>> 18) & 0x0F)),
394 sal_Int8(0x80 | ((nSurrogate
>> 12) & 0x3F)),
395 sal_Int8(0x80 | ((nSurrogate
>> 6) & 0x3F)),
396 sal_Int8(0x80 | ((nSurrogate
>> 0) & 0x3F)) };
397 if ((rPos
+ 4) > SEQUENCESIZE
)
398 AddBytes(pTarget
, rPos
, aBytes
, 4);
401 pTarget
[rPos
] = aBytes
[0];
403 pTarget
[rPos
] = aBytes
[1];
405 pTarget
[rPos
] = aBytes
[2];
407 pTarget
[rPos
] = aBytes
[3];
413 OSL_FAIL( "illegal Unicode character" );
420 else if( c
> 0x07FF )
422 sal_Int8 aBytes
[] = { sal_Int8(0xE0 | ((c
>> 12) & 0x0F)),
423 sal_Int8(0x80 | ((c
>> 6) & 0x3F)),
424 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
425 if ((rPos
+ 3) > SEQUENCESIZE
)
426 AddBytes(pTarget
, rPos
, aBytes
, 3);
429 pTarget
[rPos
] = aBytes
[0];
431 pTarget
[rPos
] = aBytes
[1];
433 pTarget
[rPos
] = aBytes
[2];
439 sal_Int8 aBytes
[] = { sal_Int8(0xC0 | ((c
>> 6) & 0x1F)),
440 sal_Int8(0x80 | ((c
>> 0) & 0x3F)) };
441 if ((rPos
+ 2) > SEQUENCESIZE
)
442 AddBytes(pTarget
, rPos
, aBytes
, 2);
445 pTarget
[rPos
] = aBytes
[0];
447 pTarget
[rPos
] = aBytes
[1];
451 OSL_ENSURE(rPos
<= SEQUENCESIZE
, "not reset current position");
452 if (rPos
== SEQUENCESIZE
)
453 rPos
= writeSequence();
455 // reset left-over surrogate
456 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
458 OSL_ENSURE( nSurrogate
!= 0, "left-over Unicode surrogate" );
466 inline void SaxWriterHelper::FinishStartElement() throw( SAXException
)
468 if (!m_bStartElementFinished
)
470 mp_Sequence
[nCurrentPos
] = '>';
472 if (nCurrentPos
== SEQUENCESIZE
)
473 nCurrentPos
= writeSequence();
474 m_bStartElementFinished
= true;
478 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel
) throw( SAXException
)
480 FinishStartElement();
483 if ((nCurrentPos
+ m_nLevel
+ 1) <= SEQUENCESIZE
)
485 mp_Sequence
[nCurrentPos
] = LINEFEED
;
486 nLastLineFeedPos
= nCurrentPos
;
488 memset( &(mp_Sequence
[nCurrentPos
]) , 32 , m_nLevel
);
489 nCurrentPos
+= m_nLevel
;
490 if (nCurrentPos
== SEQUENCESIZE
)
491 nCurrentPos
= writeSequence();
495 sal_uInt32
nCount(m_nLevel
+ 1);
496 boost::scoped_array
<sal_Int8
> pBytes(new sal_Int8
[nCount
]);
497 pBytes
[0] = LINEFEED
;
498 memset( &(pBytes
[1]), 32, m_nLevel
);
499 AddBytes(mp_Sequence
, nCurrentPos
, pBytes
.get(), nCount
);
501 nLastLineFeedPos
= nCurrentPos
- nCount
;
502 if (nCurrentPos
== SEQUENCESIZE
)
503 nCurrentPos
= writeSequence();
508 mp_Sequence
[nCurrentPos
] = LINEFEED
;
509 nLastLineFeedPos
= nCurrentPos
;
511 if (nCurrentPos
== SEQUENCESIZE
)
512 nCurrentPos
= writeSequence();
516 inline bool SaxWriterHelper::writeString( const OUString
& rWriteOutString
,
517 bool bDoNormalization
,
518 bool bNormalizeWhitespace
) throw( SAXException
)
520 FinishStartElement();
521 return convertToXML(rWriteOutString
.getStr(),
522 rWriteOutString
.getLength(),
524 bNormalizeWhitespace
,
529 inline void SaxWriterHelper::startDocument() throw( SAXException
)
531 const char pc
[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
532 const int nLen
= strlen( pc
);
533 if ((nCurrentPos
+ nLen
) <= SEQUENCESIZE
)
535 memcpy( mp_Sequence
, pc
, nLen
);
540 AddBytes(mp_Sequence
, nCurrentPos
, reinterpret_cast<sal_Int8
const *>(pc
), nLen
);
542 OSL_ENSURE(nCurrentPos
<= SEQUENCESIZE
, "not reset current position");
543 if (nCurrentPos
== SEQUENCESIZE
)
544 nCurrentPos
= writeSequence();
545 mp_Sequence
[nCurrentPos
] = LINEFEED
;
547 if (nCurrentPos
== SEQUENCESIZE
)
548 nCurrentPos
= writeSequence();
551 inline SaxInvalidCharacterError
SaxWriterHelper::startElement(const OUString
& rName
, const Reference
< XAttributeList
>& xAttribs
) throw( SAXException
)
553 FinishStartElement();
556 m_DebugStartedElements
.push(rName
);
557 ::std::set
<OUString
> DebugAttributes
;
560 mp_Sequence
[nCurrentPos
] = '<';
562 if (nCurrentPos
== SEQUENCESIZE
)
563 nCurrentPos
= writeSequence();
565 SaxInvalidCharacterError
eRet(SAX_NONE
);
566 if (!writeString(rName
, false, false))
569 sal_Int16 nAttribCount
= xAttribs
.is() ? static_cast<sal_Int16
>(xAttribs
->getLength()) : 0;
570 for(sal_Int16 i
= 0 ; i
< nAttribCount
; i
++ )
572 mp_Sequence
[nCurrentPos
] = ' ';
574 if (nCurrentPos
== SEQUENCESIZE
)
575 nCurrentPos
= writeSequence();
577 OUString
const& rAttrName(xAttribs
->getNameByIndex(i
));
579 // Well-formedness constraint: Unique Att Spec
580 assert(DebugAttributes
.find(rAttrName
) == DebugAttributes
.end());
581 DebugAttributes
.insert(rAttrName
);
583 if (!writeString(rAttrName
, false, false))
586 mp_Sequence
[nCurrentPos
] = '=';
588 if (nCurrentPos
== SEQUENCESIZE
)
589 nCurrentPos
= writeSequence();
590 mp_Sequence
[nCurrentPos
] = '"';
592 if (nCurrentPos
== SEQUENCESIZE
)
593 nCurrentPos
= writeSequence();
595 if (!writeString(xAttribs
->getValueByIndex( i
), true, true) &&
596 !(eRet
== SAX_ERROR
))
599 mp_Sequence
[nCurrentPos
] = '"';
601 if (nCurrentPos
== SEQUENCESIZE
)
602 nCurrentPos
= writeSequence();
605 m_bStartElementFinished
= false; // because the '>' character is not added,
606 // since it is possible that the "/>"
607 // characters have to be added instead
611 inline bool SaxWriterHelper::FinishEmptyElement() throw( SAXException
)
613 if (m_bStartElementFinished
)
616 mp_Sequence
[nCurrentPos
] = '/';
618 if (nCurrentPos
== SEQUENCESIZE
)
619 nCurrentPos
= writeSequence();
620 mp_Sequence
[nCurrentPos
] = '>';
622 if (nCurrentPos
== SEQUENCESIZE
)
623 nCurrentPos
= writeSequence();
625 m_bStartElementFinished
= true;
630 inline bool SaxWriterHelper::endElement(const OUString
& rName
) throw( SAXException
)
632 FinishStartElement();
634 mp_Sequence
[nCurrentPos
] = '<';
636 if (nCurrentPos
== SEQUENCESIZE
)
637 nCurrentPos
= writeSequence();
638 mp_Sequence
[nCurrentPos
] = '/';
640 if (nCurrentPos
== SEQUENCESIZE
)
641 nCurrentPos
= writeSequence();
643 bool bRet(writeString( rName
, false, false));
645 mp_Sequence
[nCurrentPos
] = '>';
647 if (nCurrentPos
== SEQUENCESIZE
)
648 nCurrentPos
= writeSequence();
653 inline void SaxWriterHelper::endDocument() throw( SAXException
)
657 m_Sequence
.realloc(nCurrentPos
);
658 nCurrentPos
= writeSequence();
659 //m_Sequence.realloc(SEQUENCESIZE);
663 inline void SaxWriterHelper::clearBuffer() throw( SAXException
)
665 FinishStartElement();
668 m_Sequence
.realloc(nCurrentPos
);
669 nCurrentPos
= writeSequence();
670 m_Sequence
.realloc(SEQUENCESIZE
);
671 // Be sure to update the array pointer after the reallocation.
672 mp_Sequence
= m_Sequence
.getArray();
676 inline bool SaxWriterHelper::processingInstruction(const OUString
& rTarget
, const OUString
& rData
) throw( SAXException
)
678 FinishStartElement();
679 mp_Sequence
[nCurrentPos
] = '<';
681 if (nCurrentPos
== SEQUENCESIZE
)
682 nCurrentPos
= writeSequence();
683 mp_Sequence
[nCurrentPos
] = '?';
685 if (nCurrentPos
== SEQUENCESIZE
)
686 nCurrentPos
= writeSequence();
688 bool bRet(writeString( rTarget
, false, false ));
690 mp_Sequence
[nCurrentPos
] = ' ';
692 if (nCurrentPos
== SEQUENCESIZE
)
693 nCurrentPos
= writeSequence();
695 if (!writeString( rData
, false, false ))
698 mp_Sequence
[nCurrentPos
] = '?';
700 if (nCurrentPos
== SEQUENCESIZE
)
701 nCurrentPos
= writeSequence();
702 mp_Sequence
[nCurrentPos
] = '>';
704 if (nCurrentPos
== SEQUENCESIZE
)
705 nCurrentPos
= writeSequence();
710 inline void SaxWriterHelper::startCDATA() throw( SAXException
)
712 FinishStartElement();
713 if ((nCurrentPos
+ 9) <= SEQUENCESIZE
)
715 memcpy( &(mp_Sequence
[nCurrentPos
]), "<![CDATA[" , 9 );
719 AddBytes(mp_Sequence
, nCurrentPos
, reinterpret_cast<sal_Int8
const *>("<![CDATA["), 9);
720 if (nCurrentPos
== SEQUENCESIZE
)
721 nCurrentPos
= writeSequence();
724 inline void SaxWriterHelper::endCDATA() throw( SAXException
)
726 FinishStartElement();
727 if ((nCurrentPos
+ 3) <= SEQUENCESIZE
)
729 memcpy( &(mp_Sequence
[nCurrentPos
]), "]]>" , 3 );
733 AddBytes(mp_Sequence
, nCurrentPos
, reinterpret_cast<sal_Int8
const *>("]]>"), 3);
734 if (nCurrentPos
== SEQUENCESIZE
)
735 nCurrentPos
= writeSequence();
738 inline bool SaxWriterHelper::comment(const OUString
& rComment
) throw( SAXException
)
740 FinishStartElement();
741 mp_Sequence
[nCurrentPos
] = '<';
743 if (nCurrentPos
== SEQUENCESIZE
)
744 nCurrentPos
= writeSequence();
745 mp_Sequence
[nCurrentPos
] = '!';
747 if (nCurrentPos
== SEQUENCESIZE
)
748 nCurrentPos
= writeSequence();
749 mp_Sequence
[nCurrentPos
] = '-';
751 if (nCurrentPos
== SEQUENCESIZE
)
752 nCurrentPos
= writeSequence();
753 mp_Sequence
[nCurrentPos
] = '-';
755 if (nCurrentPos
== SEQUENCESIZE
)
756 nCurrentPos
= writeSequence();
758 bool bRet(writeString( rComment
, false, false));
760 mp_Sequence
[nCurrentPos
] = '-';
762 if (nCurrentPos
== SEQUENCESIZE
)
763 nCurrentPos
= writeSequence();
764 mp_Sequence
[nCurrentPos
] = '-';
766 if (nCurrentPos
== SEQUENCESIZE
)
767 nCurrentPos
= writeSequence();
768 mp_Sequence
[nCurrentPos
] = '>';
770 if (nCurrentPos
== SEQUENCESIZE
)
771 nCurrentPos
= writeSequence();
776 inline sal_Int32
calcXMLByteLength( const sal_Unicode
*pStr
, sal_Int32 nStrLen
,
777 bool bDoNormalization
,
778 bool bNormalizeWhitespace
)
780 sal_Int32 nOutputLength
= 0;
781 sal_uInt32 nSurrogate
= 0;
783 for( sal_Int32 i
= 0 ; i
< nStrLen
; i
++ )
785 sal_uInt16 c
= pStr
[i
];
786 if( !IsInvalidChar(c
) && (c
>= 0x0001) && (c
<= 0x007F) )
788 if( bDoNormalization
)
792 case '&': // resemble to &
799 case 39: // 39 == ''', '
807 if( bNormalizeWhitespace
)
825 else if( c
>= 0xd800 && c
< 0xdc00 )
828 nSurrogate
= ( ( c
& 0x03ff ) + 0x0040 );
830 else if( c
>= 0xdc00 && c
< 0xe000 )
832 // 2. surrogate: write as UTF-8 (if range is OK)
833 nSurrogate
= ( nSurrogate
<< 10 ) | ( c
& 0x03ff );
834 if( nSurrogate
>= 0x00010000 && nSurrogate
<= 0x0010FFFF )
838 else if( c
> 0x07FF )
847 // surrogate processing
848 if( ( nSurrogate
!= 0 ) && !( c
>= 0xd800 && c
< 0xdc00 ) )
852 return nOutputLength
;
855 /** returns position of first ascii 10 within the string, -1 when no 10 in string.
857 static inline sal_Int32
getFirstLineBreak( const OUString
& str
) throw ()
859 const sal_Unicode
*pSource
= str
.getStr();
860 sal_Int32 nLen
= str
.getLength();
862 for( int n
= 0; n
< nLen
; n
++ )
864 if( LINEFEED
== pSource
[n
] ) {
872 public WeakImplHelper2
<
878 : m_pSaxWriterHelper(NULL
)
879 , m_bDocStarted(false)
881 , m_bForceLineBreak(false)
882 , m_bAllowLineBreak(false)
888 delete m_pSaxWriterHelper
;
891 public: // XActiveDataSource
892 virtual void SAL_CALL
setOutputStream(const Reference
< XOutputStream
> & aStream
)
893 throw (RuntimeException
, std::exception
) SAL_OVERRIDE
897 // temporary: set same stream again to clear buffer
898 if ( m_out
== aStream
&& m_pSaxWriterHelper
&& m_bDocStarted
)
899 m_pSaxWriterHelper
->clearBuffer();
903 delete m_pSaxWriterHelper
;
904 m_pSaxWriterHelper
= new SaxWriterHelper(m_out
);
905 m_bDocStarted
= false;
910 catch (const SAXException
& e
)
912 throw css::lang::WrappedTargetRuntimeException(
914 static_cast < OWeakObject
* > ( this ),
918 virtual Reference
< XOutputStream
> SAL_CALL
getOutputStream()
919 throw(RuntimeException
, std::exception
) SAL_OVERRIDE
924 public: // XDocumentHandler
925 virtual void SAL_CALL
startDocument()
926 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
928 virtual void SAL_CALL
endDocument()
929 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
931 virtual void SAL_CALL
startElement(const OUString
& aName
,
932 const Reference
< XAttributeList
> & xAttribs
)
933 throw (SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
935 virtual void SAL_CALL
endElement(const OUString
& aName
)
936 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
938 virtual void SAL_CALL
characters(const OUString
& aChars
)
939 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
941 virtual void SAL_CALL
ignorableWhitespace(const OUString
& aWhitespaces
)
942 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
943 virtual void SAL_CALL
processingInstruction(const OUString
& aTarget
,
944 const OUString
& aData
)
945 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
946 virtual void SAL_CALL
setDocumentLocator(const Reference
< XLocator
> & xLocator
)
947 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
949 public: // XExtendedDocumentHandler
950 virtual void SAL_CALL
startCDATA() throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
951 virtual void SAL_CALL
endCDATA() throw(SAXException
,RuntimeException
, std::exception
) SAL_OVERRIDE
;
952 virtual void SAL_CALL
comment(const OUString
& sComment
)
953 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
954 virtual void SAL_CALL
unknown(const OUString
& sString
)
955 throw(SAXException
, RuntimeException
, std::exception
) SAL_OVERRIDE
;
956 virtual void SAL_CALL
allowLineBreak()
957 throw(SAXException
,RuntimeException
, std::exception
) SAL_OVERRIDE
;
959 public: // XServiceInfo
960 OUString SAL_CALL
getImplementationName() throw(std::exception
) SAL_OVERRIDE
;
961 Sequence
< OUString
> SAL_CALL
getSupportedServiceNames() throw(std::exception
) SAL_OVERRIDE
;
962 sal_Bool SAL_CALL
supportsService(const OUString
& ServiceName
) throw(std::exception
) SAL_OVERRIDE
;
965 sal_Int32
getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence
) throw();
967 Reference
< XOutputStream
> m_out
;
968 SaxWriterHelper
* m_pSaxWriterHelper
;
970 // Status information
971 bool m_bDocStarted
: 1;
973 bool m_bForceLineBreak
: 1;
974 bool m_bAllowLineBreak
: 1;
978 sal_Int32
SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence
) throw()
980 sal_Int32 nLength
=-1;
981 if (m_pSaxWriterHelper
)
983 if ( m_bForceLineBreak
||
984 (m_bAllowLineBreak
&&
985 ((nFirstLineBreakOccurrence
+ m_pSaxWriterHelper
->GetLastColumnCount()) > MAXCOLUMNCOUNT
)) )
988 m_bForceLineBreak
= false;
989 m_bAllowLineBreak
= false;
993 static inline bool isFirstCharWhitespace( const sal_Unicode
*p
) throw()
999 OUString
SAXWriter::getImplementationName() throw(std::exception
)
1001 return OUString("com.sun.star.extensions.xml.sax.Writer");
1005 sal_Bool
SAXWriter::supportsService(const OUString
& ServiceName
) throw(std::exception
)
1007 return cppu::supportsService(this, ServiceName
);
1011 Sequence
< OUString
> SAXWriter::getSupportedServiceNames() throw (std::exception
)
1013 Sequence
<OUString
> seq(1);
1014 seq
[0] = "com.sun.star.xml.sax.Writer";
1018 void SAXWriter::startDocument() throw(SAXException
, RuntimeException
, std::exception
)
1020 if( m_bDocStarted
|| ! m_out
.is() || !m_pSaxWriterHelper
) {
1021 throw SAXException();
1023 m_bDocStarted
= true;
1024 m_pSaxWriterHelper
->startDocument();
1028 void SAXWriter::endDocument() throw(SAXException
, RuntimeException
, std::exception
)
1030 if( ! m_bDocStarted
)
1033 "endDocument called before startDocument",
1034 Reference
< XInterface
>() , Any() );
1038 "unexpected end of document",
1039 Reference
< XInterface
>() , Any() );
1041 m_pSaxWriterHelper
->endDocument();
1044 m_out
->closeOutput();
1046 catch (const IOException
& e
)
1051 "IO exception during closing the IO Stream",
1052 Reference
< XInterface
> (),
1058 void SAXWriter::startElement(const OUString
& aName
, const Reference
< XAttributeList
>& xAttribs
)
1059 throw(SAXException
, RuntimeException
, std::exception
)
1061 if( ! m_bDocStarted
)
1063 SAXException except
;
1064 except
.Message
= "startElement called before startDocument";
1069 SAXException except
;
1070 except
.Message
= "startElement call not allowed with CDATA sections";
1074 sal_Int32
nLength(0);
1075 if (m_bAllowLineBreak
)
1077 sal_Int32 nAttribCount
= xAttribs
.is() ? xAttribs
->getLength() : 0;
1080 nLength
+= calcXMLByteLength( aName
.getStr() , aName
.getLength(),
1081 false, false ); // the tag name
1084 for( n
= 0 ; n
< static_cast<sal_Int16
>(nAttribCount
) ; n
++ ) {
1086 OUString tmp
= xAttribs
->getNameByIndex( n
);
1088 nLength
+= calcXMLByteLength( tmp
.getStr() , tmp
.getLength() , false, false );
1092 tmp
= xAttribs
->getValueByIndex( n
);
1094 nLength
+= calcXMLByteLength( tmp
.getStr(), tmp
.getLength(), true, true );
1102 // Is a new indentation necessary?
1103 sal_Int32
nPrefix(getIndentPrefixLength( nLength
));
1105 // write into sequence
1107 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1109 SaxInvalidCharacterError
eRet(m_pSaxWriterHelper
->startElement(aName
, xAttribs
));
1113 if (eRet
== SAX_WARNING
)
1115 SAXInvalidCharacterException except
;
1116 except
.Message
= "Invalid character during XML-Export in a attribute value";
1119 else if (eRet
== SAX_ERROR
)
1121 SAXException except
;
1122 except
.Message
= "Invalid character during XML-Export";
1127 void SAXWriter::endElement(const OUString
& aName
) throw (SAXException
, RuntimeException
, std::exception
)
1129 if( ! m_bDocStarted
) {
1130 throw SAXException ();
1134 if( m_nLevel
< 0 ) {
1135 throw SAXException();
1139 // check here because Helper's endElement is not always called
1141 assert(!m_pSaxWriterHelper
->m_DebugStartedElements
.empty());
1142 // Well-formedness constraint: Element Type Match
1143 assert(aName
== m_pSaxWriterHelper
->m_DebugStartedElements
.top());
1144 m_pSaxWriterHelper
->m_DebugStartedElements
.pop();
1147 if( m_pSaxWriterHelper
->FinishEmptyElement() )
1148 m_bForceLineBreak
= false;
1151 // only ascii chars allowed
1152 sal_Int32
nLength(0);
1153 if (m_bAllowLineBreak
)
1154 nLength
= 3 + calcXMLByteLength( aName
.getStr(), aName
.getLength(), false, false );
1155 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1158 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1160 bRet
= m_pSaxWriterHelper
->endElement(aName
);
1165 SAXException except
;
1166 except
.Message
= "Invalid character during XML-Export";
1171 void SAXWriter::characters(const OUString
& aChars
) throw(SAXException
, RuntimeException
, std::exception
)
1173 if( ! m_bDocStarted
)
1175 SAXException except
;
1176 except
.Message
= "characters method called before startDocument";
1180 bool bThrowException(false);
1181 if( !aChars
.isEmpty() )
1184 bThrowException
= !m_pSaxWriterHelper
->writeString( aChars
, false, false );
1187 // Note: nFirstLineBreakOccurrence is not exact, because we don't know how
1188 // many 2- and 3-byte chars are in between. However, this whole thing
1189 // is for pretty printing only anyway, so it does not need to be exact.
1190 sal_Int32
nLength(0);
1191 sal_Int32
nIndentPrefix(-1);
1192 if (m_bAllowLineBreak
)
1194 sal_Int32 nFirstLineBreakOccurrence
= getFirstLineBreak( aChars
);
1196 nLength
= calcXMLByteLength( aChars
.getStr(), aChars
.getLength(),
1197 ! m_bIsCDATA
, false );
1198 nIndentPrefix
= getIndentPrefixLength(
1199 nFirstLineBreakOccurrence
>= 0 ? nFirstLineBreakOccurrence
: nLength
);
1202 nIndentPrefix
= getIndentPrefixLength(nLength
);
1204 // insert indentation
1205 if( nIndentPrefix
>= 0 )
1207 if( isFirstCharWhitespace( aChars
.getStr() ) )
1208 m_pSaxWriterHelper
->insertIndentation( nIndentPrefix
- 1 );
1210 m_pSaxWriterHelper
->insertIndentation( nIndentPrefix
);
1212 bThrowException
= !m_pSaxWriterHelper
->writeString(aChars
, true , false);
1215 if (bThrowException
)
1217 SAXInvalidCharacterException except
;
1218 except
.Message
= "Invalid character during XML-Export";
1224 void SAXWriter::ignorableWhitespace(const OUString
&) throw(SAXException
, RuntimeException
, std::exception
)
1226 if( ! m_bDocStarted
)
1228 throw SAXException ();
1231 m_bForceLineBreak
= true;
1234 void SAXWriter::processingInstruction(const OUString
& aTarget
, const OUString
& aData
)
1235 throw (SAXException
, RuntimeException
, std::exception
)
1237 if( ! m_bDocStarted
|| m_bIsCDATA
)
1239 throw SAXException();
1242 sal_Int32
nLength(0);
1243 if (m_bAllowLineBreak
)
1245 nLength
= 2; // "<?"
1246 nLength
+= calcXMLByteLength( aTarget
.getStr(), aTarget
.getLength(), false, false );
1248 nLength
+= 1; // " "
1250 nLength
+= calcXMLByteLength( aData
.getStr(), aData
.getLength(), false, false );
1252 nLength
+= 2; // "?>"
1255 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1258 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1260 if (!m_pSaxWriterHelper
->processingInstruction(aTarget
, aData
))
1262 SAXException except
;
1263 except
.Message
= "Invalid character during XML-Export";
1269 void SAXWriter::setDocumentLocator(const Reference
< XLocator
>&)
1270 throw (SAXException
, RuntimeException
, std::exception
)
1275 void SAXWriter::startCDATA() throw(SAXException
, RuntimeException
, std::exception
)
1277 if( ! m_bDocStarted
|| m_bIsCDATA
)
1279 throw SAXException ();
1282 sal_Int32 nLength
= 9;
1283 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1285 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1287 m_pSaxWriterHelper
->startCDATA();
1292 void SAXWriter::endCDATA() throw (SAXException
,RuntimeException
, std::exception
)
1294 if( ! m_bDocStarted
|| ! m_bIsCDATA
)
1296 SAXException except
;
1297 except
.Message
= "endCDATA was called without startCDATA";
1301 sal_Int32 nLength
= 3;
1302 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1304 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1306 m_pSaxWriterHelper
->endCDATA();
1312 void SAXWriter::comment(const OUString
& sComment
) throw(SAXException
, RuntimeException
, std::exception
)
1314 if( ! m_bDocStarted
|| m_bIsCDATA
)
1316 throw SAXException();
1319 sal_Int32
nLength(0);
1320 if (m_bAllowLineBreak
)
1322 nLength
= 4; // "<!--"
1323 nLength
+= calcXMLByteLength( sComment
.getStr(), sComment
.getLength(), false, false);
1328 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1330 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1332 if (!m_pSaxWriterHelper
->comment(sComment
))
1334 SAXException except
;
1335 except
.Message
= "Invalid character during XML-Export";
1341 void SAXWriter::allowLineBreak( ) throw ( SAXException
, RuntimeException
, std::exception
)
1343 if( ! m_bDocStarted
|| m_bAllowLineBreak
) {
1344 throw SAXException();
1347 m_bAllowLineBreak
= true;
1350 void SAXWriter::unknown(const OUString
& sString
) throw (SAXException
, RuntimeException
, std::exception
)
1353 if( ! m_bDocStarted
)
1355 throw SAXException ();
1359 throw SAXException();
1362 if( sString
.startsWith( "<?xml" ) )
1365 sal_Int32
nLength(0);
1366 if (m_bAllowLineBreak
)
1367 nLength
= calcXMLByteLength( sString
.getStr(), sString
.getLength(), false, false );
1369 sal_Int32 nPrefix
= getIndentPrefixLength( nLength
);
1371 m_pSaxWriterHelper
->insertIndentation( nPrefix
);
1373 if (!m_pSaxWriterHelper
->writeString( sString
, false, false))
1375 SAXException except
;
1376 except
.Message
= "Invalid character during XML-Export";
1383 extern "C" SAL_DLLPUBLIC_EXPORT
css::uno::XInterface
* SAL_CALL
1384 com_sun_star_extensions_xml_sax_Writer_get_implementation(
1385 css::uno::XComponentContext
*,
1386 css::uno::Sequence
<css::uno::Any
> const &)
1388 return cppu::acquire(new SAXWriter
);
1391 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */