Update ooo320-m1
[ooovba.git] / sax / source / expatwrap / saxwriter.cxx
blobf208ee612b7f394213f0c67598ffabb1b7aaab35
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: saxwriter.cxx,v $
10 * $Revision: 1.21 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
30 #include <string.h>
32 #include <com/sun/star/lang/XServiceInfo.hpp>
33 #include <com/sun/star/util/XCloneable.hpp>
34 #include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
35 #include <com/sun/star/xml/sax/XParser.hpp>
36 #include <com/sun/star/xml/sax/SAXParseException.hpp>
37 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
39 #include <com/sun/star/io/XActiveDataSource.hpp>
41 #include <cppuhelper/factory.hxx>
42 #include <cppuhelper/weak.hxx>
43 #include <cppuhelper/implbase3.hxx>
45 #include <rtl/strbuf.hxx>
46 #include <rtl/byteseq.hxx>
47 #include <rtl/ustrbuf.hxx>
49 using namespace ::rtl;
50 using namespace ::std;
51 using namespace ::osl;
52 using namespace ::cppu;
53 using namespace ::com::sun::star::uno;
54 using namespace ::com::sun::star::lang;
55 using namespace ::com::sun::star::registry;
56 using namespace ::com::sun::star::xml::sax;
57 using namespace ::com::sun::star::util;
58 using namespace ::com::sun::star::io;
60 #include "factory.hxx"
61 #include "xml2utf.hxx"
63 #define LINEFEED 10
64 #define SEQUENCESIZE 1024
65 #define MAXCOLUMNCOUNT 72
67 /******
70 * Character conversion functions
73 *****/
75 namespace sax_expatwrap {
76 /*****
78 * Calculates the length of the sequence after conversion, but the conversion is not done.
79 * E.g. &<>"' plus some more are
80 * special characters in XML that need to be transformed.
82 * @param bConvertAll For attributes it is necessary to convert every symbol (including line feed and tab).
83 * Set this to true if you want to perform this special conversion.
84 * @return The returned value is equal to the length of the incoming sequence when no
85 * conversion is necessary, otherwise it is larger than the length of the sequence.
86 ****/
87 // inline sal_Int32 CalcXMLLen( const Sequence<sal_Int8> & seq , sal_Bool bConvertAll ) throw()
88 // {
89 // sal_Int32 nLen = 0;
90 // const sal_Int8 *pArray = seq.getConstArray();
92 // for( int i = 0 ; i < seq.getLength() ; i ++ ) {
94 // sal_Int8 c = pArray[i];
95 // switch( c )
96 // {
97 // case '&': // resemble to &amp;
98 // nLen +=5;
99 // break;
100 // case '<': // &lt;
101 // case '>': // &gt;
102 // nLen +=4;
103 // break;
104 // case 39: // 39 == ''', &apos;
105 // case '"': // &quot;
106 // case 13: // &#x0d;
107 // nLen += 6;
108 // break;
110 // case 10: // &#x0a;
111 // case 9: // &#x09;
112 // if( bConvertAll )
113 // {
114 // nLen += 6; //
115 // }
116 // break;
117 // default:
118 // nLen ++;
119 // }
120 // }
122 // return nLen;
123 // }
// Result of SaxWriterHelper::startElement(): tells the caller whether, and where,
// invalid characters were met while writing the start tag.
125 enum SaxInvalidCharacterError
// no invalid character was found
127 SAX_NONE,
// only an attribute value contained an invalid character
128 SAX_WARNING,
// the element name or an attribute name contained an invalid character
129 SAX_ERROR
// Collects the XML output in a byte sequence of SEQUENCESIZE elements and passes that
// sequence to the output stream through writeSequence().
132 class SaxWriterHelper
// stream that receives the buffered bytes (see writeSequence)
134 Reference< XOutputStream > m_out;
// the buffer, constructed with SEQUENCESIZE elements
135 Sequence < sal_Int8 > m_Sequence;
// pointer to the elements of m_Sequence, fetched with getArray() in the constructor
136 sal_Int8* mp_Sequence;
138 sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
// index in the buffer at which the next byte is stored
139 sal_uInt32 nCurrentPos;
// sal_False after startElement() until the closing '>' or "/>" has been stored
140 sal_Bool m_bStartElementFinished;
// hands m_Sequence to m_out and returns the new buffer position (0)
143 inline sal_uInt32 writeSequence() throw( SAXException );
145 // use only if to insert the bytes more space in the sequence is needed and
146 // so the sequence has to write out and reset rPos to 0
147 // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE)
148 inline void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
149 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException );
// converts UTF-16 to UTF-8 (optionally escaping XML special characters) into pTarget
150 inline sal_Bool convertToXML(const sal_Unicode * pStr,
151 sal_Int32 nStrLen,
152 sal_Bool bDoNormalization,
153 sal_Bool bNormalizeWhitespace,
154 sal_Int8 *pTarget,
155 sal_uInt32& rPos) throw( SAXException );
// stores the pending '>' of a start tag, if there is one
156 inline void FinishStartElement() throw( SAXException );
157 public:
158 SaxWriterHelper(Reference< XOutputStream > m_TempOut) :
159 m_out(m_TempOut),
160 m_Sequence(SEQUENCESIZE),
161 mp_Sequence(NULL),
162 nLastLineFeedPos(0),
163 nCurrentPos(0),
164 m_bStartElementFinished(sal_True)
166 OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size to small");
167 mp_Sequence = m_Sequence.getArray();
// the destructor only asserts that nothing is left unwritten; it does not flush
169 ~SaxWriterHelper()
171 OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
172 OSL_ENSURE(m_bStartElementFinished, "StartElement not complettly written");
// stores a line feed followed by m_nLevel blanks
175 inline void insertIndentation(sal_uInt32 m_nLevel) throw( SAXException );
177 // returns whether it works correct or invalid characters were in the string
178 // If there are invalid characters in the string it returns sal_False.
179 // Than the calling method has to throw the needed Exception.
180 inline sal_Bool writeString(const rtl::OUString& rWriteOutString,
181 sal_Bool bDoNormalization,
182 sal_Bool bNormalizeWhitespace) throw( SAXException );
// difference between the current buffer position and the last stored line feed
184 sal_uInt32 GetLastColumnCount() { return (sal_uInt32)(nCurrentPos - nLastLineFeedPos); }
// stores the XML declaration followed by a line feed
186 inline void startDocument() throw( SAXException );
188 // returns whether it works correct or invalid characters were in the strings
189 // If there are invalid characters in one of the strings it returns sal_False.
190 // Than the calling method has to throw the needed Exception.
// NOTE: the declaration below returns a SaxInvalidCharacterError, not a sal_Bool.
191 inline SaxInvalidCharacterError startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException );
// stores "/>" if a start tag is still open and returns sal_True; otherwise returns sal_False
192 inline sal_Bool FinishEmptyElement() throw( SAXException );
194 // returns whether it works correct or invalid characters were in the string
195 // If there are invalid characters in the string it returns sal_False.
196 // Than the calling method has to throw the needed Exception.
197 inline sal_Bool endElement(const rtl::OUString& rName) throw( SAXException );
// writes out whatever is still buffered
198 inline void endDocument() throw( SAXException );
200 // returns whether it works correct or invalid characters were in the strings
201 // If there are invalid characters in the string it returns sal_False.
202 // Than the calling method has to throw the needed Exception.
203 inline sal_Bool processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException );
204 inline void startCDATA() throw( SAXException );
205 inline void endCDATA() throw( SAXException );
207 // returns whether it works correct or invalid characters were in the strings
208 // If there are invalid characters in the string it returns sal_False.
209 // Than the calling method has to throw the needed Exception.
210 inline sal_Bool comment(const rtl::OUString& rComment) throw( SAXException );
// finishes a pending start tag and writes out the buffered bytes
212 inline void clearBuffer() throw( SAXException );
// Validity table for the code units 0..31, indexed by the code unit. A non-zero entry
// means IsInvalidChar() accepts the character; only 9 (tab), 10 (line feed) and
// 13 (carriage return) are marked.
215 const sal_Bool g_bValidCharsBelow32[32] =
217 // 0 1 2 3 4 5 6 7
218 0,0,0,0,0,0,0,0, //0
219 0,1,1,0,0,1,0,0, //8
220 0,0,0,0,0,0,0,0, //16
221 0,0,0,0,0,0,0,0
224 inline sal_Bool IsInvalidChar(const sal_Unicode aChar)
226 sal_Bool bRet(sal_False);
227 // check first for the most common characters
228 if( aChar < 32 || aChar >= 0xd800 )
229 bRet = ( (aChar < 32 && ! g_bValidCharsBelow32[aChar]) ||
230 aChar == 0xffff ||
231 aChar == 0xfffe );
232 return bRet;
235 /********
236 * write through to the output stream
238 *****/
239 inline sal_uInt32 SaxWriterHelper::writeSequence() throw( SAXException )
243 m_out->writeBytes( m_Sequence );
245 catch( IOException & e )
247 Any a;
248 a <<= e;
249 throw SAXException(
250 OUString::createFromAscii( "io exception during writing" ),
251 Reference< XInterface > (),
252 a );
254 nLastLineFeedPos -= SEQUENCESIZE;
255 return 0;
258 inline void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
259 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException )
261 OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod");
262 sal_uInt32 nCount(SEQUENCESIZE - rPos);
263 memcpy( &(pTarget[rPos]) , pBytes, nCount);
265 OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end");
267 rPos = writeSequence();
268 sal_uInt32 nRestCount(nBytesCount - nCount);
269 if ((rPos + nRestCount) <= SEQUENCESIZE)
271 memcpy( &(pTarget[rPos]), &pBytes[nCount], nRestCount);
272 rPos += nRestCount;
274 else
275 AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount);
278 /** Converts an UTF16 string to UTF8 and does XML normalization
280 @param pTarget
281 Pointer to a piece of memory, to where the output should be written. The caller
282 must call calcXMLByteLength on the same string, to ensure,
283 that there is enough memory for converting.
// Walks over nStrLen UTF-16 code units of pStr and stores their UTF-8 form in pTarget,
// starting at rPos. With bDoNormalization the characters & < > ' " and CR are stored
// as entities / character references; line feed and tab are stored that way only if
// bNormalizeWhitespace is set as well. Whenever the buffer would overflow, AddBytes()
// is used, and a buffer that is exactly full is written out with writeSequence().
// Returns sal_False if an invalid character or a broken surrogate pair was met; such
// code units are not stored.
285 inline sal_Bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
286 sal_Int32 nStrLen,
287 sal_Bool bDoNormalization,
288 sal_Bool bNormalizeWhitespace,
289 sal_Int8 *pTarget,
290 sal_uInt32& rPos ) throw( SAXException )
292 sal_Bool bRet(sal_True);
// holds the value derived from a first surrogate until the second one arrives
293 sal_uInt32 nSurrogate = 0;
295 for( sal_Int32 i = 0 ; i < nStrLen ; i ++ )
297 sal_uInt16 c = pStr[i];
// invalid code units only mark the result as failed, nothing is stored for them
298 if (IsInvalidChar(c))
299 bRet = sal_False;
// 7-bit characters take one byte, unless they have to be escaped
300 else if( (c >= 0x0001) && (c <= 0x007F) )
302 if( bDoNormalization )
304 switch( c )
306 case '&': // resemble to &amp;
308 if ((rPos + 5) > SEQUENCESIZE)
309 AddBytes(pTarget, rPos, (sal_Int8*)"&amp;", 5);
310 else
312 memcpy( &(pTarget[rPos]) , "&amp;", 5 );
313 rPos += 5;
316 break;
317 case '<':
319 if ((rPos + 4) > SEQUENCESIZE)
320 AddBytes(pTarget, rPos, (sal_Int8*)"&lt;", 4);
321 else
323 memcpy( &(pTarget[rPos]) , "&lt;" , 4 );
324 rPos += 4; // &lt;
327 break;
328 case '>':
330 if ((rPos + 4) > SEQUENCESIZE)
331 AddBytes(pTarget, rPos, (sal_Int8*)"&gt;", 4);
332 else
334 memcpy( &(pTarget[rPos]) , "&gt;" , 4 );
335 rPos += 4; // &gt;
338 break;
339 case 39: // 39 == '''
341 if ((rPos + 6) > SEQUENCESIZE)
342 AddBytes(pTarget, rPos, (sal_Int8*)"&apos;", 6);
343 else
345 memcpy( &(pTarget[rPos]) , "&apos;" , 6 );
346 rPos += 6; // &apos;
349 break;
350 case '"':
352 if ((rPos + 6) > SEQUENCESIZE)
353 AddBytes(pTarget, rPos, (sal_Int8*)"&quot;", 6);
354 else
356 memcpy( &(pTarget[rPos]) , "&quot;" , 6 );
357 rPos += 6; // &quot;
360 break;
// carriage return is always stored as a character reference when normalizing
361 case 13:
363 if ((rPos + 6) > SEQUENCESIZE)
364 AddBytes(pTarget, rPos, (sal_Int8*)"&#x0d;", 6);
365 else
367 memcpy( &(pTarget[rPos]) , "&#x0d;" , 6 );
368 rPos += 6;
371 break;
372 case LINEFEED:
374 if( bNormalizeWhitespace )
376 if ((rPos + 6) > SEQUENCESIZE)
377 AddBytes(pTarget, rPos, (sal_Int8*)"&#x0a;" , 6);
378 else
380 memcpy( &(pTarget[rPos]) , "&#x0a;" , 6 );
381 rPos += 6;
384 else
// a literal line feed is remembered for the column counting
386 pTarget[rPos] = LINEFEED;
387 nLastLineFeedPos = rPos;
388 rPos ++;
391 break;
392 case 9:
394 if( bNormalizeWhitespace )
396 if ((rPos + 6) > SEQUENCESIZE)
397 AddBytes(pTarget, rPos, (sal_Int8*)"&#x09;" , 6);
398 else
400 memcpy( &(pTarget[rPos]) , "&#x09;" , 6 );
401 rPos += 6;
404 else
406 pTarget[rPos] = 9;
407 rPos ++;
410 break;
411 default:
413 pTarget[rPos] = (sal_Int8)c;
414 rPos ++;
416 break;
419 else
// no normalization: store the character unchanged
421 pTarget[rPos] = (sal_Int8)c;
422 if ((sal_Int8)c == LINEFEED)
423 nLastLineFeedPos = rPos;
424 rPos ++;
427 else if( c >= 0xd800 && c < 0xdc00 )
429 // 1. surrogate: save (until 2. surrogate)
430 OSL_ENSURE( nSurrogate == 0, "left-over Unicode surrogate" );
431 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
433 else if( c >= 0xdc00 && c < 0xe000 )
435 // 2. surrogate: write as UTF-8
436 OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
438 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
// only a combined value in the range 0x10000..0x10FFFF is stored, as four bytes
439 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
441 sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
442 sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
443 sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
444 sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
445 if ((rPos + 4) > SEQUENCESIZE)
446 AddBytes(pTarget, rPos, aBytes, 4);
447 else
449 pTarget[rPos] = aBytes[0];
450 rPos ++;
451 pTarget[rPos] = aBytes[1];
452 rPos ++;
453 pTarget[rPos] = aBytes[2];
454 rPos ++;
455 pTarget[rPos] = aBytes[3];
456 rPos ++;
459 else
461 OSL_ENSURE( false, "illegal Unicode character" );
462 bRet = sal_False;
465 // reset surrogate
466 nSurrogate = 0;
// remaining code units above 0x07FF take three bytes
468 else if( c > 0x07FF )
470 sal_Int8 aBytes[] = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)),
471 sal_Int8(0x80 | ((c >> 6) & 0x3F)),
472 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
473 if ((rPos + 3) > SEQUENCESIZE)
474 AddBytes(pTarget, rPos, aBytes, 3);
475 else
477 pTarget[rPos] = aBytes[0];
478 rPos ++;
479 pTarget[rPos] = aBytes[1];
480 rPos ++;
481 pTarget[rPos] = aBytes[2];
482 rPos ++;
// everything else that reaches this point (0x0080..0x07FF) takes two bytes
485 else
487 sal_Int8 aBytes[] = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)),
488 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
489 if ((rPos + 2) > SEQUENCESIZE)
490 AddBytes(pTarget, rPos, aBytes, 2);
491 else
493 pTarget[rPos] = aBytes[0];
494 rPos ++;
495 pTarget[rPos] = aBytes[1];
496 rPos ++;
// a buffer that is exactly full is written out before the next code unit is handled
499 OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
500 if (rPos == SEQUENCESIZE)
501 rPos = writeSequence();
503 // reset left-over surrogate
// a first surrogate followed by anything but a second surrogate counts as an error
504 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
// NOTE(review): this condition is always true inside this branch, so the check never fires
506 OSL_ENSURE( nSurrogate != 0, "left-over Unicode surrogate" );
507 nSurrogate = 0;
508 bRet = sal_False;
511 return bRet;
514 inline void SaxWriterHelper::FinishStartElement() throw( SAXException )
516 if (!m_bStartElementFinished)
518 mp_Sequence[nCurrentPos] = '>';
519 nCurrentPos++;
520 if (nCurrentPos == SEQUENCESIZE)
521 nCurrentPos = writeSequence();
522 m_bStartElementFinished = sal_True;
526 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel) throw( SAXException )
528 FinishStartElement();
529 if (m_nLevel > 0)
531 if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE)
533 mp_Sequence[nCurrentPos] = LINEFEED;
534 nLastLineFeedPos = nCurrentPos;
535 nCurrentPos++;
536 memset( &(mp_Sequence[nCurrentPos]) , 32 , m_nLevel );
537 nCurrentPos += m_nLevel;
538 if (nCurrentPos == SEQUENCESIZE)
539 nCurrentPos = writeSequence();
541 else
543 sal_uInt32 nCount(m_nLevel + 1);
544 sal_Int8* pBytes = new sal_Int8[nCount];
545 pBytes[0] = LINEFEED;
546 memset( &(pBytes[1]), 32, m_nLevel );
547 AddBytes(mp_Sequence, nCurrentPos, pBytes, nCount);
548 delete[] pBytes;
549 nLastLineFeedPos = nCurrentPos - nCount;
550 if (nCurrentPos == SEQUENCESIZE)
551 nCurrentPos = writeSequence();
554 else
556 mp_Sequence[nCurrentPos] = LINEFEED;
557 nLastLineFeedPos = nCurrentPos;
558 nCurrentPos++;
559 if (nCurrentPos == SEQUENCESIZE)
560 nCurrentPos = writeSequence();
564 inline sal_Bool SaxWriterHelper::writeString( const rtl::OUString& rWriteOutString,
565 sal_Bool bDoNormalization,
566 sal_Bool bNormalizeWhitespace ) throw( SAXException )
568 FinishStartElement();
569 return convertToXML(rWriteOutString.getStr(),
570 rWriteOutString.getLength(),
571 bDoNormalization,
572 bNormalizeWhitespace,
573 mp_Sequence,
574 nCurrentPos);
577 inline void SaxWriterHelper::startDocument() throw( SAXException )
579 const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
580 const int nLen = strlen( pc );
581 if ((nCurrentPos + nLen) <= SEQUENCESIZE)
583 memcpy( mp_Sequence, pc , nLen );
584 nCurrentPos += nLen;
586 else
588 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)pc, nLen);
590 OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
591 if (nCurrentPos == SEQUENCESIZE)
592 nCurrentPos = writeSequence();
593 mp_Sequence[nCurrentPos] = LINEFEED;
594 nCurrentPos++;
595 if (nCurrentPos == SEQUENCESIZE)
596 nCurrentPos = writeSequence();
599 inline SaxInvalidCharacterError SaxWriterHelper::startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException )
601 FinishStartElement();
602 mp_Sequence[nCurrentPos] = '<';
603 nCurrentPos++;
604 if (nCurrentPos == SEQUENCESIZE)
605 nCurrentPos = writeSequence();
607 SaxInvalidCharacterError eRet(SAX_NONE);
608 if (!writeString(rName, sal_False, sal_False))
609 eRet = SAX_ERROR;
611 sal_Int16 nAttribCount = xAttribs.is() ? static_cast<sal_Int16>(xAttribs->getLength()) : 0;
612 for(sal_Int16 i = 0 ; i < nAttribCount ; i++ )
614 mp_Sequence[nCurrentPos] = ' ';
615 nCurrentPos++;
616 if (nCurrentPos == SEQUENCESIZE)
617 nCurrentPos = writeSequence();
619 if (!writeString(xAttribs->getNameByIndex( i ), sal_False, sal_False))
620 eRet = SAX_ERROR;
622 mp_Sequence[nCurrentPos] = '=';
623 nCurrentPos++;
624 if (nCurrentPos == SEQUENCESIZE)
625 nCurrentPos = writeSequence();
626 mp_Sequence[nCurrentPos] = '"';
627 nCurrentPos++;
628 if (nCurrentPos == SEQUENCESIZE)
629 nCurrentPos = writeSequence();
631 if (!writeString(xAttribs->getValueByIndex( i ), sal_True, sal_True) &&
632 !(eRet == SAX_ERROR))
633 eRet = SAX_WARNING;
635 mp_Sequence[nCurrentPos] = '"';
636 nCurrentPos++;
637 if (nCurrentPos == SEQUENCESIZE)
638 nCurrentPos = writeSequence();
641 m_bStartElementFinished = sal_False; // because the '>' character is not added,
642 // because it is possible, that the "/>"
643 // characters have to add
644 return eRet;
647 inline sal_Bool SaxWriterHelper::FinishEmptyElement() throw( SAXException )
649 if (m_bStartElementFinished)
650 return sal_False;
652 mp_Sequence[nCurrentPos] = '/';
653 nCurrentPos++;
654 if (nCurrentPos == SEQUENCESIZE)
655 nCurrentPos = writeSequence();
656 mp_Sequence[nCurrentPos] = '>';
657 nCurrentPos++;
658 if (nCurrentPos == SEQUENCESIZE)
659 nCurrentPos = writeSequence();
661 m_bStartElementFinished = sal_True;
663 return sal_True;
666 inline sal_Bool SaxWriterHelper::endElement(const rtl::OUString& rName) throw( SAXException )
668 FinishStartElement();
669 mp_Sequence[nCurrentPos] = '<';
670 nCurrentPos++;
671 if (nCurrentPos == SEQUENCESIZE)
672 nCurrentPos = writeSequence();
673 mp_Sequence[nCurrentPos] = '/';
674 nCurrentPos++;
675 if (nCurrentPos == SEQUENCESIZE)
676 nCurrentPos = writeSequence();
678 sal_Bool bRet(writeString( rName, sal_False, sal_False));
680 mp_Sequence[nCurrentPos] = '>';
681 nCurrentPos++;
682 if (nCurrentPos == SEQUENCESIZE)
683 nCurrentPos = writeSequence();
685 return bRet;
688 inline void SaxWriterHelper::endDocument() throw( SAXException )
690 if (nCurrentPos > 0)
692 m_Sequence.realloc(nCurrentPos);
693 nCurrentPos = writeSequence();
694 //m_Sequence.realloc(SEQUENCESIZE);
698 inline void SaxWriterHelper::clearBuffer() throw( SAXException )
700 FinishStartElement();
701 if (nCurrentPos > 0)
703 m_Sequence.realloc(nCurrentPos);
704 nCurrentPos = writeSequence();
705 m_Sequence.realloc(SEQUENCESIZE);
709 inline sal_Bool SaxWriterHelper::processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException )
711 FinishStartElement();
712 mp_Sequence[nCurrentPos] = '<';
713 nCurrentPos++;
714 if (nCurrentPos == SEQUENCESIZE)
715 nCurrentPos = writeSequence();
716 mp_Sequence[nCurrentPos] = '?';
717 nCurrentPos++;
718 if (nCurrentPos == SEQUENCESIZE)
719 nCurrentPos = writeSequence();
721 sal_Bool bRet(writeString( rTarget, sal_False, sal_False ));
723 mp_Sequence[nCurrentPos] = ' ';
724 nCurrentPos++;
725 if (nCurrentPos == SEQUENCESIZE)
726 nCurrentPos = writeSequence();
728 if (!writeString( rData, sal_False, sal_False ))
729 bRet = sal_False;
731 mp_Sequence[nCurrentPos] = '?';
732 nCurrentPos++;
733 if (nCurrentPos == SEQUENCESIZE)
734 nCurrentPos = writeSequence();
735 mp_Sequence[nCurrentPos] = '>';
736 nCurrentPos++;
737 if (nCurrentPos == SEQUENCESIZE)
738 nCurrentPos = writeSequence();
740 return bRet;
743 inline void SaxWriterHelper::startCDATA() throw( SAXException )
745 FinishStartElement();
746 if ((nCurrentPos + 9) <= SEQUENCESIZE)
748 memcpy( &(mp_Sequence[nCurrentPos]), "<![CDATA[" , 9 );
749 nCurrentPos += 9;
751 else
752 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"<![CDATA[" , 9);
753 if (nCurrentPos == SEQUENCESIZE)
754 nCurrentPos = writeSequence();
757 inline void SaxWriterHelper::endCDATA() throw( SAXException )
759 FinishStartElement();
760 if ((nCurrentPos + 3) <= SEQUENCESIZE)
762 memcpy( &(mp_Sequence[nCurrentPos]), "]]>" , 3 );
763 nCurrentPos += 3;
765 else
766 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"]]>" , 3);
767 if (nCurrentPos == SEQUENCESIZE)
768 nCurrentPos = writeSequence();
771 inline sal_Bool SaxWriterHelper::comment(const rtl::OUString& rComment) throw( SAXException )
773 FinishStartElement();
774 mp_Sequence[nCurrentPos] = '<';
775 nCurrentPos++;
776 if (nCurrentPos == SEQUENCESIZE)
777 nCurrentPos = writeSequence();
778 mp_Sequence[nCurrentPos] = '!';
779 nCurrentPos++;
780 if (nCurrentPos == SEQUENCESIZE)
781 nCurrentPos = writeSequence();
782 mp_Sequence[nCurrentPos] = '-';
783 nCurrentPos++;
784 if (nCurrentPos == SEQUENCESIZE)
785 nCurrentPos = writeSequence();
786 mp_Sequence[nCurrentPos] = '-';
787 nCurrentPos++;
788 if (nCurrentPos == SEQUENCESIZE)
789 nCurrentPos = writeSequence();
791 sal_Bool bRet(writeString( rComment, sal_False, sal_False));
793 mp_Sequence[nCurrentPos] = '-';
794 nCurrentPos++;
795 if (nCurrentPos == SEQUENCESIZE)
796 nCurrentPos = writeSequence();
797 mp_Sequence[nCurrentPos] = '-';
798 nCurrentPos++;
799 if (nCurrentPos == SEQUENCESIZE)
800 nCurrentPos = writeSequence();
801 mp_Sequence[nCurrentPos] = '>';
802 nCurrentPos++;
803 if (nCurrentPos == SEQUENCESIZE)
804 nCurrentPos = writeSequence();
806 return bRet;
809 inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen,
810 sal_Bool bDoNormalization,
811 sal_Bool bNormalizeWhitespace )
813 sal_Int32 nOutputLength = 0;
814 sal_uInt32 nSurrogate = 0;
816 for( sal_Int32 i = 0 ; i < nStrLen ; i++ )
818 sal_uInt16 c = pStr[i];
819 if( !IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F) )
821 if( bDoNormalization )
823 switch( c )
825 case '&': // resemble to &amp;
826 nOutputLength +=5;
827 break;
828 case '<': // &lt;
829 case '>': // &gt;
830 nOutputLength +=4;
831 break;
832 case 39: // 39 == ''', &apos;
833 case '"': // &quot;
834 case 13: // &#x0d;
835 nOutputLength += 6;
836 break;
838 case 10: // &#x0a;
839 case 9: // &#x09;
840 if( bNormalizeWhitespace )
842 nOutputLength += 6; //
844 else
846 nOutputLength ++;
848 break;
849 default:
850 nOutputLength ++;
853 else
855 nOutputLength ++;
858 else if( c >= 0xd800 && c < 0xdc00 )
860 // save surrogate
861 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
863 else if( c >= 0xdc00 && c < 0xe000 )
865 // 2. surrogate: write as UTF-8 (if range is OK
866 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
867 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
868 nOutputLength += 4;
869 nSurrogate = 0;
871 else if( c > 0x07FF )
873 nOutputLength += 3;
875 else
877 nOutputLength += 2;
880 // surrogate processing
881 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
882 nSurrogate = 0;
885 return nOutputLength;
888 /** returns position of first ascii 10 within the string, -1 when no 10 in string.
890 static inline sal_Int32 getFirstLineBreak( const OUString & str ) throw ()
892 const sal_Unicode *pSource = str.getStr();
893 sal_Int32 nLen = str.getLength();
895 for( int n = 0; n < nLen ; n ++ )
897 if( LINEFEED == pSource[n] ) {
898 return n;
901 return -1;
904 /** returns position of last ascii 10 within sequence, -1 when no 10 in string.
906 static inline sal_Int32 getLastLineBreak( const Sequence<sal_Int8> & seq) throw ()
908 const sal_Int8 *pSource = seq.getConstArray();
909 sal_Int32 nLen = seq.getLength();
911 for( int n = nLen-1; n >= 0 ; n -- )
913 if( LINEFEED == pSource[n] ) {
914 return n;
917 return -1;
921 class SAXWriter :
922 public WeakImplHelper3<
923 XActiveDataSource,
924 XExtendedDocumentHandler,
925 XServiceInfo >
927 public:
928 SAXWriter( ) :
929 m_seqStartElement(),
930 mp_SaxWriterHelper( NULL ),
931 m_bForceLineBreak(sal_False),
932 m_bAllowLineBreak(sal_False)
934 ~SAXWriter()
936 delete mp_SaxWriterHelper;
939 public: // XActiveDataSource
940 virtual void SAL_CALL setOutputStream(const Reference< XOutputStream > & aStream)
941 throw (RuntimeException)
943 // temporary: set same stream again to clear buffer
944 if ( m_out == aStream && mp_SaxWriterHelper && m_bDocStarted )
945 mp_SaxWriterHelper->clearBuffer();
946 else
949 m_out = aStream;
950 delete mp_SaxWriterHelper;
951 mp_SaxWriterHelper = new SaxWriterHelper(m_out);
952 m_bDocStarted = sal_False;
953 m_nLevel = 0;
954 m_bIsCDATA = sal_False;
958 virtual Reference< XOutputStream > SAL_CALL getOutputStream(void)
959 throw(RuntimeException)
960 { return m_out; }
962 public: // XDocumentHandler
963 virtual void SAL_CALL startDocument(void)
964 throw(SAXException, RuntimeException);
966 virtual void SAL_CALL endDocument(void)
967 throw(SAXException, RuntimeException);
969 virtual void SAL_CALL startElement(const OUString& aName,
970 const Reference< XAttributeList > & xAttribs)
971 throw (SAXException, RuntimeException);
973 virtual void SAL_CALL endElement(const OUString& aName)
974 throw(SAXException, RuntimeException);
976 virtual void SAL_CALL characters(const OUString& aChars)
977 throw(SAXException, RuntimeException);
979 virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces)
980 throw(SAXException, RuntimeException);
981 virtual void SAL_CALL processingInstruction(const OUString& aTarget,
982 const OUString& aData)
983 throw(SAXException, RuntimeException);
984 virtual void SAL_CALL setDocumentLocator(const Reference< XLocator > & xLocator)
985 throw(SAXException, RuntimeException);
987 public: // XExtendedDocumentHandler
988 virtual void SAL_CALL startCDATA(void) throw(SAXException, RuntimeException);
989 virtual void SAL_CALL endCDATA(void) throw(RuntimeException);
990 virtual void SAL_CALL comment(const OUString& sComment)
991 throw(SAXException, RuntimeException);
992 virtual void SAL_CALL unknown(const OUString& sString)
993 throw(SAXException, RuntimeException);
994 virtual void SAL_CALL allowLineBreak(void)
995 throw(SAXException,RuntimeException);
997 public: // XServiceInfo
998 OUString SAL_CALL getImplementationName() throw();
999 Sequence< OUString > SAL_CALL getSupportedServiceNames(void) throw();
1000 sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw();
1002 private:
1004 void writeSequence( const Sequence<sal_Int8> & seq );
1005 sal_Int32 getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence ) throw();
1007 Reference< XOutputStream > m_out;
1008 Sequence < sal_Int8 > m_seqStartElement;
1009 SaxWriterHelper* mp_SaxWriterHelper;
1011 // Status information
1012 sal_Bool m_bDocStarted : 1;
1013 sal_Bool m_bIsCDATA : 1;
1014 sal_Bool m_bForceLineBreak : 1;
1015 sal_Bool m_bAllowLineBreak : 1;
1016 sal_Int32 m_nLevel;
1020 //--------------------------------------
1021 // the extern interface
1022 //---------------------------------------
1023 Reference < XInterface > SAL_CALL SaxWriter_CreateInstance(
1024 const Reference < XMultiServiceFactory > & )
1025 throw (Exception)
1027 SAXWriter *p = new SAXWriter;
1028 return Reference< XInterface > ( SAL_STATIC_CAST(OWeakObject *, p ) );
1031 OUString SaxWriter_getServiceName() throw()
1033 return OUString::createFromAscii( "com.sun.star.xml.sax.Writer" );
1036 OUString SaxWriter_getImplementationName() throw()
1038 return OUString::createFromAscii( "com.sun.star.extensions.xml.sax.Writer" );
1041 Sequence< OUString > SaxWriter_getSupportedServiceNames(void) throw()
1043 Sequence<OUString> aRet(1);
1044 aRet.getArray()[0] = SaxWriter_getServiceName();
1045 return aRet;
1049 sal_Int32 SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurence ) throw()
1051 sal_Int32 nLength =-1;
1052 if (mp_SaxWriterHelper)
1054 if ( m_bForceLineBreak ||
1055 (m_bAllowLineBreak &&
1056 ((nFirstLineBreakOccurence + mp_SaxWriterHelper->GetLastColumnCount()) > MAXCOLUMNCOUNT)) )
1057 nLength = m_nLevel;
1059 m_bForceLineBreak = sal_False;
1060 m_bAllowLineBreak = sal_False;
1061 return nLength;
1064 static inline sal_Bool isFirstCharWhitespace( const sal_Unicode *p ) throw()
1066 return *p == ' ';
1070 // XServiceInfo
1071 OUString SAXWriter::getImplementationName() throw()
1073 return SaxWriter_getImplementationName();
1076 // XServiceInfo
1077 sal_Bool SAXWriter::supportsService(const OUString& ServiceName) throw()
1079 Sequence< OUString > aSNL = getSupportedServiceNames();
1080 const OUString * pArray = aSNL.getConstArray();
1082 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
1083 if( pArray[i] == ServiceName )
1084 return sal_True;
1086 return sal_False;
1089 // XServiceInfo
1090 Sequence< OUString > SAXWriter::getSupportedServiceNames(void) throw ()
1092 Sequence<OUString> seq(1);
1093 seq.getArray()[0] = SaxWriter_getServiceName();
1094 return seq;
1099 void SAXWriter::startDocument() throw(SAXException, RuntimeException )
1101 if( m_bDocStarted || ! m_out.is() || !mp_SaxWriterHelper ) {
1102 throw SAXException();
1104 m_bDocStarted = sal_True;
1105 mp_SaxWriterHelper->startDocument();
1109 void SAXWriter::endDocument(void) throw(SAXException, RuntimeException)
1111 if( ! m_bDocStarted )
1113 throw SAXException(
1114 OUString::createFromAscii( "endDocument called before startDocument" ),
1115 Reference< XInterface >() , Any() );
1117 if( m_nLevel ) {
1118 throw SAXException(
1119 OUString::createFromAscii( "unexpected end of document" ),
1120 Reference< XInterface >() , Any() );
1122 mp_SaxWriterHelper->endDocument();
1125 m_out->closeOutput();
1127 catch( IOException & e )
1129 Any a;
1130 a <<= e;
1131 throw SAXException(
1132 OUString::createFromAscii( "IO exception during closing the IO Stream" ),
1133 Reference< XInterface > (),
1134 a );
1139 void SAXWriter::startElement(const OUString& aName, const Reference< XAttributeList >& xAttribs)
1140 throw(SAXException, RuntimeException)
1142 if( ! m_bDocStarted )
1144 SAXException except;
1145 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement called before startDocument" ));
1146 throw except;
1148 if( m_bIsCDATA )
1150 SAXException except;
1151 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "startElement call not allowed with CDATA sections" ));
1152 throw except;
1155 sal_Int32 nLength(0);
1156 if (m_bAllowLineBreak)
1158 sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
1160 nLength ++; // "<"
1161 nLength += calcXMLByteLength( aName.getStr() , aName.getLength(),
1162 sal_False, sal_False ); // the tag name
1164 sal_Int16 n;
1165 for( n = 0 ; n < static_cast<sal_Int16>(nAttribCount) ; n ++ ) {
1166 nLength ++; // " "
1167 OUString tmp = xAttribs->getNameByIndex( n );
1169 nLength += calcXMLByteLength( tmp.getStr() , tmp.getLength() , sal_False, sal_False );
1171 nLength += 2; // ="
1173 tmp = xAttribs->getValueByIndex( n );
1175 nLength += calcXMLByteLength( tmp.getStr(), tmp.getLength(), sal_True, sal_True );
1177 nLength += 1; // "
1180 nLength ++; // '>'
1183 // Is there a new indentation necesarry ?
1184 sal_Int32 nPrefix(getIndentPrefixLength( nLength ));
1186 // write into sequence
1187 if( nPrefix >= 0 )
1188 mp_SaxWriterHelper->insertIndentation( nPrefix );
1190 SaxInvalidCharacterError eRet(mp_SaxWriterHelper->startElement(aName, xAttribs));
1192 m_nLevel++;
1194 if (eRet == SAX_WARNING)
1196 SAXInvalidCharacterException except;
1197 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export in a attribute value" ) );
1198 throw except;
1200 else if (eRet == SAX_ERROR)
1202 SAXException except;
1203 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1204 throw except;
1208 void SAXWriter::endElement(const OUString& aName) throw (SAXException, RuntimeException)
1210 if( ! m_bDocStarted ) {
1211 throw SAXException ();
1213 m_nLevel --;
1215 if( m_nLevel < 0 ) {
1216 throw SAXException();
1218 sal_Bool bRet(sal_True);
1220 if( mp_SaxWriterHelper->FinishEmptyElement() )
1221 m_bForceLineBreak = sal_False;
1222 else
1224 // only ascii chars allowed
1225 sal_Int32 nLength(0);
1226 if (m_bAllowLineBreak)
1227 nLength = 3 + calcXMLByteLength( aName.getStr(), aName.getLength(), sal_False, sal_False );
1228 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1230 if( nPrefix >= 0 )
1231 mp_SaxWriterHelper->insertIndentation( nPrefix );
1233 bRet = mp_SaxWriterHelper->endElement(aName);
1236 if (!bRet)
1238 SAXException except;
1239 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1240 throw except;
1244 void SAXWriter::characters(const OUString& aChars) throw(SAXException, RuntimeException)
1246 if( ! m_bDocStarted )
1248 SAXException except;
1249 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "characters method called before startDocument" ) );
1250 throw except;
1253 sal_Bool bThrowException(sal_False);
1254 if( aChars.getLength() )
1256 if( m_bIsCDATA )
1257 bThrowException = !mp_SaxWriterHelper->writeString( aChars, sal_False, sal_False );
1258 else
1260 // Note : nFirstLineBreakOccurence is not exact, because we don't know, how
1261 // many 2 and 3 byte chars are inbetween. However this whole stuff
1262 // is eitherway for pretty printing only, so it does not need to be exact.
1263 sal_Int32 nLength(0);
1264 sal_Int32 nIndentPrefix(-1);
1265 if (m_bAllowLineBreak)
1267 sal_Int32 nFirstLineBreakOccurence = getFirstLineBreak( aChars );
1269 nLength = calcXMLByteLength( aChars.getStr(), aChars.getLength(),
1270 ! m_bIsCDATA , sal_False );
1271 nIndentPrefix = getIndentPrefixLength(
1272 nFirstLineBreakOccurence >= 0 ? nFirstLineBreakOccurence : nLength );
1274 else
1275 nIndentPrefix = getIndentPrefixLength(nLength);
1277 // insert indentation
1278 if( nIndentPrefix >= 0 )
1280 if( isFirstCharWhitespace( aChars.getStr() ) )
1281 mp_SaxWriterHelper->insertIndentation( nIndentPrefix - 1 );
1282 else
1283 mp_SaxWriterHelper->insertIndentation( nIndentPrefix );
1285 bThrowException = !mp_SaxWriterHelper->writeString(aChars, sal_True , sal_False);
1288 if (bThrowException)
1290 SAXInvalidCharacterException except;
1291 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1292 throw except;
1297 void SAXWriter::ignorableWhitespace(const OUString&) throw(SAXException, RuntimeException)
1299 if( ! m_bDocStarted )
1301 throw SAXException ();
1304 m_bForceLineBreak = sal_True;
1307 void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData)
1308 throw (SAXException, RuntimeException)
1310 if( ! m_bDocStarted || m_bIsCDATA )
1312 throw SAXException();
1315 sal_Int32 nLength(0);
1316 if (m_bAllowLineBreak)
1318 nLength = 2; // "<?"
1319 nLength += calcXMLByteLength( aTarget.getStr(), aTarget.getLength(), sal_False, sal_False );
1321 nLength += 1; // " "
1323 nLength += calcXMLByteLength( aData.getStr(), aData.getLength(), sal_False, sal_False );
1325 nLength += 2; // "?>"
1328 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1330 if( nPrefix >= 0 )
1331 mp_SaxWriterHelper->insertIndentation( nPrefix );
1333 if (!mp_SaxWriterHelper->processingInstruction(aTarget, aData))
1335 SAXException except;
1336 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1337 throw except;
1342 void SAXWriter::setDocumentLocator(const Reference< XLocator >&)
1343 throw (SAXException, RuntimeException)
1348 void SAXWriter::startCDATA(void) throw(SAXException, RuntimeException)
1350 if( ! m_bDocStarted || m_bIsCDATA)
1352 throw SAXException ();
1355 sal_Int32 nLength = 9;
1356 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1357 if( nPrefix >= 0 )
1358 mp_SaxWriterHelper->insertIndentation( nPrefix );
1360 mp_SaxWriterHelper->startCDATA();
1362 m_bIsCDATA = sal_True;
1365 void SAXWriter::endCDATA(void) throw (RuntimeException)
1367 if( ! m_bDocStarted | ! m_bIsCDATA)
1369 SAXException except;
1370 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "endCDATA was called without startCDATA" ) );
1371 throw except;
1374 sal_Int32 nLength = 3;
1375 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1376 if( nPrefix >= 0 )
1377 mp_SaxWriterHelper->insertIndentation( nPrefix );
1379 mp_SaxWriterHelper->endCDATA();
1381 m_bIsCDATA = sal_False;
1385 void SAXWriter::comment(const OUString& sComment) throw(SAXException, RuntimeException)
1387 if( ! m_bDocStarted || m_bIsCDATA )
1389 throw SAXException();
1392 sal_Int32 nLength(0);
1393 if (m_bAllowLineBreak)
1395 nLength = 4; // "<!--"
1396 nLength += calcXMLByteLength( sComment.getStr(), sComment.getLength(), sal_False, sal_False);
1398 nLength += 3;
1401 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1402 if( nPrefix >= 0 )
1403 mp_SaxWriterHelper->insertIndentation( nPrefix );
1405 if (!mp_SaxWriterHelper->comment(sComment))
1407 SAXException except;
1408 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1409 throw except;
1414 void SAXWriter::allowLineBreak( ) throw ( SAXException , RuntimeException)
1416 if( ! m_bDocStarted || m_bAllowLineBreak ) {
1417 throw SAXException();
1420 m_bAllowLineBreak = sal_True;
1423 void SAXWriter::unknown(const OUString& sString) throw (SAXException, RuntimeException)
1426 if( ! m_bDocStarted )
1428 throw SAXException ();
1430 if( m_bIsCDATA )
1432 throw SAXException();
1435 if( sString.matchAsciiL( "<?xml", 5 ) )
1436 return;
1438 sal_Int32 nLength(0);
1439 if (m_bAllowLineBreak)
1440 nLength = calcXMLByteLength( sString.getStr(), sString.getLength(), sal_False, sal_False );
1442 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1443 if( nPrefix >= 0 )
1444 mp_SaxWriterHelper->insertIndentation( nPrefix );
1446 if (!mp_SaxWriterHelper->writeString( sString, sal_False, sal_False))
1448 SAXException except;
1449 except.Message = OUString( RTL_CONSTASCII_USTRINGPARAM( "Invalid charcter during XML-Export" ) );
1450 throw except;