Bump for 3.6-28
[LibreOffice.git] / sax / source / expatwrap / saxwriter.cxx
blob57f99f0cfbbe99b20bb51478a0a4244bc2caff85
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*************************************************************************
4 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
6 * Copyright 2000, 2010 Oracle and/or its affiliates.
8 * OpenOffice.org - a multi-platform office productivity suite
10 * This file is part of OpenOffice.org.
12 * OpenOffice.org is free software: you can redistribute it and/or modify
13 * it under the terms of the GNU Lesser General Public License version 3
14 * only, as published by the Free Software Foundation.
16 * OpenOffice.org is distributed in the hope that it will be useful,
17 * but WITHOUT ANY WARRANTY; without even the implied warranty of
18 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
19 * GNU Lesser General Public License version 3 for more details
20 * (a copy is included in the LICENSE file that accompanied this code).
22 * You should have received a copy of the GNU Lesser General Public License
23 * version 3 along with OpenOffice.org. If not, see
24 * <http://www.openoffice.org/license.html>
25 * for a copy of the LGPLv3 License.
27 ************************************************************************/
28 #include <string.h>
30 #include <com/sun/star/lang/XServiceInfo.hpp>
31 #include <com/sun/star/util/XCloneable.hpp>
32 #include <com/sun/star/xml/sax/XExtendedDocumentHandler.hpp>
33 #include <com/sun/star/xml/sax/XParser.hpp>
34 #include <com/sun/star/xml/sax/SAXParseException.hpp>
35 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
37 #include <com/sun/star/io/XActiveDataSource.hpp>
39 #include <cppuhelper/factory.hxx>
40 #include <cppuhelper/weak.hxx>
41 #include <cppuhelper/implbase3.hxx>
43 #include <rtl/strbuf.hxx>
44 #include <rtl/byteseq.hxx>
45 #include <rtl/ustrbuf.hxx>
47 using namespace ::rtl;
48 using namespace ::std;
49 using namespace ::osl;
50 using namespace ::cppu;
51 using namespace ::com::sun::star::uno;
52 using namespace ::com::sun::star::lang;
53 using namespace ::com::sun::star::registry;
54 using namespace ::com::sun::star::xml::sax;
55 using namespace ::com::sun::star::util;
56 using namespace ::com::sun::star::io;
58 #include "factory.hxx"
59 #include "xml2utf.hxx"
61 #define LINEFEED 10
62 #define SEQUENCESIZE 1024
63 #define MAXCOLUMNCOUNT 72
65 /******
68 * Character conversion functions
71 *****/
73 namespace sax_expatwrap {
/** Result of writing a start tag: tells the caller how serious an
    invalid-character finding was. */
enum SaxInvalidCharacterError
{
    SAX_NONE,       // every string was written without invalid characters
    SAX_WARNING,    // only an attribute value contained an invalid character
    SAX_ERROR       // an element or attribute name contained an invalid character
};
82 class SaxWriterHelper
84 Reference< XOutputStream > m_out;
85 Sequence < sal_Int8 > m_Sequence;
86 sal_Int8* mp_Sequence;
88 sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
89 sal_uInt32 nCurrentPos;
90 sal_Bool m_bStartElementFinished;
93 inline sal_uInt32 writeSequence() throw( SAXException );
95 // use only if to insert the bytes more space in the sequence is needed and
96 // so the sequence has to write out and reset rPos to 0
97 // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE)
98 inline void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
99 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException );
100 inline sal_Bool convertToXML(const sal_Unicode * pStr,
101 sal_Int32 nStrLen,
102 sal_Bool bDoNormalization,
103 sal_Bool bNormalizeWhitespace,
104 sal_Int8 *pTarget,
105 sal_uInt32& rPos) throw( SAXException );
106 inline void FinishStartElement() throw( SAXException );
107 public:
108 SaxWriterHelper(Reference< XOutputStream > m_TempOut) :
109 m_out(m_TempOut),
110 m_Sequence(SEQUENCESIZE),
111 mp_Sequence(NULL),
112 nLastLineFeedPos(0),
113 nCurrentPos(0),
114 m_bStartElementFinished(sal_True)
116 OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size to small");
117 mp_Sequence = m_Sequence.getArray();
119 ~SaxWriterHelper()
121 OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
122 OSL_ENSURE(m_bStartElementFinished, "StartElement not complettly written");
125 inline void insertIndentation(sal_uInt32 m_nLevel) throw( SAXException );
127 // returns whether it works correct or invalid characters were in the string
128 // If there are invalid characters in the string it returns sal_False.
129 // Than the calling method has to throw the needed Exception.
130 inline sal_Bool writeString(const rtl::OUString& rWriteOutString,
131 sal_Bool bDoNormalization,
132 sal_Bool bNormalizeWhitespace) throw( SAXException );
134 sal_uInt32 GetLastColumnCount() const
135 { return (sal_uInt32)(nCurrentPos - nLastLineFeedPos); }
137 inline void startDocument() throw( SAXException );
139 // returns whether it works correct or invalid characters were in the strings
140 // If there are invalid characters in one of the strings it returns sal_False.
141 // Than the calling method has to throw the needed Exception.
142 inline SaxInvalidCharacterError startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException );
143 inline sal_Bool FinishEmptyElement() throw( SAXException );
145 // returns whether it works correct or invalid characters were in the string
146 // If there are invalid characters in the string it returns sal_False.
147 // Than the calling method has to throw the needed Exception.
148 inline sal_Bool endElement(const rtl::OUString& rName) throw( SAXException );
149 inline void endDocument() throw( SAXException );
151 // returns whether it works correct or invalid characters were in the strings
152 // If there are invalid characters in the string it returns sal_False.
153 // Than the calling method has to throw the needed Exception.
154 inline sal_Bool processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException );
155 inline void startCDATA() throw( SAXException );
156 inline void endCDATA() throw( SAXException );
158 // returns whether it works correct or invalid characters were in the strings
159 // If there are invalid characters in the string it returns sal_False.
160 // Than the calling method has to throw the needed Exception.
161 inline sal_Bool comment(const rtl::OUString& rComment) throw( SAXException );
163 inline void clearBuffer() throw( SAXException );
166 const sal_Bool g_bValidCharsBelow32[32] =
168 // 0 1 2 3 4 5 6 7
169 0,0,0,0,0,0,0,0, //0
170 0,1,1,0,0,1,0,0, //8
171 0,0,0,0,0,0,0,0, //16
172 0,0,0,0,0,0,0,0
175 inline sal_Bool IsInvalidChar(const sal_Unicode aChar)
177 sal_Bool bRet(sal_False);
178 // check first for the most common characters
179 if( aChar < 32 || aChar >= 0xd800 )
180 bRet = ( (aChar < 32 && ! g_bValidCharsBelow32[aChar]) ||
181 aChar == 0xffff ||
182 aChar == 0xfffe );
183 return bRet;
186 /********
187 * write through to the output stream
189 *****/
190 inline sal_uInt32 SaxWriterHelper::writeSequence() throw( SAXException )
194 m_out->writeBytes( m_Sequence );
196 catch (const IOException & e)
198 Any a;
199 a <<= e;
200 throw SAXException(
201 OUString("io exception during writing"),
202 Reference< XInterface > (),
203 a );
205 nLastLineFeedPos -= SEQUENCESIZE;
206 return 0;
209 inline void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
210 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException )
212 OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod");
213 sal_uInt32 nCount(SEQUENCESIZE - rPos);
214 memcpy( &(pTarget[rPos]) , pBytes, nCount);
216 OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end");
218 rPos = writeSequence();
219 sal_uInt32 nRestCount(nBytesCount - nCount);
220 if ((rPos + nRestCount) <= SEQUENCESIZE)
222 memcpy( &(pTarget[rPos]), &pBytes[nCount], nRestCount);
223 rPos += nRestCount;
225 else
226 AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount);
229 /** Converts an UTF16 string to UTF8 and does XML normalization
231 @param pTarget
232 Pointer to a piece of memory, to where the output should be written. The caller
233 must call calcXMLByteLength on the same string, to ensure,
234 that there is enough memory for converting.
236 inline sal_Bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
237 sal_Int32 nStrLen,
238 sal_Bool bDoNormalization,
239 sal_Bool bNormalizeWhitespace,
240 sal_Int8 *pTarget,
241 sal_uInt32& rPos ) throw( SAXException )
243 sal_Bool bRet(sal_True);
244 sal_uInt32 nSurrogate = 0;
246 for( sal_Int32 i = 0 ; i < nStrLen ; i ++ )
248 sal_uInt16 c = pStr[i];
249 if (IsInvalidChar(c))
250 bRet = sal_False;
251 else if( (c >= 0x0001) && (c <= 0x007F) )
253 if( bDoNormalization )
255 switch( c )
257 case '&': // resemble to &amp;
259 if ((rPos + 5) > SEQUENCESIZE)
260 AddBytes(pTarget, rPos, (sal_Int8*)"&amp;", 5);
261 else
263 memcpy( &(pTarget[rPos]) , "&amp;", 5 );
264 rPos += 5;
267 break;
268 case '<':
270 if ((rPos + 4) > SEQUENCESIZE)
271 AddBytes(pTarget, rPos, (sal_Int8*)"&lt;", 4);
272 else
274 memcpy( &(pTarget[rPos]) , "&lt;" , 4 );
275 rPos += 4; // &lt;
278 break;
279 case '>':
281 if ((rPos + 4) > SEQUENCESIZE)
282 AddBytes(pTarget, rPos, (sal_Int8*)"&gt;", 4);
283 else
285 memcpy( &(pTarget[rPos]) , "&gt;" , 4 );
286 rPos += 4; // &gt;
289 break;
290 case 39: // 39 == '''
292 if ((rPos + 6) > SEQUENCESIZE)
293 AddBytes(pTarget, rPos, (sal_Int8*)"&apos;", 6);
294 else
296 memcpy( &(pTarget[rPos]) , "&apos;" , 6 );
297 rPos += 6; // &apos;
300 break;
301 case '"':
303 if ((rPos + 6) > SEQUENCESIZE)
304 AddBytes(pTarget, rPos, (sal_Int8*)"&quot;", 6);
305 else
307 memcpy( &(pTarget[rPos]) , "&quot;" , 6 );
308 rPos += 6; // &quot;
311 break;
312 case 13:
314 if ((rPos + 6) > SEQUENCESIZE)
315 AddBytes(pTarget, rPos, (sal_Int8*)"&#x0d;", 6);
316 else
318 memcpy( &(pTarget[rPos]) , "&#x0d;" , 6 );
319 rPos += 6;
322 break;
323 case LINEFEED:
325 if( bNormalizeWhitespace )
327 if ((rPos + 6) > SEQUENCESIZE)
328 AddBytes(pTarget, rPos, (sal_Int8*)"&#x0a;" , 6);
329 else
331 memcpy( &(pTarget[rPos]) , "&#x0a;" , 6 );
332 rPos += 6;
335 else
337 pTarget[rPos] = LINEFEED;
338 nLastLineFeedPos = rPos;
339 rPos ++;
342 break;
343 case 9:
345 if( bNormalizeWhitespace )
347 if ((rPos + 6) > SEQUENCESIZE)
348 AddBytes(pTarget, rPos, (sal_Int8*)"&#x09;" , 6);
349 else
351 memcpy( &(pTarget[rPos]) , "&#x09;" , 6 );
352 rPos += 6;
355 else
357 pTarget[rPos] = 9;
358 rPos ++;
361 break;
362 default:
364 pTarget[rPos] = (sal_Int8)c;
365 rPos ++;
367 break;
370 else
372 pTarget[rPos] = (sal_Int8)c;
373 if ((sal_Int8)c == LINEFEED)
374 nLastLineFeedPos = rPos;
375 rPos ++;
378 else if( c >= 0xd800 && c < 0xdc00 )
380 // 1. surrogate: save (until 2. surrogate)
381 OSL_ENSURE( nSurrogate == 0, "left-over Unicode surrogate" );
382 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
384 else if( c >= 0xdc00 && c < 0xe000 )
386 // 2. surrogate: write as UTF-8
387 OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
389 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
390 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
392 sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
393 sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
394 sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
395 sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
396 if ((rPos + 4) > SEQUENCESIZE)
397 AddBytes(pTarget, rPos, aBytes, 4);
398 else
400 pTarget[rPos] = aBytes[0];
401 rPos ++;
402 pTarget[rPos] = aBytes[1];
403 rPos ++;
404 pTarget[rPos] = aBytes[2];
405 rPos ++;
406 pTarget[rPos] = aBytes[3];
407 rPos ++;
410 else
412 OSL_FAIL( "illegal Unicode character" );
413 bRet = sal_False;
416 // reset surrogate
417 nSurrogate = 0;
419 else if( c > 0x07FF )
421 sal_Int8 aBytes[] = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)),
422 sal_Int8(0x80 | ((c >> 6) & 0x3F)),
423 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
424 if ((rPos + 3) > SEQUENCESIZE)
425 AddBytes(pTarget, rPos, aBytes, 3);
426 else
428 pTarget[rPos] = aBytes[0];
429 rPos ++;
430 pTarget[rPos] = aBytes[1];
431 rPos ++;
432 pTarget[rPos] = aBytes[2];
433 rPos ++;
436 else
438 sal_Int8 aBytes[] = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)),
439 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
440 if ((rPos + 2) > SEQUENCESIZE)
441 AddBytes(pTarget, rPos, aBytes, 2);
442 else
444 pTarget[rPos] = aBytes[0];
445 rPos ++;
446 pTarget[rPos] = aBytes[1];
447 rPos ++;
450 OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
451 if (rPos == SEQUENCESIZE)
452 rPos = writeSequence();
454 // reset left-over surrogate
455 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
457 OSL_ENSURE( nSurrogate != 0, "left-over Unicode surrogate" );
458 nSurrogate = 0;
459 bRet = sal_False;
462 return bRet;
465 inline void SaxWriterHelper::FinishStartElement() throw( SAXException )
467 if (!m_bStartElementFinished)
469 mp_Sequence[nCurrentPos] = '>';
470 nCurrentPos++;
471 if (nCurrentPos == SEQUENCESIZE)
472 nCurrentPos = writeSequence();
473 m_bStartElementFinished = sal_True;
477 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel) throw( SAXException )
479 FinishStartElement();
480 if (m_nLevel > 0)
482 if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE)
484 mp_Sequence[nCurrentPos] = LINEFEED;
485 nLastLineFeedPos = nCurrentPos;
486 nCurrentPos++;
487 memset( &(mp_Sequence[nCurrentPos]) , 32 , m_nLevel );
488 nCurrentPos += m_nLevel;
489 if (nCurrentPos == SEQUENCESIZE)
490 nCurrentPos = writeSequence();
492 else
494 sal_uInt32 nCount(m_nLevel + 1);
495 sal_Int8* pBytes = new sal_Int8[nCount];
496 pBytes[0] = LINEFEED;
497 memset( &(pBytes[1]), 32, m_nLevel );
498 AddBytes(mp_Sequence, nCurrentPos, pBytes, nCount);
499 delete[] pBytes;
500 nLastLineFeedPos = nCurrentPos - nCount;
501 if (nCurrentPos == SEQUENCESIZE)
502 nCurrentPos = writeSequence();
505 else
507 mp_Sequence[nCurrentPos] = LINEFEED;
508 nLastLineFeedPos = nCurrentPos;
509 nCurrentPos++;
510 if (nCurrentPos == SEQUENCESIZE)
511 nCurrentPos = writeSequence();
515 inline sal_Bool SaxWriterHelper::writeString( const rtl::OUString& rWriteOutString,
516 sal_Bool bDoNormalization,
517 sal_Bool bNormalizeWhitespace ) throw( SAXException )
519 FinishStartElement();
520 return convertToXML(rWriteOutString.getStr(),
521 rWriteOutString.getLength(),
522 bDoNormalization,
523 bNormalizeWhitespace,
524 mp_Sequence,
525 nCurrentPos);
528 inline void SaxWriterHelper::startDocument() throw( SAXException )
530 const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
531 const int nLen = strlen( pc );
532 if ((nCurrentPos + nLen) <= SEQUENCESIZE)
534 memcpy( mp_Sequence, pc , nLen );
535 nCurrentPos += nLen;
537 else
539 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)pc, nLen);
541 OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
542 if (nCurrentPos == SEQUENCESIZE)
543 nCurrentPos = writeSequence();
544 mp_Sequence[nCurrentPos] = LINEFEED;
545 nCurrentPos++;
546 if (nCurrentPos == SEQUENCESIZE)
547 nCurrentPos = writeSequence();
550 inline SaxInvalidCharacterError SaxWriterHelper::startElement(const rtl::OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException )
552 FinishStartElement();
553 mp_Sequence[nCurrentPos] = '<';
554 nCurrentPos++;
555 if (nCurrentPos == SEQUENCESIZE)
556 nCurrentPos = writeSequence();
558 SaxInvalidCharacterError eRet(SAX_NONE);
559 if (!writeString(rName, sal_False, sal_False))
560 eRet = SAX_ERROR;
562 sal_Int16 nAttribCount = xAttribs.is() ? static_cast<sal_Int16>(xAttribs->getLength()) : 0;
563 for(sal_Int16 i = 0 ; i < nAttribCount ; i++ )
565 mp_Sequence[nCurrentPos] = ' ';
566 nCurrentPos++;
567 if (nCurrentPos == SEQUENCESIZE)
568 nCurrentPos = writeSequence();
570 if (!writeString(xAttribs->getNameByIndex( i ), sal_False, sal_False))
571 eRet = SAX_ERROR;
573 mp_Sequence[nCurrentPos] = '=';
574 nCurrentPos++;
575 if (nCurrentPos == SEQUENCESIZE)
576 nCurrentPos = writeSequence();
577 mp_Sequence[nCurrentPos] = '"';
578 nCurrentPos++;
579 if (nCurrentPos == SEQUENCESIZE)
580 nCurrentPos = writeSequence();
582 if (!writeString(xAttribs->getValueByIndex( i ), sal_True, sal_True) &&
583 !(eRet == SAX_ERROR))
584 eRet = SAX_WARNING;
586 mp_Sequence[nCurrentPos] = '"';
587 nCurrentPos++;
588 if (nCurrentPos == SEQUENCESIZE)
589 nCurrentPos = writeSequence();
592 m_bStartElementFinished = sal_False; // because the '>' character is not added,
593 // because it is possible, that the "/>"
594 // characters have to add
595 return eRet;
598 inline sal_Bool SaxWriterHelper::FinishEmptyElement() throw( SAXException )
600 if (m_bStartElementFinished)
601 return sal_False;
603 mp_Sequence[nCurrentPos] = '/';
604 nCurrentPos++;
605 if (nCurrentPos == SEQUENCESIZE)
606 nCurrentPos = writeSequence();
607 mp_Sequence[nCurrentPos] = '>';
608 nCurrentPos++;
609 if (nCurrentPos == SEQUENCESIZE)
610 nCurrentPos = writeSequence();
612 m_bStartElementFinished = sal_True;
614 return sal_True;
617 inline sal_Bool SaxWriterHelper::endElement(const rtl::OUString& rName) throw( SAXException )
619 FinishStartElement();
620 mp_Sequence[nCurrentPos] = '<';
621 nCurrentPos++;
622 if (nCurrentPos == SEQUENCESIZE)
623 nCurrentPos = writeSequence();
624 mp_Sequence[nCurrentPos] = '/';
625 nCurrentPos++;
626 if (nCurrentPos == SEQUENCESIZE)
627 nCurrentPos = writeSequence();
629 sal_Bool bRet(writeString( rName, sal_False, sal_False));
631 mp_Sequence[nCurrentPos] = '>';
632 nCurrentPos++;
633 if (nCurrentPos == SEQUENCESIZE)
634 nCurrentPos = writeSequence();
636 return bRet;
639 inline void SaxWriterHelper::endDocument() throw( SAXException )
641 if (nCurrentPos > 0)
643 m_Sequence.realloc(nCurrentPos);
644 nCurrentPos = writeSequence();
645 //m_Sequence.realloc(SEQUENCESIZE);
649 inline void SaxWriterHelper::clearBuffer() throw( SAXException )
651 FinishStartElement();
652 if (nCurrentPos > 0)
654 m_Sequence.realloc(nCurrentPos);
655 nCurrentPos = writeSequence();
656 m_Sequence.realloc(SEQUENCESIZE);
657 // Be sure to update the array pointer after the reallocation.
658 mp_Sequence = m_Sequence.getArray();
662 inline sal_Bool SaxWriterHelper::processingInstruction(const rtl::OUString& rTarget, const rtl::OUString& rData) throw( SAXException )
664 FinishStartElement();
665 mp_Sequence[nCurrentPos] = '<';
666 nCurrentPos++;
667 if (nCurrentPos == SEQUENCESIZE)
668 nCurrentPos = writeSequence();
669 mp_Sequence[nCurrentPos] = '?';
670 nCurrentPos++;
671 if (nCurrentPos == SEQUENCESIZE)
672 nCurrentPos = writeSequence();
674 sal_Bool bRet(writeString( rTarget, sal_False, sal_False ));
676 mp_Sequence[nCurrentPos] = ' ';
677 nCurrentPos++;
678 if (nCurrentPos == SEQUENCESIZE)
679 nCurrentPos = writeSequence();
681 if (!writeString( rData, sal_False, sal_False ))
682 bRet = sal_False;
684 mp_Sequence[nCurrentPos] = '?';
685 nCurrentPos++;
686 if (nCurrentPos == SEQUENCESIZE)
687 nCurrentPos = writeSequence();
688 mp_Sequence[nCurrentPos] = '>';
689 nCurrentPos++;
690 if (nCurrentPos == SEQUENCESIZE)
691 nCurrentPos = writeSequence();
693 return bRet;
696 inline void SaxWriterHelper::startCDATA() throw( SAXException )
698 FinishStartElement();
699 if ((nCurrentPos + 9) <= SEQUENCESIZE)
701 memcpy( &(mp_Sequence[nCurrentPos]), "<![CDATA[" , 9 );
702 nCurrentPos += 9;
704 else
705 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"<![CDATA[" , 9);
706 if (nCurrentPos == SEQUENCESIZE)
707 nCurrentPos = writeSequence();
710 inline void SaxWriterHelper::endCDATA() throw( SAXException )
712 FinishStartElement();
713 if ((nCurrentPos + 3) <= SEQUENCESIZE)
715 memcpy( &(mp_Sequence[nCurrentPos]), "]]>" , 3 );
716 nCurrentPos += 3;
718 else
719 AddBytes(mp_Sequence, nCurrentPos, (sal_Int8*)"]]>" , 3);
720 if (nCurrentPos == SEQUENCESIZE)
721 nCurrentPos = writeSequence();
724 inline sal_Bool SaxWriterHelper::comment(const rtl::OUString& rComment) throw( SAXException )
726 FinishStartElement();
727 mp_Sequence[nCurrentPos] = '<';
728 nCurrentPos++;
729 if (nCurrentPos == SEQUENCESIZE)
730 nCurrentPos = writeSequence();
731 mp_Sequence[nCurrentPos] = '!';
732 nCurrentPos++;
733 if (nCurrentPos == SEQUENCESIZE)
734 nCurrentPos = writeSequence();
735 mp_Sequence[nCurrentPos] = '-';
736 nCurrentPos++;
737 if (nCurrentPos == SEQUENCESIZE)
738 nCurrentPos = writeSequence();
739 mp_Sequence[nCurrentPos] = '-';
740 nCurrentPos++;
741 if (nCurrentPos == SEQUENCESIZE)
742 nCurrentPos = writeSequence();
744 sal_Bool bRet(writeString( rComment, sal_False, sal_False));
746 mp_Sequence[nCurrentPos] = '-';
747 nCurrentPos++;
748 if (nCurrentPos == SEQUENCESIZE)
749 nCurrentPos = writeSequence();
750 mp_Sequence[nCurrentPos] = '-';
751 nCurrentPos++;
752 if (nCurrentPos == SEQUENCESIZE)
753 nCurrentPos = writeSequence();
754 mp_Sequence[nCurrentPos] = '>';
755 nCurrentPos++;
756 if (nCurrentPos == SEQUENCESIZE)
757 nCurrentPos = writeSequence();
759 return bRet;
762 inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen,
763 sal_Bool bDoNormalization,
764 sal_Bool bNormalizeWhitespace )
766 sal_Int32 nOutputLength = 0;
767 sal_uInt32 nSurrogate = 0;
769 for( sal_Int32 i = 0 ; i < nStrLen ; i++ )
771 sal_uInt16 c = pStr[i];
772 if( !IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F) )
774 if( bDoNormalization )
776 switch( c )
778 case '&': // resemble to &amp;
779 nOutputLength +=5;
780 break;
781 case '<': // &lt;
782 case '>': // &gt;
783 nOutputLength +=4;
784 break;
785 case 39: // 39 == ''', &apos;
786 case '"': // &quot;
787 case 13: // &#x0d;
788 nOutputLength += 6;
789 break;
791 case 10: // &#x0a;
792 case 9: // &#x09;
793 if( bNormalizeWhitespace )
795 nOutputLength += 6; //
797 else
799 nOutputLength ++;
801 break;
802 default:
803 nOutputLength ++;
806 else
808 nOutputLength ++;
811 else if( c >= 0xd800 && c < 0xdc00 )
813 // save surrogate
814 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
816 else if( c >= 0xdc00 && c < 0xe000 )
818 // 2. surrogate: write as UTF-8 (if range is OK
819 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
820 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
821 nOutputLength += 4;
822 nSurrogate = 0;
824 else if( c > 0x07FF )
826 nOutputLength += 3;
828 else
830 nOutputLength += 2;
833 // surrogate processing
834 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
835 nSurrogate = 0;
838 return nOutputLength;
841 /** returns position of first ascii 10 within the string, -1 when no 10 in string.
843 static inline sal_Int32 getFirstLineBreak( const OUString & str ) throw ()
845 const sal_Unicode *pSource = str.getStr();
846 sal_Int32 nLen = str.getLength();
848 for( int n = 0; n < nLen ; n ++ )
850 if( LINEFEED == pSource[n] ) {
851 return n;
854 return -1;
857 /** returns position of last ascii 10 within sequence, -1 when no 10 in string.
859 static inline sal_Int32 getLastLineBreak( const Sequence<sal_Int8> & seq) throw ()
861 const sal_Int8 *pSource = seq.getConstArray();
862 sal_Int32 nLen = seq.getLength();
864 for( int n = nLen-1; n >= 0 ; n -- )
866 if( LINEFEED == pSource[n] ) {
867 return n;
870 return -1;
874 class SAXWriter :
875 public WeakImplHelper3<
876 XActiveDataSource,
877 XExtendedDocumentHandler,
878 XServiceInfo >
880 public:
881 SAXWriter( ) :
882 m_seqStartElement(),
883 mp_SaxWriterHelper( NULL ),
884 m_bForceLineBreak(sal_False),
885 m_bAllowLineBreak(sal_False)
887 ~SAXWriter()
889 delete mp_SaxWriterHelper;
892 public: // XActiveDataSource
893 virtual void SAL_CALL setOutputStream(const Reference< XOutputStream > & aStream)
894 throw (RuntimeException)
896 // temporary: set same stream again to clear buffer
897 if ( m_out == aStream && mp_SaxWriterHelper && m_bDocStarted )
898 mp_SaxWriterHelper->clearBuffer();
899 else
902 m_out = aStream;
903 delete mp_SaxWriterHelper;
904 mp_SaxWriterHelper = new SaxWriterHelper(m_out);
905 m_bDocStarted = sal_False;
906 m_nLevel = 0;
907 m_bIsCDATA = sal_False;
911 virtual Reference< XOutputStream > SAL_CALL getOutputStream(void)
912 throw(RuntimeException)
913 { return m_out; }
915 public: // XDocumentHandler
916 virtual void SAL_CALL startDocument(void)
917 throw(SAXException, RuntimeException);
919 virtual void SAL_CALL endDocument(void)
920 throw(SAXException, RuntimeException);
922 virtual void SAL_CALL startElement(const OUString& aName,
923 const Reference< XAttributeList > & xAttribs)
924 throw (SAXException, RuntimeException);
926 virtual void SAL_CALL endElement(const OUString& aName)
927 throw(SAXException, RuntimeException);
929 virtual void SAL_CALL characters(const OUString& aChars)
930 throw(SAXException, RuntimeException);
932 virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces)
933 throw(SAXException, RuntimeException);
934 virtual void SAL_CALL processingInstruction(const OUString& aTarget,
935 const OUString& aData)
936 throw(SAXException, RuntimeException);
937 virtual void SAL_CALL setDocumentLocator(const Reference< XLocator > & xLocator)
938 throw(SAXException, RuntimeException);
940 public: // XExtendedDocumentHandler
941 virtual void SAL_CALL startCDATA(void) throw(SAXException, RuntimeException);
942 virtual void SAL_CALL endCDATA(void) throw(RuntimeException);
943 virtual void SAL_CALL comment(const OUString& sComment)
944 throw(SAXException, RuntimeException);
945 virtual void SAL_CALL unknown(const OUString& sString)
946 throw(SAXException, RuntimeException);
947 virtual void SAL_CALL allowLineBreak(void)
948 throw(SAXException,RuntimeException);
950 public: // XServiceInfo
951 OUString SAL_CALL getImplementationName() throw();
952 Sequence< OUString > SAL_CALL getSupportedServiceNames(void) throw();
953 sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw();
955 private:
957 void writeSequence( const Sequence<sal_Int8> & seq );
958 sal_Int32 getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence ) throw();
960 Reference< XOutputStream > m_out;
961 Sequence < sal_Int8 > m_seqStartElement;
962 SaxWriterHelper* mp_SaxWriterHelper;
964 // Status information
965 sal_Bool m_bDocStarted : 1;
966 sal_Bool m_bIsCDATA : 1;
967 sal_Bool m_bForceLineBreak : 1;
968 sal_Bool m_bAllowLineBreak : 1;
969 sal_Int32 m_nLevel;
973 //--------------------------------------
974 // the extern interface
975 //---------------------------------------
976 Reference < XInterface > SAL_CALL SaxWriter_CreateInstance(
977 SAL_UNUSED_PARAMETER const Reference < XMultiServiceFactory > & )
978 throw (Exception)
980 SAXWriter *p = new SAXWriter;
981 return Reference< XInterface > ( (static_cast< OWeakObject * >(p)) );
984 OUString SaxWriter_getServiceName() throw()
986 return OUString("com.sun.star.xml.sax.Writer");
989 OUString SaxWriter_getImplementationName() throw()
991 return OUString("com.sun.star.extensions.xml.sax.Writer");
994 Sequence< OUString > SaxWriter_getSupportedServiceNames(void) throw()
996 Sequence<OUString> aRet(1);
997 aRet.getArray()[0] = SaxWriter_getServiceName();
998 return aRet;
1002 sal_Int32 SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence ) throw()
1004 sal_Int32 nLength =-1;
1005 if (mp_SaxWriterHelper)
1007 if ( m_bForceLineBreak ||
1008 (m_bAllowLineBreak &&
1009 ((nFirstLineBreakOccurrence + mp_SaxWriterHelper->GetLastColumnCount()) > MAXCOLUMNCOUNT)) )
1010 nLength = m_nLevel;
1012 m_bForceLineBreak = sal_False;
1013 m_bAllowLineBreak = sal_False;
1014 return nLength;
1017 static inline sal_Bool isFirstCharWhitespace( const sal_Unicode *p ) throw()
1019 return *p == ' ';
1023 // XServiceInfo
1024 OUString SAXWriter::getImplementationName() throw()
1026 return SaxWriter_getImplementationName();
1029 // XServiceInfo
1030 sal_Bool SAXWriter::supportsService(const OUString& ServiceName) throw()
1032 Sequence< OUString > aSNL = getSupportedServiceNames();
1033 const OUString * pArray = aSNL.getConstArray();
1035 for( sal_Int32 i = 0; i < aSNL.getLength(); i++ )
1036 if( pArray[i] == ServiceName )
1037 return sal_True;
1039 return sal_False;
1042 // XServiceInfo
1043 Sequence< OUString > SAXWriter::getSupportedServiceNames(void) throw ()
1045 Sequence<OUString> seq(1);
1046 seq.getArray()[0] = SaxWriter_getServiceName();
1047 return seq;
1052 void SAXWriter::startDocument() throw(SAXException, RuntimeException )
1054 if( m_bDocStarted || ! m_out.is() || !mp_SaxWriterHelper ) {
1055 throw SAXException();
1057 m_bDocStarted = sal_True;
1058 mp_SaxWriterHelper->startDocument();
1062 void SAXWriter::endDocument(void) throw(SAXException, RuntimeException)
1064 if( ! m_bDocStarted )
1066 throw SAXException(
1067 OUString("endDocument called before startDocument"),
1068 Reference< XInterface >() , Any() );
1070 if( m_nLevel ) {
1071 throw SAXException(
1072 OUString("unexpected end of document"),
1073 Reference< XInterface >() , Any() );
1075 mp_SaxWriterHelper->endDocument();
1078 m_out->closeOutput();
1080 catch (const IOException & e)
1082 Any a;
1083 a <<= e;
1084 throw SAXException(
1085 OUString("IO exception during closing the IO Stream"),
1086 Reference< XInterface > (),
1087 a );
1092 void SAXWriter::startElement(const OUString& aName, const Reference< XAttributeList >& xAttribs)
1093 throw(SAXException, RuntimeException)
1095 if( ! m_bDocStarted )
1097 SAXException except;
1098 except.Message = OUString( "startElement called before startDocument" );
1099 throw except;
1101 if( m_bIsCDATA )
1103 SAXException except;
1104 except.Message = OUString( "startElement call not allowed with CDATA sections" );
1105 throw except;
1108 sal_Int32 nLength(0);
1109 if (m_bAllowLineBreak)
1111 sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
1113 nLength ++; // "<"
1114 nLength += calcXMLByteLength( aName.getStr() , aName.getLength(),
1115 sal_False, sal_False ); // the tag name
1117 sal_Int16 n;
1118 for( n = 0 ; n < static_cast<sal_Int16>(nAttribCount) ; n ++ ) {
1119 nLength ++; // " "
1120 OUString tmp = xAttribs->getNameByIndex( n );
1122 nLength += calcXMLByteLength( tmp.getStr() , tmp.getLength() , sal_False, sal_False );
1124 nLength += 2; // ="
1126 tmp = xAttribs->getValueByIndex( n );
1128 nLength += calcXMLByteLength( tmp.getStr(), tmp.getLength(), sal_True, sal_True );
1130 nLength += 1; // "
1133 nLength ++; // '>'
1136 // Is there a new indentation necesarry ?
1137 sal_Int32 nPrefix(getIndentPrefixLength( nLength ));
1139 // write into sequence
1140 if( nPrefix >= 0 )
1141 mp_SaxWriterHelper->insertIndentation( nPrefix );
1143 SaxInvalidCharacterError eRet(mp_SaxWriterHelper->startElement(aName, xAttribs));
1145 m_nLevel++;
1147 if (eRet == SAX_WARNING)
1149 SAXInvalidCharacterException except;
1150 except.Message = OUString( "Invalid charcter during XML-Export in a attribute value" );
1151 throw except;
1153 else if (eRet == SAX_ERROR)
1155 SAXException except;
1156 except.Message = OUString( "Invalid charcter during XML-Export" );
1157 throw except;
1161 void SAXWriter::endElement(const OUString& aName) throw (SAXException, RuntimeException)
1163 if( ! m_bDocStarted ) {
1164 throw SAXException ();
1166 m_nLevel --;
1168 if( m_nLevel < 0 ) {
1169 throw SAXException();
1171 sal_Bool bRet(sal_True);
1173 if( mp_SaxWriterHelper->FinishEmptyElement() )
1174 m_bForceLineBreak = sal_False;
1175 else
1177 // only ascii chars allowed
1178 sal_Int32 nLength(0);
1179 if (m_bAllowLineBreak)
1180 nLength = 3 + calcXMLByteLength( aName.getStr(), aName.getLength(), sal_False, sal_False );
1181 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1183 if( nPrefix >= 0 )
1184 mp_SaxWriterHelper->insertIndentation( nPrefix );
1186 bRet = mp_SaxWriterHelper->endElement(aName);
1189 if (!bRet)
1191 SAXException except;
1192 except.Message = OUString( "Invalid charcter during XML-Export" );
1193 throw except;
1197 void SAXWriter::characters(const OUString& aChars) throw(SAXException, RuntimeException)
1199 if( ! m_bDocStarted )
1201 SAXException except;
1202 except.Message = OUString( "characters method called before startDocument" );
1203 throw except;
1206 sal_Bool bThrowException(sal_False);
1207 if( !aChars.isEmpty() )
1209 if( m_bIsCDATA )
1210 bThrowException = !mp_SaxWriterHelper->writeString( aChars, sal_False, sal_False );
1211 else
1213 // Note : nFirstLineBreakOccurrence is not exact, because we don't know, how
1214 // many 2 and 3 byte chars are inbetween. However this whole stuff
1215 // is eitherway for pretty printing only, so it does not need to be exact.
1216 sal_Int32 nLength(0);
1217 sal_Int32 nIndentPrefix(-1);
1218 if (m_bAllowLineBreak)
1220 sal_Int32 nFirstLineBreakOccurrence = getFirstLineBreak( aChars );
1222 nLength = calcXMLByteLength( aChars.getStr(), aChars.getLength(),
1223 ! m_bIsCDATA , sal_False );
1224 nIndentPrefix = getIndentPrefixLength(
1225 nFirstLineBreakOccurrence >= 0 ? nFirstLineBreakOccurrence : nLength );
1227 else
1228 nIndentPrefix = getIndentPrefixLength(nLength);
1230 // insert indentation
1231 if( nIndentPrefix >= 0 )
1233 if( isFirstCharWhitespace( aChars.getStr() ) )
1234 mp_SaxWriterHelper->insertIndentation( nIndentPrefix - 1 );
1235 else
1236 mp_SaxWriterHelper->insertIndentation( nIndentPrefix );
1238 bThrowException = !mp_SaxWriterHelper->writeString(aChars, sal_True , sal_False);
1241 if (bThrowException)
1243 SAXInvalidCharacterException except;
1244 except.Message = OUString( "Invalid charcter during XML-Export" );
1245 throw except;
1250 void SAXWriter::ignorableWhitespace(const OUString&) throw(SAXException, RuntimeException)
1252 if( ! m_bDocStarted )
1254 throw SAXException ();
1257 m_bForceLineBreak = sal_True;
1260 void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData)
1261 throw (SAXException, RuntimeException)
1263 if( ! m_bDocStarted || m_bIsCDATA )
1265 throw SAXException();
1268 sal_Int32 nLength(0);
1269 if (m_bAllowLineBreak)
1271 nLength = 2; // "<?"
1272 nLength += calcXMLByteLength( aTarget.getStr(), aTarget.getLength(), sal_False, sal_False );
1274 nLength += 1; // " "
1276 nLength += calcXMLByteLength( aData.getStr(), aData.getLength(), sal_False, sal_False );
1278 nLength += 2; // "?>"
1281 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1283 if( nPrefix >= 0 )
1284 mp_SaxWriterHelper->insertIndentation( nPrefix );
1286 if (!mp_SaxWriterHelper->processingInstruction(aTarget, aData))
1288 SAXException except;
1289 except.Message = OUString( "Invalid charcter during XML-Export" );
1290 throw except;
1295 void SAXWriter::setDocumentLocator(const Reference< XLocator >&)
1296 throw (SAXException, RuntimeException)
1301 void SAXWriter::startCDATA(void) throw(SAXException, RuntimeException)
1303 if( ! m_bDocStarted || m_bIsCDATA)
1305 throw SAXException ();
1308 sal_Int32 nLength = 9;
1309 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1310 if( nPrefix >= 0 )
1311 mp_SaxWriterHelper->insertIndentation( nPrefix );
1313 mp_SaxWriterHelper->startCDATA();
1315 m_bIsCDATA = sal_True;
1318 void SAXWriter::endCDATA(void) throw (RuntimeException)
1320 if( ! m_bDocStarted | ! m_bIsCDATA)
1322 SAXException except;
1323 except.Message = OUString( "endCDATA was called without startCDATA" );
1324 throw except;
1327 sal_Int32 nLength = 3;
1328 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1329 if( nPrefix >= 0 )
1330 mp_SaxWriterHelper->insertIndentation( nPrefix );
1332 mp_SaxWriterHelper->endCDATA();
1334 m_bIsCDATA = sal_False;
1338 void SAXWriter::comment(const OUString& sComment) throw(SAXException, RuntimeException)
1340 if( ! m_bDocStarted || m_bIsCDATA )
1342 throw SAXException();
1345 sal_Int32 nLength(0);
1346 if (m_bAllowLineBreak)
1348 nLength = 4; // "<!--"
1349 nLength += calcXMLByteLength( sComment.getStr(), sComment.getLength(), sal_False, sal_False);
1351 nLength += 3;
1354 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1355 if( nPrefix >= 0 )
1356 mp_SaxWriterHelper->insertIndentation( nPrefix );
1358 if (!mp_SaxWriterHelper->comment(sComment))
1360 SAXException except;
1361 except.Message = OUString( "Invalid charcter during XML-Export" );
1362 throw except;
1367 void SAXWriter::allowLineBreak( ) throw ( SAXException , RuntimeException)
1369 if( ! m_bDocStarted || m_bAllowLineBreak ) {
1370 throw SAXException();
1373 m_bAllowLineBreak = sal_True;
1376 void SAXWriter::unknown(const OUString& sString) throw (SAXException, RuntimeException)
1379 if( ! m_bDocStarted )
1381 throw SAXException ();
1383 if( m_bIsCDATA )
1385 throw SAXException();
1388 if( sString.matchAsciiL( "<?xml", 5 ) )
1389 return;
1391 sal_Int32 nLength(0);
1392 if (m_bAllowLineBreak)
1393 nLength = calcXMLByteLength( sString.getStr(), sString.getLength(), sal_False, sal_False );
1395 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1396 if( nPrefix >= 0 )
1397 mp_SaxWriterHelper->insertIndentation( nPrefix );
1399 if (!mp_SaxWriterHelper->writeString( sString, sal_False, sal_False))
1401 SAXException except;
1402 except.Message = OUString( "Invalid charcter during XML-Export" );
1403 throw except;
1409 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */