update emoji autocorrect entries from po-files
[LibreOffice.git] / sax / source / expatwrap / saxwriter.cxx
blobfeb8efd2c6bb9466d52aeeec7a74718411cfac19
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <string.h>
22 #include <cassert>
23 #include <set>
24 #include <stack>
26 #include <com/sun/star/lang/WrappedTargetRuntimeException.hpp>
27 #include <com/sun/star/lang/XServiceInfo.hpp>
28 #include <com/sun/star/uno/XComponentContext.hpp>
29 #include <com/sun/star/util/XCloneable.hpp>
30 #include <com/sun/star/xml/sax/XParser.hpp>
31 #include <com/sun/star/xml/sax/SAXParseException.hpp>
32 #include <com/sun/star/xml/sax/SAXInvalidCharacterException.hpp>
33 #include <com/sun/star/xml/sax/XWriter.hpp>
35 #include <com/sun/star/io/XActiveDataSource.hpp>
37 #include <cppuhelper/weak.hxx>
38 #include <cppuhelper/implbase2.hxx>
39 #include <cppuhelper/supportsservice.hxx>
41 #include <osl/diagnose.h>
42 #include <rtl/ref.hxx>
43 #include <rtl/ustrbuf.hxx>
45 using namespace ::std;
46 using namespace ::osl;
47 using namespace ::cppu;
48 using namespace ::com::sun::star::uno;
49 using namespace ::com::sun::star::lang;
50 using namespace ::com::sun::star::xml::sax;
51 using namespace ::com::sun::star::util;
52 using namespace ::com::sun::star::io;
54 #include "xml2utf.hxx"
55 #include <boost/scoped_array.hpp>
57 #define LINEFEED 10
58 #define SEQUENCESIZE 1024
59 #define MAXCOLUMNCOUNT 72
61 /******
64 * Character conversion functions
67 *****/
69 namespace {
/** Result of serializing a start tag with respect to invalid characters.

    SaxWriterHelper::startElement reports SAX_ERROR when the element name or
    an attribute name contained an invalid character, and SAX_WARNING when
    only an attribute value did.
 */
enum SaxInvalidCharacterError
{
    SAX_NONE,       // nothing invalid was encountered
    SAX_WARNING,    // invalid character in an attribute value
    SAX_ERROR       // invalid character in an element or attribute name
};
78 class SaxWriterHelper
80 #ifdef DBG_UTIL
81 public:
82 ::std::stack<OUString> m_DebugStartedElements;
83 #endif
85 private:
86 Reference< XOutputStream > m_out;
87 Sequence < sal_Int8 > m_Sequence;
88 sal_Int8* mp_Sequence;
90 sal_Int32 nLastLineFeedPos; // is negative after writing a sequence
91 sal_uInt32 nCurrentPos;
92 bool m_bStartElementFinished;
94 inline sal_uInt32 writeSequence() throw( SAXException );
96 // use only if to insert the bytes more space in the sequence is needed and
97 // so the sequence has to write out and reset rPos to 0
98 // writes sequence only on overflow, sequence could be full on the end (rPos == SEQUENCESIZE)
99 inline void AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
100 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException );
101 inline bool convertToXML(const sal_Unicode * pStr,
102 sal_Int32 nStrLen,
103 bool bDoNormalization,
104 bool bNormalizeWhitespace,
105 sal_Int8 *pTarget,
106 sal_uInt32& rPos) throw( SAXException );
107 inline void FinishStartElement() throw( SAXException );
108 public:
109 explicit SaxWriterHelper(Reference< XOutputStream > m_TempOut)
110 : m_out(m_TempOut)
111 , m_Sequence(SEQUENCESIZE)
112 , mp_Sequence(NULL)
113 , nLastLineFeedPos(0)
114 , nCurrentPos(0)
115 , m_bStartElementFinished(true)
117 OSL_ENSURE(SEQUENCESIZE > 50, "Sequence cache size to small");
118 mp_Sequence = m_Sequence.getArray();
120 ~SaxWriterHelper()
122 OSL_ENSURE(!nCurrentPos, "cached Sequence not written");
123 OSL_ENSURE(m_bStartElementFinished, "StartElement not complettly written");
126 inline void insertIndentation(sal_uInt32 m_nLevel) throw( SAXException );
128 // returns whether it works correct or invalid characters were in the string
129 // If there are invalid characters in the string it returns sal_False.
130 // Than the calling method has to throw the needed Exception.
131 inline bool writeString(const OUString& rWriteOutString,
132 bool bDoNormalization,
133 bool bNormalizeWhitespace) throw( SAXException );
135 sal_uInt32 GetLastColumnCount() const throw()
136 { return (sal_uInt32)(nCurrentPos - nLastLineFeedPos); }
138 inline void startDocument() throw( SAXException );
140 // returns whether it works correct or invalid characters were in the strings
141 // If there are invalid characters in one of the strings it returns sal_False.
142 // Than the calling method has to throw the needed Exception.
143 inline SaxInvalidCharacterError startElement(const OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException );
144 inline bool FinishEmptyElement() throw( SAXException );
146 // returns whether it works correct or invalid characters were in the string
147 // If there are invalid characters in the string it returns sal_False.
148 // Than the calling method has to throw the needed Exception.
149 inline bool endElement(const OUString& rName) throw( SAXException );
150 inline void endDocument() throw( SAXException );
152 // returns whether it works correct or invalid characters were in the strings
153 // If there are invalid characters in the string it returns sal_False.
154 // Than the calling method has to throw the needed Exception.
155 inline bool processingInstruction(const OUString& rTarget, const OUString& rData) throw( SAXException );
156 inline void startCDATA() throw( SAXException );
157 inline void endCDATA() throw( SAXException );
159 // returns whether it works correct or invalid characters were in the strings
160 // If there are invalid characters in the string it returns sal_False.
161 // Than the calling method has to throw the needed Exception.
162 inline bool comment(const OUString& rComment) throw( SAXException );
164 inline void clearBuffer() throw( SAXException );
167 const sal_Bool g_bValidCharsBelow32[32] =
169 // 0 1 2 3 4 5 6 7
170 0,0,0,0,0,0,0,0, //0
171 0,1,1,0,0,1,0,0, //8
172 0,0,0,0,0,0,0,0, //16
173 0,0,0,0,0,0,0,0
176 inline bool IsInvalidChar(const sal_Unicode aChar)
178 bool bRet(false);
179 // check first for the most common characters
180 if( aChar < 32 || aChar >= 0xd800 )
181 bRet = ( (aChar < 32 && ! g_bValidCharsBelow32[aChar]) ||
182 aChar == 0xffff ||
183 aChar == 0xfffe );
184 return bRet;
187 /********
188 * write through to the output stream
190 *****/
191 inline sal_uInt32 SaxWriterHelper::writeSequence() throw( SAXException )
195 m_out->writeBytes( m_Sequence );
197 catch (const IOException & e)
199 Any a;
200 a <<= e;
201 throw SAXException(
202 "IO exception during writing",
203 Reference< XInterface > (),
204 a );
206 nLastLineFeedPos -= SEQUENCESIZE;
207 return 0;
210 inline void SaxWriterHelper::AddBytes(sal_Int8* pTarget, sal_uInt32& rPos,
211 const sal_Int8* pBytes, sal_uInt32 nBytesCount) throw( SAXException )
213 OSL_ENSURE((rPos + nBytesCount) > SEQUENCESIZE, "wrong use of AddBytesMethod");
214 sal_uInt32 nCount(SEQUENCESIZE - rPos);
215 memcpy( &(pTarget[rPos]) , pBytes, nCount);
217 OSL_ENSURE(rPos + nCount == SEQUENCESIZE, "the position should be the at the end");
219 rPos = writeSequence();
220 sal_uInt32 nRestCount(nBytesCount - nCount);
221 if ((rPos + nRestCount) <= SEQUENCESIZE)
223 memcpy( &(pTarget[rPos]), &pBytes[nCount], nRestCount);
224 rPos += nRestCount;
226 else
227 AddBytes(pTarget, rPos, &pBytes[nCount], nRestCount);
230 /** Converts an UTF16 string to UTF8 and does XML normalization
232 @param pTarget
233 Pointer to a piece of memory, to where the output should be written. The caller
234 must call calcXMLByteLength on the same string, to ensure,
235 that there is enough memory for converting.
237 inline bool SaxWriterHelper::convertToXML( const sal_Unicode * pStr,
238 sal_Int32 nStrLen,
239 bool bDoNormalization,
240 bool bNormalizeWhitespace,
241 sal_Int8 *pTarget,
242 sal_uInt32& rPos ) throw( SAXException )
244 bool bRet(true);
245 sal_uInt32 nSurrogate = 0;
247 for( sal_Int32 i = 0 ; i < nStrLen ; i ++ )
249 sal_uInt16 c = pStr[i];
250 if (IsInvalidChar(c))
251 bRet = false;
252 else if( (c >= 0x0001) && (c <= 0x007F) )
254 if( bDoNormalization )
256 switch( c )
258 case '&': // resemble to &amp;
260 if ((rPos + 5) > SEQUENCESIZE)
261 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&amp;"), 5);
262 else
264 memcpy( &(pTarget[rPos]) , "&amp;", 5 );
265 rPos += 5;
268 break;
269 case '<':
271 if ((rPos + 4) > SEQUENCESIZE)
272 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&lt;"), 4);
273 else
275 memcpy( &(pTarget[rPos]) , "&lt;" , 4 );
276 rPos += 4; // &lt;
279 break;
280 case '>':
282 if ((rPos + 4) > SEQUENCESIZE)
283 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&gt;"), 4);
284 else
286 memcpy( &(pTarget[rPos]) , "&gt;" , 4 );
287 rPos += 4; // &gt;
290 break;
291 case 39: // 39 == '''
293 if ((rPos + 6) > SEQUENCESIZE)
294 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&apos;"), 6);
295 else
297 memcpy( &(pTarget[rPos]) , "&apos;" , 6 );
298 rPos += 6; // &apos;
301 break;
302 case '"':
304 if ((rPos + 6) > SEQUENCESIZE)
305 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&quot;"), 6);
306 else
308 memcpy( &(pTarget[rPos]) , "&quot;" , 6 );
309 rPos += 6; // &quot;
312 break;
313 case 13:
315 if ((rPos + 6) > SEQUENCESIZE)
316 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&#x0d;"), 6);
317 else
319 memcpy( &(pTarget[rPos]) , "&#x0d;" , 6 );
320 rPos += 6;
323 break;
324 case LINEFEED:
326 if( bNormalizeWhitespace )
328 if ((rPos + 6) > SEQUENCESIZE)
329 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&#x0a;"), 6);
330 else
332 memcpy( &(pTarget[rPos]) , "&#x0a;" , 6 );
333 rPos += 6;
336 else
338 pTarget[rPos] = LINEFEED;
339 nLastLineFeedPos = rPos;
340 rPos ++;
343 break;
344 case 9:
346 if( bNormalizeWhitespace )
348 if ((rPos + 6) > SEQUENCESIZE)
349 AddBytes(pTarget, rPos, reinterpret_cast<sal_Int8 const *>("&#x09;"), 6);
350 else
352 memcpy( &(pTarget[rPos]) , "&#x09;" , 6 );
353 rPos += 6;
356 else
358 pTarget[rPos] = 9;
359 rPos ++;
362 break;
363 default:
365 pTarget[rPos] = (sal_Int8)c;
366 rPos ++;
368 break;
371 else
373 pTarget[rPos] = (sal_Int8)c;
374 if ((sal_Int8)c == LINEFEED)
375 nLastLineFeedPos = rPos;
376 rPos ++;
379 else if( c >= 0xd800 && c < 0xdc00 )
381 // 1. surrogate: save (until 2. surrogate)
382 OSL_ENSURE( nSurrogate == 0, "left-over Unicode surrogate" );
383 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
385 else if( c >= 0xdc00 && c < 0xe000 )
387 // 2. surrogate: write as UTF-8
388 OSL_ENSURE( nSurrogate != 0, "lone 2nd Unicode surrogate" );
390 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
391 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
393 sal_Int8 aBytes[] = { sal_Int8(0xF0 | ((nSurrogate >> 18) & 0x0F)),
394 sal_Int8(0x80 | ((nSurrogate >> 12) & 0x3F)),
395 sal_Int8(0x80 | ((nSurrogate >> 6) & 0x3F)),
396 sal_Int8(0x80 | ((nSurrogate >> 0) & 0x3F)) };
397 if ((rPos + 4) > SEQUENCESIZE)
398 AddBytes(pTarget, rPos, aBytes, 4);
399 else
401 pTarget[rPos] = aBytes[0];
402 rPos ++;
403 pTarget[rPos] = aBytes[1];
404 rPos ++;
405 pTarget[rPos] = aBytes[2];
406 rPos ++;
407 pTarget[rPos] = aBytes[3];
408 rPos ++;
411 else
413 OSL_FAIL( "illegal Unicode character" );
414 bRet = false;
417 // reset surrogate
418 nSurrogate = 0;
420 else if( c > 0x07FF )
422 sal_Int8 aBytes[] = { sal_Int8(0xE0 | ((c >> 12) & 0x0F)),
423 sal_Int8(0x80 | ((c >> 6) & 0x3F)),
424 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
425 if ((rPos + 3) > SEQUENCESIZE)
426 AddBytes(pTarget, rPos, aBytes, 3);
427 else
429 pTarget[rPos] = aBytes[0];
430 rPos ++;
431 pTarget[rPos] = aBytes[1];
432 rPos ++;
433 pTarget[rPos] = aBytes[2];
434 rPos ++;
437 else
439 sal_Int8 aBytes[] = { sal_Int8(0xC0 | ((c >> 6) & 0x1F)),
440 sal_Int8(0x80 | ((c >> 0) & 0x3F)) };
441 if ((rPos + 2) > SEQUENCESIZE)
442 AddBytes(pTarget, rPos, aBytes, 2);
443 else
445 pTarget[rPos] = aBytes[0];
446 rPos ++;
447 pTarget[rPos] = aBytes[1];
448 rPos ++;
451 OSL_ENSURE(rPos <= SEQUENCESIZE, "not reset current position");
452 if (rPos == SEQUENCESIZE)
453 rPos = writeSequence();
455 // reset left-over surrogate
456 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
458 OSL_ENSURE( nSurrogate != 0, "left-over Unicode surrogate" );
459 nSurrogate = 0;
460 bRet = false;
463 return bRet;
466 inline void SaxWriterHelper::FinishStartElement() throw( SAXException )
468 if (!m_bStartElementFinished)
470 mp_Sequence[nCurrentPos] = '>';
471 nCurrentPos++;
472 if (nCurrentPos == SEQUENCESIZE)
473 nCurrentPos = writeSequence();
474 m_bStartElementFinished = true;
478 inline void SaxWriterHelper::insertIndentation(sal_uInt32 m_nLevel) throw( SAXException )
480 FinishStartElement();
481 if (m_nLevel > 0)
483 if ((nCurrentPos + m_nLevel + 1) <= SEQUENCESIZE)
485 mp_Sequence[nCurrentPos] = LINEFEED;
486 nLastLineFeedPos = nCurrentPos;
487 nCurrentPos++;
488 memset( &(mp_Sequence[nCurrentPos]) , 32 , m_nLevel );
489 nCurrentPos += m_nLevel;
490 if (nCurrentPos == SEQUENCESIZE)
491 nCurrentPos = writeSequence();
493 else
495 sal_uInt32 nCount(m_nLevel + 1);
496 boost::scoped_array<sal_Int8> pBytes(new sal_Int8[nCount]);
497 pBytes[0] = LINEFEED;
498 memset( &(pBytes[1]), 32, m_nLevel );
499 AddBytes(mp_Sequence, nCurrentPos, pBytes.get(), nCount);
500 pBytes.reset();
501 nLastLineFeedPos = nCurrentPos - nCount;
502 if (nCurrentPos == SEQUENCESIZE)
503 nCurrentPos = writeSequence();
506 else
508 mp_Sequence[nCurrentPos] = LINEFEED;
509 nLastLineFeedPos = nCurrentPos;
510 nCurrentPos++;
511 if (nCurrentPos == SEQUENCESIZE)
512 nCurrentPos = writeSequence();
516 inline bool SaxWriterHelper::writeString( const OUString& rWriteOutString,
517 bool bDoNormalization,
518 bool bNormalizeWhitespace ) throw( SAXException )
520 FinishStartElement();
521 return convertToXML(rWriteOutString.getStr(),
522 rWriteOutString.getLength(),
523 bDoNormalization,
524 bNormalizeWhitespace,
525 mp_Sequence,
526 nCurrentPos);
529 inline void SaxWriterHelper::startDocument() throw( SAXException )
531 const char pc[] = "<?xml version=\"1.0\" encoding=\"UTF-8\"?>";
532 const int nLen = strlen( pc );
533 if ((nCurrentPos + nLen) <= SEQUENCESIZE)
535 memcpy( mp_Sequence, pc , nLen );
536 nCurrentPos += nLen;
538 else
540 AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const *>(pc), nLen);
542 OSL_ENSURE(nCurrentPos <= SEQUENCESIZE, "not reset current position");
543 if (nCurrentPos == SEQUENCESIZE)
544 nCurrentPos = writeSequence();
545 mp_Sequence[nCurrentPos] = LINEFEED;
546 nCurrentPos++;
547 if (nCurrentPos == SEQUENCESIZE)
548 nCurrentPos = writeSequence();
551 inline SaxInvalidCharacterError SaxWriterHelper::startElement(const OUString& rName, const Reference< XAttributeList >& xAttribs) throw( SAXException )
553 FinishStartElement();
555 #ifdef DBG_UTIL
556 m_DebugStartedElements.push(rName);
557 ::std::set<OUString> DebugAttributes;
558 #endif
560 mp_Sequence[nCurrentPos] = '<';
561 nCurrentPos++;
562 if (nCurrentPos == SEQUENCESIZE)
563 nCurrentPos = writeSequence();
565 SaxInvalidCharacterError eRet(SAX_NONE);
566 if (!writeString(rName, false, false))
567 eRet = SAX_ERROR;
569 sal_Int16 nAttribCount = xAttribs.is() ? static_cast<sal_Int16>(xAttribs->getLength()) : 0;
570 for(sal_Int16 i = 0 ; i < nAttribCount ; i++ )
572 mp_Sequence[nCurrentPos] = ' ';
573 nCurrentPos++;
574 if (nCurrentPos == SEQUENCESIZE)
575 nCurrentPos = writeSequence();
577 OUString const& rAttrName(xAttribs->getNameByIndex(i));
578 #ifdef DBG_UTIL
579 // Well-formedness constraint: Unique Att Spec
580 assert(DebugAttributes.find(rAttrName) == DebugAttributes.end());
581 DebugAttributes.insert(rAttrName);
582 #endif
583 if (!writeString(rAttrName, false, false))
584 eRet = SAX_ERROR;
586 mp_Sequence[nCurrentPos] = '=';
587 nCurrentPos++;
588 if (nCurrentPos == SEQUENCESIZE)
589 nCurrentPos = writeSequence();
590 mp_Sequence[nCurrentPos] = '"';
591 nCurrentPos++;
592 if (nCurrentPos == SEQUENCESIZE)
593 nCurrentPos = writeSequence();
595 if (!writeString(xAttribs->getValueByIndex( i ), true, true) &&
596 !(eRet == SAX_ERROR))
597 eRet = SAX_WARNING;
599 mp_Sequence[nCurrentPos] = '"';
600 nCurrentPos++;
601 if (nCurrentPos == SEQUENCESIZE)
602 nCurrentPos = writeSequence();
605 m_bStartElementFinished = false; // because the '>' character is not added,
606 // because it is possible, that the "/>"
607 // characters have to add
608 return eRet;
611 inline bool SaxWriterHelper::FinishEmptyElement() throw( SAXException )
613 if (m_bStartElementFinished)
614 return false;
616 mp_Sequence[nCurrentPos] = '/';
617 nCurrentPos++;
618 if (nCurrentPos == SEQUENCESIZE)
619 nCurrentPos = writeSequence();
620 mp_Sequence[nCurrentPos] = '>';
621 nCurrentPos++;
622 if (nCurrentPos == SEQUENCESIZE)
623 nCurrentPos = writeSequence();
625 m_bStartElementFinished = true;
627 return true;
630 inline bool SaxWriterHelper::endElement(const OUString& rName) throw( SAXException )
632 FinishStartElement();
634 mp_Sequence[nCurrentPos] = '<';
635 nCurrentPos++;
636 if (nCurrentPos == SEQUENCESIZE)
637 nCurrentPos = writeSequence();
638 mp_Sequence[nCurrentPos] = '/';
639 nCurrentPos++;
640 if (nCurrentPos == SEQUENCESIZE)
641 nCurrentPos = writeSequence();
643 bool bRet(writeString( rName, false, false));
645 mp_Sequence[nCurrentPos] = '>';
646 nCurrentPos++;
647 if (nCurrentPos == SEQUENCESIZE)
648 nCurrentPos = writeSequence();
650 return bRet;
653 inline void SaxWriterHelper::endDocument() throw( SAXException )
655 if (nCurrentPos > 0)
657 m_Sequence.realloc(nCurrentPos);
658 nCurrentPos = writeSequence();
659 //m_Sequence.realloc(SEQUENCESIZE);
663 inline void SaxWriterHelper::clearBuffer() throw( SAXException )
665 FinishStartElement();
666 if (nCurrentPos > 0)
668 m_Sequence.realloc(nCurrentPos);
669 nCurrentPos = writeSequence();
670 m_Sequence.realloc(SEQUENCESIZE);
671 // Be sure to update the array pointer after the reallocation.
672 mp_Sequence = m_Sequence.getArray();
676 inline bool SaxWriterHelper::processingInstruction(const OUString& rTarget, const OUString& rData) throw( SAXException )
678 FinishStartElement();
679 mp_Sequence[nCurrentPos] = '<';
680 nCurrentPos++;
681 if (nCurrentPos == SEQUENCESIZE)
682 nCurrentPos = writeSequence();
683 mp_Sequence[nCurrentPos] = '?';
684 nCurrentPos++;
685 if (nCurrentPos == SEQUENCESIZE)
686 nCurrentPos = writeSequence();
688 bool bRet(writeString( rTarget, false, false ));
690 mp_Sequence[nCurrentPos] = ' ';
691 nCurrentPos++;
692 if (nCurrentPos == SEQUENCESIZE)
693 nCurrentPos = writeSequence();
695 if (!writeString( rData, false, false ))
696 bRet = false;
698 mp_Sequence[nCurrentPos] = '?';
699 nCurrentPos++;
700 if (nCurrentPos == SEQUENCESIZE)
701 nCurrentPos = writeSequence();
702 mp_Sequence[nCurrentPos] = '>';
703 nCurrentPos++;
704 if (nCurrentPos == SEQUENCESIZE)
705 nCurrentPos = writeSequence();
707 return bRet;
710 inline void SaxWriterHelper::startCDATA() throw( SAXException )
712 FinishStartElement();
713 if ((nCurrentPos + 9) <= SEQUENCESIZE)
715 memcpy( &(mp_Sequence[nCurrentPos]), "<![CDATA[" , 9 );
716 nCurrentPos += 9;
718 else
719 AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const *>("<![CDATA["), 9);
720 if (nCurrentPos == SEQUENCESIZE)
721 nCurrentPos = writeSequence();
724 inline void SaxWriterHelper::endCDATA() throw( SAXException )
726 FinishStartElement();
727 if ((nCurrentPos + 3) <= SEQUENCESIZE)
729 memcpy( &(mp_Sequence[nCurrentPos]), "]]>" , 3 );
730 nCurrentPos += 3;
732 else
733 AddBytes(mp_Sequence, nCurrentPos, reinterpret_cast<sal_Int8 const *>("]]>"), 3);
734 if (nCurrentPos == SEQUENCESIZE)
735 nCurrentPos = writeSequence();
738 inline bool SaxWriterHelper::comment(const OUString& rComment) throw( SAXException )
740 FinishStartElement();
741 mp_Sequence[nCurrentPos] = '<';
742 nCurrentPos++;
743 if (nCurrentPos == SEQUENCESIZE)
744 nCurrentPos = writeSequence();
745 mp_Sequence[nCurrentPos] = '!';
746 nCurrentPos++;
747 if (nCurrentPos == SEQUENCESIZE)
748 nCurrentPos = writeSequence();
749 mp_Sequence[nCurrentPos] = '-';
750 nCurrentPos++;
751 if (nCurrentPos == SEQUENCESIZE)
752 nCurrentPos = writeSequence();
753 mp_Sequence[nCurrentPos] = '-';
754 nCurrentPos++;
755 if (nCurrentPos == SEQUENCESIZE)
756 nCurrentPos = writeSequence();
758 bool bRet(writeString( rComment, false, false));
760 mp_Sequence[nCurrentPos] = '-';
761 nCurrentPos++;
762 if (nCurrentPos == SEQUENCESIZE)
763 nCurrentPos = writeSequence();
764 mp_Sequence[nCurrentPos] = '-';
765 nCurrentPos++;
766 if (nCurrentPos == SEQUENCESIZE)
767 nCurrentPos = writeSequence();
768 mp_Sequence[nCurrentPos] = '>';
769 nCurrentPos++;
770 if (nCurrentPos == SEQUENCESIZE)
771 nCurrentPos = writeSequence();
773 return bRet;
776 inline sal_Int32 calcXMLByteLength( const sal_Unicode *pStr, sal_Int32 nStrLen,
777 bool bDoNormalization,
778 bool bNormalizeWhitespace )
780 sal_Int32 nOutputLength = 0;
781 sal_uInt32 nSurrogate = 0;
783 for( sal_Int32 i = 0 ; i < nStrLen ; i++ )
785 sal_uInt16 c = pStr[i];
786 if( !IsInvalidChar(c) && (c >= 0x0001) && (c <= 0x007F) )
788 if( bDoNormalization )
790 switch( c )
792 case '&': // resemble to &amp;
793 nOutputLength +=5;
794 break;
795 case '<': // &lt;
796 case '>': // &gt;
797 nOutputLength +=4;
798 break;
799 case 39: // 39 == ''', &apos;
800 case '"': // &quot;
801 case 13: // &#x0d;
802 nOutputLength += 6;
803 break;
805 case 10: // &#x0a;
806 case 9: // &#x09;
807 if( bNormalizeWhitespace )
809 nOutputLength += 6;
811 else
813 nOutputLength ++;
815 break;
816 default:
817 nOutputLength ++;
820 else
822 nOutputLength ++;
825 else if( c >= 0xd800 && c < 0xdc00 )
827 // save surrogate
828 nSurrogate = ( ( c & 0x03ff ) + 0x0040 );
830 else if( c >= 0xdc00 && c < 0xe000 )
832 // 2. surrogate: write as UTF-8 (if range is OK
833 nSurrogate = ( nSurrogate << 10 ) | ( c & 0x03ff );
834 if( nSurrogate >= 0x00010000 && nSurrogate <= 0x0010FFFF )
835 nOutputLength += 4;
836 nSurrogate = 0;
838 else if( c > 0x07FF )
840 nOutputLength += 3;
842 else
844 nOutputLength += 2;
847 // surrogate processing
848 if( ( nSurrogate != 0 ) && !( c >= 0xd800 && c < 0xdc00 ) )
849 nSurrogate = 0;
852 return nOutputLength;
855 /** returns position of first ascii 10 within the string, -1 when no 10 in string.
857 static inline sal_Int32 getFirstLineBreak( const OUString & str ) throw ()
859 const sal_Unicode *pSource = str.getStr();
860 sal_Int32 nLen = str.getLength();
862 for( int n = 0; n < nLen ; n ++ )
864 if( LINEFEED == pSource[n] ) {
865 return n;
868 return -1;
871 class SAXWriter :
872 public WeakImplHelper2<
873 XWriter,
874 XServiceInfo >
876 public:
877 SAXWriter()
878 : m_pSaxWriterHelper(NULL)
879 , m_bDocStarted(false)
880 , m_bIsCDATA(false)
881 , m_bForceLineBreak(false)
882 , m_bAllowLineBreak(false)
883 , m_nLevel(0)
886 virtual ~SAXWriter()
888 delete m_pSaxWriterHelper;
891 public: // XActiveDataSource
892 virtual void SAL_CALL setOutputStream(const Reference< XOutputStream > & aStream)
893 throw (RuntimeException, std::exception) SAL_OVERRIDE
897 // temporary: set same stream again to clear buffer
898 if ( m_out == aStream && m_pSaxWriterHelper && m_bDocStarted )
899 m_pSaxWriterHelper->clearBuffer();
900 else
902 m_out = aStream;
903 delete m_pSaxWriterHelper;
904 m_pSaxWriterHelper = new SaxWriterHelper(m_out);
905 m_bDocStarted = false;
906 m_nLevel = 0;
907 m_bIsCDATA = false;
910 catch (const SAXException& e)
912 throw css::lang::WrappedTargetRuntimeException(
913 e.Message,
914 static_cast < OWeakObject * > ( this ),
915 e.WrappedException);
918 virtual Reference< XOutputStream > SAL_CALL getOutputStream()
919 throw(RuntimeException, std::exception) SAL_OVERRIDE
921 return m_out;
924 public: // XDocumentHandler
925 virtual void SAL_CALL startDocument()
926 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
928 virtual void SAL_CALL endDocument()
929 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
931 virtual void SAL_CALL startElement(const OUString& aName,
932 const Reference< XAttributeList > & xAttribs)
933 throw (SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
935 virtual void SAL_CALL endElement(const OUString& aName)
936 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
938 virtual void SAL_CALL characters(const OUString& aChars)
939 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
941 virtual void SAL_CALL ignorableWhitespace(const OUString& aWhitespaces)
942 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
943 virtual void SAL_CALL processingInstruction(const OUString& aTarget,
944 const OUString& aData)
945 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
946 virtual void SAL_CALL setDocumentLocator(const Reference< XLocator > & xLocator)
947 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
949 public: // XExtendedDocumentHandler
950 virtual void SAL_CALL startCDATA() throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
951 virtual void SAL_CALL endCDATA() throw(SAXException,RuntimeException, std::exception) SAL_OVERRIDE;
952 virtual void SAL_CALL comment(const OUString& sComment)
953 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
954 virtual void SAL_CALL unknown(const OUString& sString)
955 throw(SAXException, RuntimeException, std::exception) SAL_OVERRIDE;
956 virtual void SAL_CALL allowLineBreak()
957 throw(SAXException,RuntimeException, std::exception) SAL_OVERRIDE;
959 public: // XServiceInfo
960 OUString SAL_CALL getImplementationName() throw(std::exception) SAL_OVERRIDE;
961 Sequence< OUString > SAL_CALL getSupportedServiceNames() throw(std::exception) SAL_OVERRIDE;
962 sal_Bool SAL_CALL supportsService(const OUString& ServiceName) throw(std::exception) SAL_OVERRIDE;
964 private:
965 sal_Int32 getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence ) throw();
967 Reference< XOutputStream > m_out;
968 SaxWriterHelper* m_pSaxWriterHelper;
970 // Status information
971 bool m_bDocStarted : 1;
972 bool m_bIsCDATA : 1;
973 bool m_bForceLineBreak : 1;
974 bool m_bAllowLineBreak : 1;
975 sal_Int32 m_nLevel;
978 sal_Int32 SAXWriter::getIndentPrefixLength( sal_Int32 nFirstLineBreakOccurrence ) throw()
980 sal_Int32 nLength =-1;
981 if (m_pSaxWriterHelper)
983 if ( m_bForceLineBreak ||
984 (m_bAllowLineBreak &&
985 ((nFirstLineBreakOccurrence + m_pSaxWriterHelper->GetLastColumnCount()) > MAXCOLUMNCOUNT)) )
986 nLength = m_nLevel;
988 m_bForceLineBreak = false;
989 m_bAllowLineBreak = false;
990 return nLength;
993 static inline bool isFirstCharWhitespace( const sal_Unicode *p ) throw()
995 return *p == ' ';
998 // XServiceInfo
999 OUString SAXWriter::getImplementationName() throw(std::exception)
1001 return OUString("com.sun.star.extensions.xml.sax.Writer");
1004 // XServiceInfo
1005 sal_Bool SAXWriter::supportsService(const OUString& ServiceName) throw(std::exception)
1007 return cppu::supportsService(this, ServiceName);
1010 // XServiceInfo
1011 Sequence< OUString > SAXWriter::getSupportedServiceNames() throw (std::exception)
1013 Sequence<OUString> seq(1);
1014 seq[0] = "com.sun.star.xml.sax.Writer";
1015 return seq;
1018 void SAXWriter::startDocument() throw(SAXException, RuntimeException, std::exception )
1020 if( m_bDocStarted || ! m_out.is() || !m_pSaxWriterHelper ) {
1021 throw SAXException();
1023 m_bDocStarted = true;
1024 m_pSaxWriterHelper->startDocument();
1028 void SAXWriter::endDocument() throw(SAXException, RuntimeException, std::exception)
1030 if( ! m_bDocStarted )
1032 throw SAXException(
1033 "endDocument called before startDocument",
1034 Reference< XInterface >() , Any() );
1036 if( m_nLevel ) {
1037 throw SAXException(
1038 "unexpected end of document",
1039 Reference< XInterface >() , Any() );
1041 m_pSaxWriterHelper->endDocument();
1044 m_out->closeOutput();
1046 catch (const IOException & e)
1048 Any a;
1049 a <<= e;
1050 throw SAXException(
1051 "IO exception during closing the IO Stream",
1052 Reference< XInterface > (),
1053 a );
1058 void SAXWriter::startElement(const OUString& aName, const Reference< XAttributeList >& xAttribs)
1059 throw(SAXException, RuntimeException, std::exception)
1061 if( ! m_bDocStarted )
1063 SAXException except;
1064 except.Message = "startElement called before startDocument";
1065 throw except;
1067 if( m_bIsCDATA )
1069 SAXException except;
1070 except.Message = "startElement call not allowed with CDATA sections";
1071 throw except;
1074 sal_Int32 nLength(0);
1075 if (m_bAllowLineBreak)
1077 sal_Int32 nAttribCount = xAttribs.is() ? xAttribs->getLength() : 0;
1079 nLength ++; // "<"
1080 nLength += calcXMLByteLength( aName.getStr() , aName.getLength(),
1081 false, false ); // the tag name
1083 sal_Int16 n;
1084 for( n = 0 ; n < static_cast<sal_Int16>(nAttribCount) ; n ++ ) {
1085 nLength ++; // " "
1086 OUString tmp = xAttribs->getNameByIndex( n );
1088 nLength += calcXMLByteLength( tmp.getStr() , tmp.getLength() , false, false );
1090 nLength += 2; // ="
1092 tmp = xAttribs->getValueByIndex( n );
1094 nLength += calcXMLByteLength( tmp.getStr(), tmp.getLength(), true, true );
1096 nLength += 1; // "
1099 nLength ++; // '>'
1102 // Is there a new indentation necesarry ?
1103 sal_Int32 nPrefix(getIndentPrefixLength( nLength ));
1105 // write into sequence
1106 if( nPrefix >= 0 )
1107 m_pSaxWriterHelper->insertIndentation( nPrefix );
1109 SaxInvalidCharacterError eRet(m_pSaxWriterHelper->startElement(aName, xAttribs));
1111 m_nLevel++;
1113 if (eRet == SAX_WARNING)
1115 SAXInvalidCharacterException except;
1116 except.Message = "Invalid character during XML-Export in a attribute value";
1117 throw except;
1119 else if (eRet == SAX_ERROR)
1121 SAXException except;
1122 except.Message = "Invalid character during XML-Export";
1123 throw except;
1127 void SAXWriter::endElement(const OUString& aName) throw (SAXException, RuntimeException, std::exception)
1129 if( ! m_bDocStarted ) {
1130 throw SAXException ();
1132 m_nLevel --;
1134 if( m_nLevel < 0 ) {
1135 throw SAXException();
1137 bool bRet(true);
1139 // check here because Helper's endElement is not always called
1140 #ifdef DBG_UTIL
1141 assert(!m_pSaxWriterHelper->m_DebugStartedElements.empty());
1142 // Well-formedness constraint: Element Type Match
1143 assert(aName == m_pSaxWriterHelper->m_DebugStartedElements.top());
1144 m_pSaxWriterHelper->m_DebugStartedElements.pop();
1145 #endif
1147 if( m_pSaxWriterHelper->FinishEmptyElement() )
1148 m_bForceLineBreak = false;
1149 else
1151 // only ascii chars allowed
1152 sal_Int32 nLength(0);
1153 if (m_bAllowLineBreak)
1154 nLength = 3 + calcXMLByteLength( aName.getStr(), aName.getLength(), false, false );
1155 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1157 if( nPrefix >= 0 )
1158 m_pSaxWriterHelper->insertIndentation( nPrefix );
1160 bRet = m_pSaxWriterHelper->endElement(aName);
1163 if (!bRet)
1165 SAXException except;
1166 except.Message = "Invalid character during XML-Export";
1167 throw except;
1171 void SAXWriter::characters(const OUString& aChars) throw(SAXException, RuntimeException, std::exception)
1173 if( ! m_bDocStarted )
1175 SAXException except;
1176 except.Message = "characters method called before startDocument";
1177 throw except;
1180 bool bThrowException(false);
1181 if( !aChars.isEmpty() )
1183 if( m_bIsCDATA )
1184 bThrowException = !m_pSaxWriterHelper->writeString( aChars, false, false );
1185 else
1187 // Note : nFirstLineBreakOccurrence is not exact, because we don't know, how
1188 // many 2 and 3 byte chars are inbetween. However this whole stuff
1189 // is eitherway for pretty printing only, so it does not need to be exact.
1190 sal_Int32 nLength(0);
1191 sal_Int32 nIndentPrefix(-1);
1192 if (m_bAllowLineBreak)
1194 sal_Int32 nFirstLineBreakOccurrence = getFirstLineBreak( aChars );
1196 nLength = calcXMLByteLength( aChars.getStr(), aChars.getLength(),
1197 ! m_bIsCDATA , false );
1198 nIndentPrefix = getIndentPrefixLength(
1199 nFirstLineBreakOccurrence >= 0 ? nFirstLineBreakOccurrence : nLength );
1201 else
1202 nIndentPrefix = getIndentPrefixLength(nLength);
1204 // insert indentation
1205 if( nIndentPrefix >= 0 )
1207 if( isFirstCharWhitespace( aChars.getStr() ) )
1208 m_pSaxWriterHelper->insertIndentation( nIndentPrefix - 1 );
1209 else
1210 m_pSaxWriterHelper->insertIndentation( nIndentPrefix );
1212 bThrowException = !m_pSaxWriterHelper->writeString(aChars, true , false);
1215 if (bThrowException)
1217 SAXInvalidCharacterException except;
1218 except.Message = "Invalid character during XML-Export";
1219 throw except;
1224 void SAXWriter::ignorableWhitespace(const OUString&) throw(SAXException, RuntimeException, std::exception)
1226 if( ! m_bDocStarted )
1228 throw SAXException ();
1231 m_bForceLineBreak = true;
1234 void SAXWriter::processingInstruction(const OUString& aTarget, const OUString& aData)
1235 throw (SAXException, RuntimeException, std::exception)
1237 if( ! m_bDocStarted || m_bIsCDATA )
1239 throw SAXException();
1242 sal_Int32 nLength(0);
1243 if (m_bAllowLineBreak)
1245 nLength = 2; // "<?"
1246 nLength += calcXMLByteLength( aTarget.getStr(), aTarget.getLength(), false, false );
1248 nLength += 1; // " "
1250 nLength += calcXMLByteLength( aData.getStr(), aData.getLength(), false, false );
1252 nLength += 2; // "?>"
1255 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1257 if( nPrefix >= 0 )
1258 m_pSaxWriterHelper->insertIndentation( nPrefix );
1260 if (!m_pSaxWriterHelper->processingInstruction(aTarget, aData))
1262 SAXException except;
1263 except.Message = "Invalid character during XML-Export";
1264 throw except;
1269 void SAXWriter::setDocumentLocator(const Reference< XLocator >&)
1270 throw (SAXException, RuntimeException, std::exception)
1275 void SAXWriter::startCDATA() throw(SAXException, RuntimeException, std::exception)
1277 if( ! m_bDocStarted || m_bIsCDATA)
1279 throw SAXException ();
1282 sal_Int32 nLength = 9;
1283 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1284 if( nPrefix >= 0 )
1285 m_pSaxWriterHelper->insertIndentation( nPrefix );
1287 m_pSaxWriterHelper->startCDATA();
1289 m_bIsCDATA = true;
1292 void SAXWriter::endCDATA() throw (SAXException,RuntimeException, std::exception)
1294 if( ! m_bDocStarted || ! m_bIsCDATA)
1296 SAXException except;
1297 except.Message = "endCDATA was called without startCDATA";
1298 throw except;
1301 sal_Int32 nLength = 3;
1302 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1303 if( nPrefix >= 0 )
1304 m_pSaxWriterHelper->insertIndentation( nPrefix );
1306 m_pSaxWriterHelper->endCDATA();
1308 m_bIsCDATA = false;
1312 void SAXWriter::comment(const OUString& sComment) throw(SAXException, RuntimeException, std::exception)
1314 if( ! m_bDocStarted || m_bIsCDATA )
1316 throw SAXException();
1319 sal_Int32 nLength(0);
1320 if (m_bAllowLineBreak)
1322 nLength = 4; // "<!--"
1323 nLength += calcXMLByteLength( sComment.getStr(), sComment.getLength(), false, false);
1325 nLength += 3;
1328 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1329 if( nPrefix >= 0 )
1330 m_pSaxWriterHelper->insertIndentation( nPrefix );
1332 if (!m_pSaxWriterHelper->comment(sComment))
1334 SAXException except;
1335 except.Message = "Invalid character during XML-Export";
1336 throw except;
1341 void SAXWriter::allowLineBreak( ) throw ( SAXException , RuntimeException, std::exception)
1343 if( ! m_bDocStarted || m_bAllowLineBreak ) {
1344 throw SAXException();
1347 m_bAllowLineBreak = true;
1350 void SAXWriter::unknown(const OUString& sString) throw (SAXException, RuntimeException, std::exception)
1353 if( ! m_bDocStarted )
1355 throw SAXException ();
1357 if( m_bIsCDATA )
1359 throw SAXException();
1362 if( sString.startsWith( "<?xml" ) )
1363 return;
1365 sal_Int32 nLength(0);
1366 if (m_bAllowLineBreak)
1367 nLength = calcXMLByteLength( sString.getStr(), sString.getLength(), false, false );
1369 sal_Int32 nPrefix = getIndentPrefixLength( nLength );
1370 if( nPrefix >= 0 )
1371 m_pSaxWriterHelper->insertIndentation( nPrefix );
1373 if (!m_pSaxWriterHelper->writeString( sString, false, false))
1375 SAXException except;
1376 except.Message = "Invalid character during XML-Export";
1377 throw except;
1381 } // namespace
1383 extern "C" SAL_DLLPUBLIC_EXPORT css::uno::XInterface * SAL_CALL
1384 com_sun_star_extensions_xml_sax_Writer_get_implementation(
1385 css::uno::XComponentContext *,
1386 css::uno::Sequence<css::uno::Any> const &)
1388 return cppu::acquire(new SAXWriter);
1391 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */