1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include "oox/vml/vmlinputstream.hxx"
22 #include <com/sun/star/io/XTextInputStream2.hpp>
25 #include <rtl/strbuf.hxx>
26 #include <osl/diagnose.h>
27 #include "oox/helper/helper.hxx"
28 #include "oox/helper/textinputstream.hxx"
33 using namespace ::com::sun::star::io
;
34 using namespace ::com::sun::star::uno
;
38 inline const sal_Char
* lclFindCharacter( const sal_Char
* pcBeg
, const sal_Char
* pcEnd
, sal_Char cChar
)
40 sal_Int32 nIndex
= rtl_str_indexOfChar_WithLength( pcBeg
, static_cast< sal_Int32
>( pcEnd
- pcBeg
), cChar
);
41 return (nIndex
< 0) ? pcEnd
: (pcBeg
+ nIndex
);
// Predicate telling whether the single character cChar counts as whitespace.
// The search and trim helpers of this file call it for every character they
// inspect.
// NOTE(review): the function body is not part of this listing, so the exact
// set of characters treated as whitespace cannot be confirmed from here.
44 inline bool lclIsWhiteSpace( sal_Char cChar
)
49 const sal_Char
* lclFindWhiteSpace( const sal_Char
* pcBeg
, const sal_Char
* pcEnd
)
51 for( ; pcBeg
< pcEnd
; ++pcBeg
)
52 if( lclIsWhiteSpace( *pcBeg
) )
57 const sal_Char
* lclFindNonWhiteSpace( const sal_Char
* pcBeg
, const sal_Char
* pcEnd
)
59 for( ; pcBeg
< pcEnd
; ++pcBeg
)
60 if( !lclIsWhiteSpace( *pcBeg
) )
65 const sal_Char
* lclTrimWhiteSpaceFromEnd( const sal_Char
* pcBeg
, const sal_Char
* pcEnd
)
67 while( (pcBeg
< pcEnd
) && lclIsWhiteSpace( pcEnd
[ -1 ] ) )
72 inline void lclAppendToBuffer( OStringBuffer
& rBuffer
, const sal_Char
* pcBeg
, const sal_Char
* pcEnd
)
74 rBuffer
.append( pcBeg
, static_cast< sal_Int32
>( pcEnd
- pcBeg
) );
// Processes the attribute list of an XML element, passed as the character
// range pcBeg..pcEnd, and appends the result to rBuffer.
//
// The attributes are parsed one after another as: name, equality sign, value
// enclosed in single or double quotes. Whitespace is tolerated between name
// and equality sign and between equality sign and value. Every attribute is
// recorded in two maps: one from attribute name to the position of its
// definition, one from that position to the text of the definition. When a
// name occurs again, the earlier definition is erased, so only the last
// definition of each name is kept.
//
// If no error has occurred, the remaining definitions are appended to
// rBuffer, each preceded by a space character. On error, the complete passed
// range is appended instead.
//
// NOTE(review): the declaration of bOk, the braces of the nested conditions,
// a line directly before the two OString constructions, and the keywords
// selecting between the two final branches are not visible in this listing.
// Confirm them against the complete file.
77 void lclProcessAttribs( OStringBuffer
& rBuffer
, const sal_Char
* pcBeg
, const sal_Char
* pcEnd
)
79 /* Map attribute names to char-pointer of all attributes. This map is used
80 to find multiple occurrences of attributes with the same name. The
81 mapped pointers are used as map key in the next map below. */
82 typedef ::std::map
< OString
, const sal_Char
* > AttributeNameMap
;
83 AttributeNameMap aAttributeNames
;
85 /* Map the char-pointers of all attributes to the full attribute definition
86 string. This preserves the original order of the used attributes. */
87 typedef ::std::map
< const sal_Char
*, OString
> AttributeDataMap
;
88 AttributeDataMap aAttributes
;
91 const sal_Char
* pcNameBeg
= pcBeg
;
92 while( bOk
&& (pcNameBeg
< pcEnd
) )
94 // pcNameBeg points to begin of attribute name, find equality sign
95 const sal_Char
* pcEqualSign
= lclFindCharacter( pcNameBeg
, pcEnd
, '=' );
96 if ((bOk
= (pcEqualSign
< pcEnd
)))
98 // find end of attribute name (ignore whitespace between name and equality sign)
99 const sal_Char
* pcNameEnd
= lclTrimWhiteSpaceFromEnd( pcNameBeg
, pcEqualSign
);
100 if( (bOk
= (pcNameBeg
< pcNameEnd
)) )
102 // find begin of attribute value (must be single or double quote)
103 const sal_Char
* pcValueBeg
= lclFindNonWhiteSpace( pcEqualSign
+ 1, pcEnd
);
104 if( (bOk
= (pcValueBeg
< pcEnd
) && ((*pcValueBeg
== '\'') || (*pcValueBeg
== '"'))) )
106 // find end of attribute value (matching quote character)
107 const sal_Char
* pcValueEnd
= lclFindCharacter( pcValueBeg
+ 1, pcEnd
, *pcValueBeg
);
108 if( (bOk
= (pcValueEnd
< pcEnd
)) )
111 OString
aAttribName( pcNameBeg
, static_cast< sal_Int32
>( pcNameEnd
- pcNameBeg
) );
112 OString
aAttribData( pcNameBeg
, static_cast< sal_Int32
>( pcValueEnd
- pcNameBeg
) );
113 // search for an existing attribute with the same name
114 AttributeNameMap::iterator aIt
= aAttributeNames
.find( aAttribName
);
115 // remove its definition from the data map
116 if( aIt
!= aAttributeNames
.end() )
117 aAttributes
.erase( aIt
->second
);
118 // insert the attribute into both maps
119 aAttributeNames
[ aAttribName
] = pcNameBeg
;
120 aAttributes
[ pcNameBeg
] = aAttribData
;
121 // continue with next attribute (skip whitespace after this attribute)
122 pcNameBeg
= pcValueEnd
;
123 if( (pcNameBeg
< pcEnd
) && ((bOk
= lclIsWhiteSpace( *pcNameBeg
))) )
124 pcNameBeg
= lclFindNonWhiteSpace( pcNameBeg
+ 1, pcEnd
);
131 // if no error has occurred, build the resulting attribute list
133 for( AttributeDataMap::iterator aIt
= aAttributes
.begin(), aEnd
= aAttributes
.end(); aIt
!= aEnd
; ++aIt
)
134 rBuffer
.append( ' ' ).append( aIt
->second
);
135 // on error, just append the complete passed string
137 lclAppendToBuffer( rBuffer
, pcBeg
, pcEnd
);
// Processes the text of one XML element (from opening to closing angle
// bracket) passed in rElement and appends the result to rBuffer. The visible
// branches are, in this order:
//  - text that does not start and end with angle brackets is appended
//    unchanged,
//  - parser instructions (opening bracket, exclamation mark, square bracket,
//    closed by a square bracket before the closing angle bracket) have a
//    branch of their own, which per its comment skips them,
//  - a br element, optionally followed by whitespace only, is replaced by a
//    newline character,
//  - start elements and empty elements (second character is not a slash) are
//    rebuilt: element name first, then the attribute list as returned by
//    lclProcessAttribs, then the slash of an empty element if present, then
//    the closing bracket,
//  - the remaining case (end elements, per its comment) appends rElement
//    without further processing.
//
// NOTE(review): the statement executed for an empty rElement, the body of
// the parser instruction branch, and the keyword introducing the last branch
// are not visible in this listing. Confirm them against the complete file.
140 void lclProcessElement( OStringBuffer
& rBuffer
, const OString
& rElement
)
142 // check that passed string starts and ends with the brackets of an XML element
143 sal_Int32 nElementLen
= rElement
.getLength();
144 if( nElementLen
== 0 )
147 const sal_Char
* pcOpen
= rElement
.getStr();
148 const sal_Char
* pcClose
= pcOpen
+ nElementLen
- 1;
150 // no complete element found
151 if( (pcOpen
>= pcClose
) || (*pcOpen
!= '<') || (*pcClose
!= '>') )
153 // just append all passed characters
154 rBuffer
.append( rElement
);
157 // skip parser instructions: '<![...]>'
158 else if( (nElementLen
>= 5) && (pcOpen
[ 1 ] == '!') && (pcOpen
[ 2 ] == '[') && (pcClose
[ -1 ] == ']') )
163 // replace '<br>' element with newline
164 else if( (nElementLen
>= 4) && (pcOpen
[ 1 ] == 'b') && (pcOpen
[ 2 ] == 'r') && (lclFindNonWhiteSpace( pcOpen
+ 3, pcClose
) == pcClose
) )
166 rBuffer
.append( '\n' );
169 // check start elements and simple elements for repeated attributes
170 else if( pcOpen
[ 1 ] != '/' )
172 // find positions of text content inside brackets, exclude '/' in '<simpleelement/>'
173 const sal_Char
* pcContentBeg
= pcOpen
+ 1;
174 bool bIsEmptyElement
= pcClose
[ -1 ] == '/';
175 const sal_Char
* pcContentEnd
= bIsEmptyElement
? (pcClose
- 1) : pcClose
;
176 // append opening bracket and element name to buffer
177 const sal_Char
* pcWhiteSpace
= lclFindWhiteSpace( pcContentBeg
, pcContentEnd
);
178 lclAppendToBuffer( rBuffer
, pcOpen
, pcWhiteSpace
);
179 // find begin of attributes, and process all attributes
180 const sal_Char
* pcAttribBeg
= lclFindNonWhiteSpace( pcWhiteSpace
, pcContentEnd
);
181 if( pcAttribBeg
< pcContentEnd
)
182 lclProcessAttribs( rBuffer
, pcAttribBeg
, pcContentEnd
);
184 if( bIsEmptyElement
)
185 rBuffer
.append( '/' );
186 rBuffer
.append( '>' );
189 // append end elements without further processing
192 rBuffer
.append( rElement
);
196 bool lclProcessCharacters( OStringBuffer
& rBuffer
, const OString
& rChars
)
198 /* MSO has a very weird way to store and handle whitespaces. The stream
199 may contain lots of spaces, tabs, and newlines which have to be handled
200 as single space character. This will be done in this function.
202 If the element text contains a literal line break, it will be stored as
203 <br> tag (without matching </br> element). This input stream wrapper
204 will replace this element with a literal LF character (see below).
206 A single space character for its own is stored as is. Example: The
209 represents a single space character. The XML parser will ignore this
210 space character completely without issuing a 'characters' event. The
211 VML import filter implementation has to react on this case manually.
213 A single space character following another character is stored
214 literally and must not be stipped away here. Example: The element
216 contains the three letters a, b, and c, followed by a space character.
218 Consecutive space characters, or a leading single space character, are
219 stored in a <span> element. If there are N space characters (N > 1),
220 then the <span> element contains exactly (N-1) NBSP (non-breaking
221 space) characters, followed by a regular space character. Examples:
223 <font><span style='mso-spacerun:yes'>\xA0\xA0\xA0 </span></font>
224 represents 4 consecutive space characters. Has to be handled by the
225 implementation. The element
226 <font><span style='mso-spacerun:yes'> abc</span></font>
227 represents a space characters followed by the letters a, b, c. These
228 strings have to be handled by the VML import filter implementation.
231 // passed string ends with the leading opening bracket of an XML element
232 const sal_Char
* pcBeg
= rChars
.getStr();
233 const sal_Char
* pcEnd
= pcBeg
+ rChars
.getLength();
234 bool bHasBracket
= (pcBeg
< pcEnd
) && (pcEnd
[ -1 ] == '<');
235 if( bHasBracket
) --pcEnd
;
237 // skip leading whitespace
238 const sal_Char
* pcContentsBeg
= lclFindNonWhiteSpace( pcBeg
, pcEnd
);
239 while( pcContentsBeg
< pcEnd
)
241 const sal_Char
* pcWhitespaceBeg
= lclFindWhiteSpace( pcContentsBeg
+ 1, pcEnd
);
242 lclAppendToBuffer( rBuffer
, pcContentsBeg
, pcWhitespaceBeg
);
243 if( pcWhitespaceBeg
< pcEnd
)
244 rBuffer
.append( ' ' );
245 pcContentsBeg
= lclFindNonWhiteSpace( pcWhitespaceBeg
, pcEnd
);
// Constructs the stream wrapper on top of the input stream rxInStrm.
// A text input stream is created from rxContext and rxInStrm with ISO-8859-1
// encoding. maOpeningBracket and maClosingBracket are created with one
// element each and set to the opening and closing angle bracket; the read
// helpers of this class pass them to readString of the text stream.
// maOpeningCData and maClosingCData hold the CDATA start and end markers.
// NOTE(review): the end of the initializer list and the statement executed
// when mxTextStrm is not valid are not visible in this listing. Confirm
// them against the complete file.
253 InputStream::InputStream( const Reference
< XComponentContext
>& rxContext
, const Reference
< XInputStream
>& rxInStrm
) :
254 // use single-byte ISO-8859-1 encoding which maps all byte characters to the first 256 Unicode characters
255 mxTextStrm( TextInputStream::createXTextInputStream( rxContext
, rxInStrm
, RTL_TEXTENCODING_ISO_8859_1
) ),
256 maOpeningBracket( 1 ),
257 maClosingBracket( 1 ),
258 maOpeningCData( CREATE_OSTRING( "<![CDATA[" ) ),
259 maClosingCData( CREATE_OSTRING( "]]>" ) ),
262 if (!mxTextStrm
.is())
264 maOpeningBracket
[ 0 ] = '<';
265 maClosingBracket
[ 0 ] = '>';
// Destructor of the stream wrapper.
// NOTE(review): the body is not visible in this listing; presumably it is
// empty. Confirm against the complete file.
268 InputStream::~InputStream()
// Reads up to nBytesToRead bytes of the processed stream contents into rData.
// rData is resized to nBytesToRead first. While bytes are still requested and
// the text stream is not at EOF, a chunk is copied from maBuffer, starting at
// mnBufferPos and limited to what is left in maBuffer, and mnBufferPos and
// the outstanding byte count are adjusted. Finally rData is shrunk to nRet
// elements if nRet is smaller than its length.
// NOTE(review): the statement executed for a negative nBytesToRead, the
// declaration and update of nRet, whatever refills maBuffer inside the loop,
// and the return statement are not visible in this listing. Presumably nRet
// counts the copied bytes and is returned; confirm against the complete file.
272 sal_Int32 SAL_CALL
InputStream::readBytes( Sequence
< sal_Int8
>& rData
, sal_Int32 nBytesToRead
)
273 throw (NotConnectedException
, BufferSizeExceededException
, IOException
, RuntimeException
, std::exception
)
275 if( nBytesToRead
< 0 )
278 rData
.realloc( nBytesToRead
);
279 sal_Int8
* pcDest
= rData
.getArray();
281 while( (nBytesToRead
> 0) && !mxTextStrm
->isEOF() )
284 sal_Int32 nReadSize
= ::std::min( nBytesToRead
, maBuffer
.getLength() - mnBufferPos
);
287 memcpy( pcDest
+ nRet
, maBuffer
.getStr() + mnBufferPos
, static_cast< size_t >( nReadSize
) );
288 mnBufferPos
+= nReadSize
;
289 nBytesToRead
-= nReadSize
;
293 if( nRet
< rData
.getLength() )
294 rData
.realloc( nRet
);
298 sal_Int32 SAL_CALL
InputStream::readSomeBytes( Sequence
< sal_Int8
>& rData
, sal_Int32 nMaxBytesToRead
)
299 throw (NotConnectedException
, BufferSizeExceededException
, IOException
, RuntimeException
, std::exception
)
301 return readBytes( rData
, nMaxBytesToRead
);
// Skips nBytesToSkip bytes of the processed stream contents. While bytes
// remain to be skipped and the text stream is not at EOF, mnBufferPos is
// advanced by the smaller of the outstanding count and the number of bytes
// left in maBuffer, and the outstanding count is reduced accordingly.
// NOTE(review): the statement executed for a negative nBytesToSkip and
// whatever refills maBuffer inside the loop are not visible in this listing.
// Confirm them against the complete file.
304 void SAL_CALL
InputStream::skipBytes( sal_Int32 nBytesToSkip
)
305 throw (NotConnectedException
, BufferSizeExceededException
, IOException
, RuntimeException
, std::exception
)
307 if( nBytesToSkip
< 0 )
310 while( (nBytesToSkip
> 0) && !mxTextStrm
->isEOF() )
313 sal_Int32 nSkipSize
= ::std::min( nBytesToSkip
, maBuffer
.getLength() - mnBufferPos
);
314 mnBufferPos
+= nSkipSize
;
315 nBytesToSkip
-= nSkipSize
;
// Returns the number of bytes left in maBuffer, that is its length minus the
// current read position mnBufferPos.
// NOTE(review): one line between the signature and the return statement is
// not visible in this listing; it may refresh the buffer first. Confirm
// against the complete file.
319 sal_Int32 SAL_CALL
InputStream::available() throw (NotConnectedException
, IOException
, RuntimeException
, std::exception
)
322 return maBuffer
.getLength() - mnBufferPos
;
325 void SAL_CALL
InputStream::closeInput() throw (NotConnectedException
, IOException
, RuntimeException
, std::exception
)
327 mxTextStrm
->closeInput();
330 // private --------------------------------------------------------------------
// Refills maBuffer once it has been consumed. As long as mnBufferPos has
// reached the end of maBuffer and the text stream is not at EOF:
//  - the characters up to the opening bracket of the next XML element are
//    read and passed through lclProcessCharacters,
//  - if an opening bracket was found and the stream is not at EOF, the
//    element text up to the closing bracket is read, with the opening
//    bracket prepended manually,
//  - element text starting with the CDATA opening marker is extended until
//    it ends with the CDATA closing marker (or EOF is reached) and is copied
//    unchanged; other element text is passed through lclProcessElement,
//  - the collected text replaces the contents of maBuffer.
// NOTE(review): the keyword separating the CDATA branch from the call to
// lclProcessElement, and any adjustment of mnBufferPos after maBuffer has
// been replaced, are not visible in this listing. Confirm them against the
// complete file.
332 void InputStream::updateBuffer() throw (IOException
, RuntimeException
)
334 while( (mnBufferPos
>= maBuffer
.getLength()) && !mxTextStrm
->isEOF() )
336 // collect new contents in a string buffer
337 OStringBuffer aBuffer
;
339 // read and process characters until the opening bracket of the next XML element
340 OString aChars
= readToElementBegin();
341 bool bHasOpeningBracket
= lclProcessCharacters( aBuffer
, aChars
);
343 // read and process characters until (and including) closing bracket (an XML element)
344 OSL_ENSURE( bHasOpeningBracket
|| mxTextStrm
->isEOF(), "InputStream::updateBuffer - missing opening bracket of XML element" );
345 if( bHasOpeningBracket
&& !mxTextStrm
->isEOF() )
347 // read the element text (add the leading opening bracket manually)
348 OString aElement
= OString( '<' ) + readToElementEnd();
349 // check for CDATA part, starting with '<![CDATA['
350 if( aElement
.match( maOpeningCData
) )
352 // search the end tag ']]>'
353 while( ((aElement
.getLength() < maClosingCData
.getLength()) || !aElement
.endsWith( maClosingCData
)) && !mxTextStrm
->isEOF() )
354 aElement
+= readToElementEnd();
355 // copy the entire CDATA part
356 aBuffer
.append( aElement
);
360 // no CDATA part - process the contents of the element
361 lclProcessElement( aBuffer
, aElement
);
365 maBuffer
= aBuffer
.makeStringAndClear();
370 OString
InputStream::readToElementBegin() throw (IOException
, RuntimeException
)
372 return OUStringToOString( mxTextStrm
->readString( maOpeningBracket
, sal_False
), RTL_TEXTENCODING_ISO_8859_1
);
375 OString
InputStream::readToElementEnd() throw (IOException
, RuntimeException
)
377 OString aText
= OUStringToOString( mxTextStrm
->readString( maClosingBracket
, sal_False
), RTL_TEXTENCODING_ISO_8859_1
);
378 OSL_ENSURE( aText
.endsWith(">"), "InputStream::readToElementEnd - missing closing bracket of XML element" );
385 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */