1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: xistream.hxx,v $
10 * $Revision: 1.13.30.3 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef SC_XISTREAM_HXX
32 #define SC_XISTREAM_HXX
34 #include <comphelper/docpasswordhelper.hxx>
35 #include <svx/mscodec.hxx>
36 #include "xlstream.hxx"
37 #include "xlconst.hxx"
41 /* ============================================================================
42 Input stream class for Excel import
43 - CONTINUE record handling
44 - ByteString and UniString support
46 ============================================================================ */
48 // ============================================================================
50 // ============================================================================
52 class XclImpDecrypter
;
53 typedef ScfRef
< XclImpDecrypter
> XclImpDecrypterRef
;
55 /** Base class for BIFF stream decryption. */
56 class XclImpDecrypter
: public ::comphelper::IDocPasswordVerifier
59 explicit XclImpDecrypter();
60 virtual ~XclImpDecrypter();
62 /** Returns the current error code of the decrypter. */
63 inline ErrCode
GetError() const { return mnError
; }
64 /** Returns true, if the decoder has been initialized correctly. */
65 inline bool IsValid() const { return mnError
== ERRCODE_NONE
; }
67 /** Creates a (ref-counted) copy of this decrypter object. */
68 XclImpDecrypterRef
Clone() const;
70 /** Implementation of the ::comphelper::IDocPasswordVerifier interface,
71 calls the new virtual function implVerify(). */
72 virtual ::comphelper::DocPasswordVerifierResult
73 verifyPassword( const ::rtl::OUString
& rPassword
);
75 /** Updates the decrypter on start of a new record or after seeking stream. */
76 void Update( SvStream
& rStrm
, sal_uInt16 nRecSize
);
77 /** Reads and decrypts nBytes bytes and stores data into the existing(!) buffer pData.
78 @return Count of bytes really read. */
79 sal_uInt16
Read( SvStream
& rStrm
, void* pData
, sal_uInt16 nBytes
);
82 /** Protected copy c'tor for OnClone(). */
83 explicit XclImpDecrypter( const XclImpDecrypter
& rSrc
);
86 /** Implementation of cloning this object. */
87 virtual XclImpDecrypter
* OnClone() const = 0;
88 /** Derived classes implement password verification and initialization of the decoder. */
90 virtual bool OnVerify( const ::rtl::OUString
& rPassword
) = 0;
91 /** Implementation of updating the decrypter. */
92 virtual void OnUpdate( sal_Size nOldStrmPos
, sal_Size nNewStrmPos
, sal_uInt16 nRecSize
) = 0;
93 /** Implementation of the decryption. */
94 virtual sal_uInt16
OnRead( SvStream
& rStrm
, sal_uInt8
* pnData
, sal_uInt16 nBytes
) = 0;
97 ErrCode mnError
; /// Decrypter error code.
98 sal_Size mnOldPos
; /// Last known stream position.
99 sal_uInt16 mnRecSize
; /// Current record size.
102 // ----------------------------------------------------------------------------
104 /** Decrypts BIFF5 stream contents. */
105 class XclImpBiff5Decrypter
: public XclImpDecrypter
108 explicit XclImpBiff5Decrypter( sal_uInt16 nKey
, sal_uInt16 nHash
);
111 /** Private copy c'tor for OnClone(). */
112 explicit XclImpBiff5Decrypter( const XclImpBiff5Decrypter
& rSrc
);
114 /** Implementation of cloning this object. */
115 virtual XclImpBiff5Decrypter
* OnClone() const;
116 /** Implements password verification and initialization of the decoder. */
117 virtual bool OnVerify( const ::rtl::OUString
& rPassword
);
118 /** Implementation of updating the decrypter. */
119 virtual void OnUpdate( sal_Size nOldStrmPos
, sal_Size nNewStrmPos
, sal_uInt16 nRecSize
);
120 /** Implementation of the decryption. */
121 virtual sal_uInt16
OnRead( SvStream
& rStrm
, sal_uInt8
* pnData
, sal_uInt16 nBytes
);
124 ::svx::MSCodec_XorXLS95 maCodec
; /// Crypto algorithm implementation.
125 ::std::vector
< sal_uInt8
> maPassword
;
130 // ----------------------------------------------------------------------------
132 /** Decrypts BIFF8 stream contents using the given document identifier. */
133 class XclImpBiff8Decrypter
: public XclImpDecrypter
136 explicit XclImpBiff8Decrypter( sal_uInt8 pnSalt
[ 16 ],
137 sal_uInt8 pnVerifier
[ 16 ], sal_uInt8 pnVerifierHash
[ 16 ] );
140 /** Private copy c'tor for OnClone(). */
141 explicit XclImpBiff8Decrypter( const XclImpBiff8Decrypter
& rSrc
);
143 /** Implementation of cloning this object. */
144 virtual XclImpBiff8Decrypter
* OnClone() const;
145 /** Implements password verification and initialization of the decoder. */
146 virtual bool OnVerify( const ::rtl::OUString
& rPassword
);
147 /** Implementation of updating the decrypter. */
148 virtual void OnUpdate( sal_Size nOldStrmPos
, sal_Size nNewStrmPos
, sal_uInt16 nRecSize
);
149 /** Implementation of the decryption. */
150 virtual sal_uInt16
OnRead( SvStream
& rStrm
, sal_uInt8
* pnData
, sal_uInt16 nBytes
);
152 /** Returns the block number corresponding to the passed stream position. */
153 sal_uInt32
GetBlock( sal_Size nStrmPos
) const;
154 /** Returns the block offset corresponding to the passed stream position. */
155 sal_uInt16
GetOffset( sal_Size nStrmPos
) const;
158 ::svx::MSCodec_Std97 maCodec
; /// Crypto algorithm implementation.
159 ::std::vector
< sal_uInt16
> maPassword
;
160 ::std::vector
< sal_uInt8
> maSalt
;
161 ::std::vector
< sal_uInt8
> maVerifier
;
162 ::std::vector
< sal_uInt8
> maVerifierHash
;
165 // ============================================================================
167 // ============================================================================
169 /** This class represents an Excel stream position.
170 @descr It contains the relevant data for a stream position inside of a record
171 (including CONTINUE records). */
172 class XclImpStreamPos
175 /** Constructs an invalid stream position data object. */
176 explicit XclImpStreamPos();
178 /** Sets the stream position data to the passed values. */
179 void Set( const SvStream
& rStrm
, sal_Size nNextPos
, sal_Size nCurrSize
,
180 sal_uInt16 nRawRecId
, sal_uInt16 nRawRecSize
, sal_uInt16 nRawRecLeft
,
183 /** Writes the contained stream position data to the given variables. */
184 void Get( SvStream
& rStrm
, sal_Size
& rnNextPos
, sal_Size
& rnCurrSize
,
185 sal_uInt16
& rnRawRecId
, sal_uInt16
& rnRawRecSize
, sal_uInt16
& rnRawRecLeft
,
186 bool& rbValid
) const;
189 sal_Size mnPos
; /// Absolute position of the stream.
190 sal_Size mnNextPos
; /// Absolute position of next record.
191 sal_Size mnCurrSize
; /// Current calculated size of the record.
192 sal_uInt16 mnRawRecId
; /// Current raw record ID (including CONTINUEs).
193 sal_uInt16 mnRawRecSize
; /// Current raw record size (without following CONTINUEs).
194 sal_uInt16 mnRawRecLeft
; /// Bytes left in current raw record (without following CONTINUEs).
195 bool mbValid
; /// Read state: false = record overread.
198 // ============================================================================
200 /** This class is used to import record oriented streams.
201 @descr An instance is constructed with an SvStream. The SvStream stream is
202 reset to its start while constructing this stream.
204 To start reading a record call StartNextRecord(). Now it is possible to
205 read all contents of the record using operator>>() or any of the Read***()
206 functions. If some data exceeds the record size limit, the stream looks for
207 a following CONTINUE record and jumps automatically to it. An atomic data
208 type must NOT be split across two records (e.g. 4 bytes of a double in one
209 record and the other 4 bytes in a following CONTINUE).
211 Trying to read over the record limits results in a stream error. The
212 IsValid() function indicates that with returning false. From now on it is
213 undefined what data the read functions will return. The error state will be
214 reset, if the record is reset (with the method ResetRecord()) or if the
215 next record is started.
217 To switch off the automatic lookup of CONTINUE records, use ResetRecord()
218 with the parameter false. This is useful e.g. on import of Escher objects,
219 where sometimes only CONTINUE records occur. The automatic lookup
220 stays switched off until the method ResetRecord() is called with parameter
221 true. All other settings done on the stream (i.e. alternative CONTINUE
222 record identifier, enabled decryption, NUL substitution character) will be
223 reset to default values, if a new record is started.
225 The import stream supports decrypting the stream data. The contents of a
226 record (not the record header) will be encrypted by Excel if the file has
227 been stored with password protection. The functions SetDecrypter(),
228 EnableDecryption(), and DisableDecryption() control the usage of the
229 decryption algorithms. SetDecrypter() sets a new decryption algorithm and
230 initially enables it. DisableDecryption() may be used to stop the usage of
231 the decryption temporarily (some record contents are never encrypted,
232 e.g. all BOF records or the stream position in BOUNDSHEET). Decryption will
233 be re-enabled automatically, if a new record is started with the function StartNextRecord().
236 It is possible to store several stream positions inside a record (including
237 its CONTINUE records). The positions are stored on a stack, which can be
238 controlled with the functions PushPosition(), PopPosition() and
239 RejectPosition(). The stack will be cleared whenever a new record is
240 started with the function StartNextRecord().
242 Additionally a single global stream position can be stored which stays
243 valid during the whole import process (methods StoreGlobalPosition(),
244 SeekGlobalPosition() and DeleteGlobalPosition()). This is the only way to
245 jump back to a previous record (that is a real jump without return). */
250 /** Detects the BIFF version of the passed workbook stream. */
251 static XclBiff
DetectBiffVersion( SvStream
& rStrm
);
253 /** Constructs the Excel record import stream using a TOOLS stream object.
254 @param rInStrm The system input stream. Will be set to its start position.
255 Must exist as long as this object exists.
256 @param bContLookup Automatic CONTINUE lookup on/off. */
257 explicit XclImpStream(
259 const XclImpRoot
& rRoot
,
260 bool bContLookup
= true );
264 /** Returns the filter root data. */
265 inline const XclImpRoot
& GetRoot() const { return mrRoot
; }
267 /** Sets stream pointer to the start of the next record content.
268 @descr Ignores all CONTINUE records of the current record, if automatic
269 CONTINUE usage is switched on.
270 @return false = no record found (end of stream). */
271 bool StartNextRecord();
272 /** Sets stream pointer to the start of the record content for the record
273 at the passed absolute stream position.
274 @return false = no record found (end of stream). */
275 bool StartNextRecord( sal_Size nNextRecPos
);
276 /** Sets stream pointer to the beginning of the record content.
277 @param bContLookup Automatic CONTINUE lookup on/off. In contrast
278 to other stream settings, this setting is persistent until the next call of
279 this function (because it is wanted to receive the next CONTINUE records separately).
281 @param nAltContId Sets an alternative record ID for content
282 continuation. This value is reset automatically when a new record is
283 started with StartNextRecord(). */
284 void ResetRecord( bool bContLookup
,
285 sal_uInt16 nAltContId
= EXC_ID_UNKNOWN
);
287 /** Enables decryption of record contents for the rest of the stream. */
288 void SetDecrypter( XclImpDecrypterRef xDecrypter
);
289 /** Sets decrypter from another stream. */
290 void CopyDecrypterFrom( const XclImpStream
& rStrm
);
291 /** Returns true, if a valid decrypter is set at the stream. */
292 bool HasValidDecrypter() const;
293 /** Switches usage of current decryption algorithm on/off.
294 @descr Decryption is re-enabled automatically, if a new record is
295 started using the function StartNextRecord(). */
296 void EnableDecryption( bool bEnable
= true );
297 /** Switches usage of current decryption algorithm off.
298 @descr This is a record-local setting. The function StartNextRecord()
299 always enables decryption. */
300 inline void DisableDecryption() { EnableDecryption( false ); }
302 /** Pushes current position on user position stack.
303 @descr This stack is emptied when starting a new record with
304 StartNextRecord(). The decryption state (enabled/disabled) is not
305 pushed onto the stack. */
307 /** Seeks to last position from user position stack.
308 @descr This position will be removed from the stack. */
310 //UNUSED2008-05 /** Removes last position from user position stack, but does not seek to it. */
311 //UNUSED2008-05 void RejectPosition();
313 /** Stores current position. This position stays valid in all records. */
314 void StoreGlobalPosition();
315 /** Seeks to the stored global user position. */
316 void SeekGlobalPosition();
317 /** Invalidates global user position. */
318 inline void DeleteGlobalPosition() { mbHasGlobPos
= false; }
320 /** Returns record reading state: false = record overread. */
321 inline bool IsValid() const { return mbValid
; }
322 /** Returns the current record ID. */
323 inline sal_uInt16
GetRecId() const { return mnRecId
; }
324 /** Returns the position inside of the whole record content. */
325 sal_Size
GetRecPos() const;
326 /** Returns the data size of the whole record without record headers. */
327 sal_Size
GetRecSize();
328 /** Returns remaining data size of the whole record without record headers. */
329 sal_Size
GetRecLeft();
330 /** Returns the record ID of the following record. */
331 sal_uInt16
GetNextRecId();
333 XclImpStream
& operator>>( sal_Int8
& rnValue
);
334 XclImpStream
& operator>>( sal_uInt8
& rnValue
);
335 XclImpStream
& operator>>( sal_Int16
& rnValue
);
336 XclImpStream
& operator>>( sal_uInt16
& rnValue
);
337 XclImpStream
& operator>>( sal_Int32
& rnValue
);
338 XclImpStream
& operator>>( sal_uInt32
& rnValue
);
339 XclImpStream
& operator>>( float& rfValue
);
340 XclImpStream
& operator>>( double& rfValue
);
343 sal_uInt8
ReaduInt8();
344 sal_Int16
ReadInt16();
345 sal_uInt16
ReaduInt16();
346 sal_Int32
ReadInt32();
347 sal_uInt32
ReaduInt32();
351 /** Reads nBytes bytes to the existing(!) buffer pData.
352 @return Count of bytes really read. */
353 sal_Size
Read( void* pData
, sal_Size nBytes
);
354 /** Copies nBytes bytes to rOutStrm.
355 @return Count of bytes really written. */
356 sal_Size
CopyToStream( SvStream
& rOutStrm
, sal_Size nBytes
);
358 /** Copies the entire record to rOutStrm. The current record position remains unchanged.
359 @return Count of bytes really written. */
360 sal_Size
CopyRecordToStream( SvStream
& rOutStrm
);
362 /** Seeks absolute in record content to the specified position.
363 @descr The value 0 means start of record, independent from physical stream position. */
364 void Seek( sal_Size nPos
);
365 /** Seeks forward inside the current record. */
366 void Ignore( sal_Size nBytes
);
368 // *** special string functions *** ---------------------------------------
370 // *** read/ignore unicode strings *** ------------------------------------
371 /* - look for CONTINUE records even if CONTINUE handling disabled
372 (only if inside of a CONTINUE record - for TXO import)
373 - no overread assertions (for Applix wrong string length export bug)
375 structure of an Excel unicode string:
376 (1) 2 byte character count
377 (2) 1 byte flags (16-bit-characters, rich string, far east string)
378 (3) [2 byte rich string format run count]
379 (4) [4 byte far east data size]
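380 (5) character array (8-bit or 16-bit characters, depending on the flags)
381 (6) [4 * (rich string format run count) byte]
382 (7) [(far east data size) byte]
384 ext. header = (3), (4)
385 ext. data = (6), (7) */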
388 /** Reads ext. header, detects 8/16 bit mode, sets all ext. info.
389 @return Total size of ext. data. */
390 sal_Size
ReadUniStringExtHeader(
391 bool& rb16Bit
, bool& rbRich
, bool& rbFareast
,
392 sal_uInt16
& rnFormatRuns
, sal_uInt32
& rnExtInf
, sal_uInt8 nFlags
);
393 /** Seeks to begin of character array, detects 8/16 bit mode.
394 @return Total size of ext. data. */
395 sal_Size
ReadUniStringExtHeader( bool& rb16Bit
, sal_uInt8 nFlags
);
397 /** Sets a replacement character for NUL characters.
398 @descr NUL characters must be replaced, because Tools strings cannot
399 handle them. The substitution character is reset to '?' automatically,
400 if a new record is started using the function StartNextRecord().
401 @param cNulSubst The character to use for NUL replacement. It is
402 possible to specify NUL here. In this case strings are terminated when
403 the first NUL occurs during string import. */
404 inline void SetNulSubstChar( sal_Unicode cNulSubst
= '?' ) { mcNulSubst
= cNulSubst
; }
406 /** Reads nChars characters and returns the string. */
407 String
ReadRawUniString( sal_uInt16 nChars
, bool b16Bit
);
408 /** Reads ext. header, nChar characters, ext. data and returns the string. */
409 String
ReadUniString( sal_uInt16 nChars
, sal_uInt8 nFlags
);
410 /** Reads 8 bit flags, ext. header, nChar characters, ext. data and returns the string. */
411 String
ReadUniString( sal_uInt16 nChars
);
412 /** Reads 16 bit character count, 8 bit flags, ext. header, character array,
413 ext. data and returns the string. */
414 String
ReadUniString();
416 /** Ignores nChars characters. */
417 void IgnoreRawUniString( sal_uInt16 nChars
, bool b16Bit
);
418 /** Ignores ext. header, nChar characters, ext. data. */
419 void IgnoreUniString( sal_uInt16 nChars
, sal_uInt8 nFlags
);
420 /** Ignores 8 bit flags, ext. header, nChar characters, ext. data. */
421 void IgnoreUniString( sal_uInt16 nChars
);
422 /** Ignores 16 bit character count, 8 bit flags, ext. header, character array, ext. data. */
423 void IgnoreUniString();
425 // *** read/ignore 8-bit-strings, store in String *** ---------------------
427 /** Reads nChar byte characters and returns the string. */
428 String
ReadRawByteString( sal_uInt16 nChars
);
429 /** Reads 8/16 bit string length, character array and returns the string. */
430 String
ReadByteString( bool b16BitLen
);
432 // *** SvStream functions *** ---------------------------------------------
434 /** Returns the absolute stream position. */
435 inline sal_Size
GetSvStreamPos() const { return mrStrm
.Tell(); }
436 /** Returns the stream size. */
437 inline sal_Size
GetSvStreamSize() const { return mnStreamSize
; }
440 /** Stores current stream position into rPos. */
441 void StorePosition( XclImpStreamPos
& rPos
);
442 /** Restores stream position contained in rPos. */
443 void RestorePosition( const XclImpStreamPos
& rPos
);
445 /** Seeks to next raw record header and reads record ID and size.
446 @descr This is a "raw" function, meaning that stream members are
447 inconsistent after return. It only changes mnRawRecId, mnRawRecSize,
448 and the base stream position, but no other members.
449 @return false = No record header found (end of stream). */
450 bool ReadNextRawRecHeader();
452 /** Initializes the decrypter to read a new record. */
453 void SetupDecrypter();
454 /** Initializes all members after base stream has been seeked to new raw record. */
455 void SetupRawRecord();
456 /** Initializes all members after base stream has been seeked to new record. */
459 /** Returns true, if the passed ID is real or alternative continuation record ID. */
460 bool IsContinueId( sal_uInt16 nRecId
) const;
462 /** Goes to start of the next CONTINUE record.
463 @descr Stream must be located at the end of a raw record, and handling
464 of CONTINUE records must be enabled.
465 @return Copy of mbValid. */
466 bool JumpToNextContinue();
467 /** Goes to start of the next CONTINUE record while reading strings.
468 @descr Stream must be located at the end of a raw record. If reading
469 has been started in a CONTINUE record, jumps to an existing following
470 CONTINUE record, even if handling of CONTINUE records is disabled (This
471 is a special handling for TXO string data). Reads additional Unicode
472 flag byte at start of the new raw record and sets or resets rb16Bit.
473 @return Copy of mbValid. */
474 bool JumpToNextStringContinue( bool& rb16Bit
);
476 /** Ensures that reading nBytes bytes is possible with next stream access.
477 @descr Stream must be located at the end of a raw record, and handling
478 of CONTINUE records must be enabled.
479 @return Copy of mbValid. */
480 bool EnsureRawReadSize( sal_uInt16 nBytes
);
481 /** Returns the maximum size of raw data possible to read in one block. */
482 sal_uInt16
GetMaxRawReadSize( sal_Size nBytes
) const;
484 /** Reads and decrypts nBytes bytes to the existing(!) buffer pData.
485 @return Count of bytes really read. */
486 sal_uInt16
ReadRawData( void* pData
, sal_uInt16 nBytes
);
488 /** Reads 8 bit/16 bit string length. */
489 inline sal_uInt16
ReadByteStrLen( bool b16BitLen
)
490 { return b16BitLen
? ReaduInt16() : ReaduInt8(); }
493 typedef ::std::vector
< XclImpStreamPos
> XclImpStreamPosStack
;
495 SvStream
& mrStrm
; /// Reference to the system input stream.
496 const XclImpRoot
& mrRoot
; /// Filter root data.
498 XclImpDecrypterRef mxDecrypter
; /// Provides methods to decrypt data.
500 XclImpStreamPos maFirstRec
; /// Start position of current record.
501 XclImpStreamPosStack maPosStack
; /// Stack for record positions.
503 XclImpStreamPos maGlobPos
; /// User defined position elsewhere in stream.
504 sal_uInt16 mnGlobRecId
; /// Record ID for user defined position.
505 bool mbGlobValidRec
; /// Was user position a valid record?
506 bool mbHasGlobPos
; /// Is user position defined?
508 sal_Size mnStreamSize
; /// Size of system stream.
509 sal_Size mnNextRecPos
; /// Start of next record header.
510 sal_Size mnCurrRecSize
; /// Helper for record position.
511 sal_Size mnComplRecSize
; /// Size of complete record data (with CONTINUEs).
512 bool mbHasComplRec
; /// true = mnComplRecSize is valid.
514 sal_uInt16 mnRecId
; /// Current record ID (not the CONTINUE ID).
515 sal_uInt16 mnAltContId
; /// Alternative record ID for content continuation.
517 sal_uInt16 mnRawRecId
; /// Current raw record ID (including CONTINUEs).
518 sal_uInt16 mnRawRecSize
; /// Current raw record size (without following CONTINUEs).
519 sal_uInt16 mnRawRecLeft
; /// Bytes left in current raw record (without following CONTINUEs).
521 sal_Unicode mcNulSubst
; /// Replacement for NUL characters.
523 bool mbCont
; /// Automatic CONTINUE lookup on/off.
524 bool mbUseDecr
; /// Usage of decryption.
525 bool mbValidRec
; /// false = No more records to read.
526 bool mbValid
; /// false = Record overread.
529 // ============================================================================