1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: biffinputstream.hxx,v $
10 * $Revision: 1.4.20.1 $
12 * This file is part of OpenOffice.org.
14 * OpenOffice.org is free software: you can redistribute it and/or modify
15 * it under the terms of the GNU Lesser General Public License version 3
16 * only, as published by the Free Software Foundation.
18 * OpenOffice.org is distributed in the hope that it will be useful,
19 * but WITHOUT ANY WARRANTY; without even the implied warranty of
20 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
21 * GNU Lesser General Public License version 3 for more details
22 * (a copy is included in the LICENSE file that accompanied this code).
24 * You should have received a copy of the GNU Lesser General Public License
25 * version 3 along with OpenOffice.org. If not, see
26 * <http://www.openoffice.org/license.html>
27 * for a copy of the LGPLv3 License.
29 ************************************************************************/
31 #ifndef OOX_XLS_BIFFINPUTSTREAM_HXX
32 #define OOX_XLS_BIFFINPUTSTREAM_HXX
35 #include "oox/helper/binaryinputstream.hxx"
36 #include "oox/xls/biffhelper.hxx"
37 #include "oox/xls/biffcodec.hxx"
// Forward declaration of ::rtl::OUStringBuffer; this header uses the type as a
// reference parameter of BiffInputStream::appendUnicodeArray().
39 namespace rtl
{ class OUStringBuffer
; }
44 // ============================================================================
48 /** Buffers the contents of a raw record and encapsulates stream decoding.

    Keeps two byte buffers, one with the original data read from the stream
    and one with the decoded data, plus a pointer to the buffer currently in
    use. Record identifier, record size, and read position are 16-bit values. */
49 class BiffInputRecordBuffer
52 explicit BiffInputRecordBuffer( BinaryInputStream
& rInStrm
);
54 /** Returns the wrapped binary base stream. */
55 inline const BinaryInputStream
& getBaseStream() const { return mrInStrm
; }
57 /** Sets a decoder object and decrypts buffered record data. */
58 void setDecoder( const BiffDecoderRef
& rxDecoder
);
59 /** Returns the current decoder object. */
60 inline BiffDecoderRef
getDecoder() const { return mxDecoder
; }
61 /** Enables/disables usage of current decoder. */
62 void enableDecoder( bool bEnable
);
64 /** Restarts the stream at the passed position. Buffer is invalid until the
65 next call of startRecord() or startNextRecord(). */
66 void restartAt( sal_Int64 nPos
);
68 /** Reads the record header at the passed position. */
69 bool startRecord( sal_Int64 nHeaderPos
);
70 /** Reads the next record header from the stream. */
71 bool startNextRecord();
72 /** Returns the record identifier of the following record. */
73 sal_uInt16
getNextRecId();
75 /** Returns the start position of the record header in the core stream. */
76 inline sal_Int64
getRecHeaderPos() const { return mnHeaderPos
; }
77 /** Returns the current record identifier. */
78 inline sal_uInt16
getRecId() const { return mnRecId
; }
79 /** Returns the current record size. */
80 inline sal_uInt16
getRecSize() const { return mnRecSize
; }
81 /** Returns the current read position in the current record body. */
82 inline sal_uInt16
getRecPos() const { return mnRecPos
; }
83 /** Returns the number of remaining bytes in the current record body
    (record size minus current read position). */
84 inline sal_uInt16
getRecLeft() const { return mnRecSize
- mnRecPos
; }
86 /** Reads nBytes bytes to the existing buffer opData. Must NOT overread the source buffer. */
87 void read( void* opData
, sal_uInt16 nBytes
);
88 /** Ignores nBytes bytes. Must NOT overread the buffer. */
89 void skip( sal_uInt16 nBytes
);
92 /** Updates data buffer from stream, if needed. */
94 /** Updates decoded data from original data. */
98 typedef ::std::vector
< sal_uInt8
> DataBuffer
;
100 BinaryInputStream
& mrInStrm
; /// Core input stream.
101 DataBuffer maOriginalData
; /// Original data read from stream.
102 DataBuffer maDecodedData
; /// Decoded data.
103 DataBuffer
* mpCurrentData
; /// Points to data buffer currently in use.
104 BiffDecoderRef mxDecoder
; /// Decoder object.
105 sal_Int64 mnHeaderPos
; /// Stream start position of current record header.
106 sal_Int64 mnBodyPos
; /// Stream start position of current record body.
107 sal_Int64 mnBufferBodyPos
; /// Stream start position of buffered data.
108 sal_Int64 mnNextHeaderPos
; /// Stream start position of next record header.
109 sal_uInt16 mnRecId
; /// Current record identifier.
110 sal_uInt16 mnRecSize
; /// Current record size.
111 sal_uInt16 mnRecPos
; /// Current position in record body.
112 bool mbValidHeader
; /// True = valid record header.
117 // ============================================================================
119 /** This class is used to read BIFF record streams.
121 An instance is constructed with a BinaryInputStream object. The passed
122 stream is reset to its start while constructing this stream.
124 To start reading a record call startNextRecord(). Now it is possible to
125 read all contents of the record using operator>>() or any of the read***()
126 functions. If some data exceeds the record size limit, the stream looks for
127 a following CONTINUE record and jumps automatically to it. It is NOT
128 allowed that an atomic data type is split into two records (e.g. 4 bytes of
129 a double in one record and the other 4 bytes in a following CONTINUE).
131 Trying to read over the record limits results in a stream error. The
132 isValid() function indicates that by returning false. From now on the data
133 returned by the read functions is undefined. The error state will be reset,
134 if the record is reset (with the function resetRecord()), or if the next
    record is started.
137 To switch off the automatic lookup of CONTINUE records, use resetRecord()
138 with false parameter. This is useful e.g. on import of drawing layer data,
139 where sometimes solely CONTINUE records will occur. The automatic lookup
140 keeps switched off until the method resetRecord() is called with parameter
141 true. All other settings done on the stream (e.g. alternative CONTINUE
142 record identifier, enabled decryption, NUL substitution character) will be
143 reset to default values, if a new record is started.
145 The import stream supports decrypting the stream data. The contents of a
146 record (not the record header) will be encrypted by Excel if the file has
147 been stored with password protection. The functions setDecoder() and
148 enableDecoder() control the usage of the decryption algorithms.
149 setDecoder() sets a new decryption algorithm and initially enables it.
150 enableDecoder( false ) may be used to stop the usage of the decryption
151 temporarily (sometimes record contents are never encrypted, e.g. all BOF
152 records or the stream position in SHEET records). Decryption will be
153 reenabled automatically, if a new record is started with the function
    startNextRecord().
156 class BiffInputStream
: public BinaryInputStream
159 /** Constructs the BIFF record stream using the passed binary stream.
    @param rInStream
162 The base input stream. Must be seekable. Will be seeked to its
    start position.
165 @param bContLookup Automatic CONTINUE lookup on/off.
167 explicit BiffInputStream(
168 BinaryInputStream
& rInStream
,
169 bool bContLookup
= true );
171 // record control ---------------------------------------------------------
173 /** Sets stream pointer to the start of the next record content.
175 Ignores all CONTINUE records of the current record, if automatic
176 CONTINUE usage is switched on.
178 @return False = no record found (end of stream).
180 bool startNextRecord();
182 /** Sets stream pointer to the start of the content of the specified record.
184 The handle of the current record can be received and stored using the
185 function getRecHandle() for later usage with this function. The record
186 handle is equivalent to the position of the underlying binary stream,
187 thus the function can be used to perform a hard seek to a specific
188 position, if it is sure that a record starts exactly at this position.
190 @return False = no record found (invalid handle passed).
192 bool startRecordByHandle( sal_Int64 nRecHandle
);
194 /** Sets stream pointer to begin of record content.
197 Automatic CONTINUE lookup on/off. In difference to other stream
198 settings, this setting is persistent until next call of this
199 function (because it is wanted to receive the next CONTINUE records
    @param nAltContId
202 Sets an alternative record identifier for content continuation.
203 This value is reset automatically when a new record is started with
208 sal_uInt16 nAltContId
= BIFF_ID_UNKNOWN
);
210 /** Sets stream pointer before current record and invalidates stream.
212 The next call to startNextRecord() will start again the current record.
213 This can be used in situations where a loop or a function leaves on a
214 specific record, but the parent context expects to start this record by
215 itself. The stream is invalid as long as the first record has not been
216 started (it is not allowed to call any other stream operation then).
220 // decoder ----------------------------------------------------------------
222 /** Sets a new decoder object.
224 Enables decryption of record contents for the rest of the stream.
226 void setDecoder( const BiffDecoderRef
& rxDecoder
);
228 /** Enables/disables usage of current decoder.
230 Decryption is reenabled automatically, if a new record is started using
231 the function startNextRecord().
233 void enableDecoder( bool bEnable
= true );
235 // stream/record state and info -------------------------------------------
237 /** Returns the current record identifier. */
238 inline sal_uInt16
getRecId() const { return mnRecId
; }
239 /** Returns the record identifier of the following record. */
240 sal_uInt16
getNextRecId();
242 /** Returns a unique handle for the current record that can be used with
243 the function startRecordByHandle(). */
244 inline sal_Int64
getRecHandle() const { return mnRecHandle
; }
246 // BinaryStreamBase interface (seeking) -----------------------------------
248 /** Returns true, as the BIFF input stream is required to be seekable. */
249 virtual bool isSeekable() const;
250 /** Returns the position inside of the whole record content. */
251 virtual sal_Int64
tell() const;
252 /** Returns the data size of the whole record without record headers. */
253 virtual sal_Int64
getLength() const;
254 /** Seeks in record content to the specified position. */
255 virtual void seek( sal_Int64 nRecPos
);
257 /** Returns the absolute position in the wrapped binary stream. */
258 sal_Int64
tellBase() const;
259 /** Returns the total size of the wrapped binary stream. */
260 sal_Int64
getBaseLength() const;
262 // BinaryInputStream interface (stream read access) -----------------------
264 /** Reads nBytes bytes to the passed sequence.
265 @return Number of bytes really read. */
266 virtual sal_Int32
readData( StreamDataSequence
& orData
, sal_Int32 nBytes
);
267 /** Reads nBytes bytes and copies them to the passed buffer opMem.
268 @return Number of bytes really read. */
269 virtual sal_Int32
readMemory( void* opMem
, sal_Int32 nBytes
);
270 /** Seeks forward inside the current record. */
271 virtual void skip( sal_Int32 nBytes
);
273 /** Stream operator for integral and floating-point types.
    Forwards to readValue() and returns this stream, so that calls can be chained. */
274 template< typename Type
>
275 inline BiffInputStream
& operator>>( Type
& ornValue
) { readValue( ornValue
); return *this; }
277 // byte strings -----------------------------------------------------------
279 /** Reads 8/16 bit string length and character array, and returns the string.
    @param b16BitLen
281 True = Read 16-bit string length field before the character array.
282 False = Read 8-bit string length field before the character array.
283 @param bAllowNulChars
284 True = NUL characters are inserted into the imported string.
285 False = NUL characters are replaced by question marks (default).
287 ::rtl::OString
readByteString( bool b16BitLen
, bool bAllowNulChars
= false );
289 /** Reads 8/16 bit string length and character array, and returns a Unicode string.
    @param b16BitLen
291 True = Read 16-bit string length field before the character array.
292 False = Read 8-bit string length field before the character array.
293 @param eTextEnc The text encoding used to create the Unicode string.
294 @param bAllowNulChars
295 True = NUL characters are inserted into the imported string.
296 False = NUL characters are replaced by question marks (default).
298 ::rtl::OUString
readByteStringUC( bool b16BitLen
, rtl_TextEncoding eTextEnc
, bool bAllowNulChars
= false );
300 /** Ignores 8/16 bit string length and character array.
    @param b16BitLen
302 True = Read 16-bit string length field before the character array.
303 False = Read 8-bit string length field before the character array.
305 void skipByteString( bool b16BitLen
);
307 // Unicode strings --------------------------------------------------------
309 /** Reads nChars characters of a BIFF8 string, and returns the string.
310 @param nChars Number of characters to read from the stream.
    @param b16BitChars
312 True = The character array contains 16-bit characters.
313 False = The character array contains truncated 8-bit characters.
314 @param bAllowNulChars
315 True = NUL characters are inserted into the imported string.
316 False = NUL characters are replaced by question marks (default).
318 ::rtl::OUString
readUniStringChars( sal_uInt16 nChars
, bool b16BitChars
, bool bAllowNulChars
= false );
320 /** Reads 8-bit flags, extended header, nChar characters, extended data of
321 a BIFF8 string, and returns the string.
322 @param nChars Number of characters to read from the stream.
323 @param bAllowNulChars
324 True = NUL characters are inserted into the imported string.
325 False = NUL characters are replaced by question marks (default).
327 ::rtl::OUString
readUniStringBody( sal_uInt16 nChars
, bool bAllowNulChars
= false );
329 /** Reads 16-bit character count, 8-bit flags, extended header, character
330 array, extended data of a BIFF8 string, and returns the string.
331 @param bAllowNulChars
332 True = NUL characters are inserted into the imported string.
333 False = NUL characters are replaced by question marks (default).
335 ::rtl::OUString
readUniString( bool bAllowNulChars
= false );
337 /** Ignores nChars characters of a BIFF8 string.
338 @param nChars Number of characters to skip in the stream.
    @param b16BitChars
340 True = The character array contains 16-bit characters.
341 False = The character array contains truncated 8-bit characters.
343 void skipUniStringChars( sal_uInt16 nChars
, bool b16BitChars
);
345 /** Ignores 8-bit flags, extended header, nChar characters, extended data
    of a BIFF8 string.
347 @param nChars Number of characters to skip in the stream.
349 void skipUniStringBody( sal_uInt16 nChars
);
351 /** Ignores 16-bit character count, 8-bit flags, extended header, character
352 array, extended data of a BIFF8 string.
354 void skipUniString();
356 // ------------------------------------------------------------------------
358 /** Forwards calls of readValue() template functions to the record buffer. */
359 virtual void readAtom( void* opMem
, sal_uInt8 nSize
);
361 /** Initializes all members after base stream has been seeked to new record. */
363 /** Restarts the current record from the beginning. */
364 void restartRecord( bool bInvalidateRecSize
);
365 /** Sets stream pointer before specified record and invalidates stream. */
366 void rewindToRecord( sal_Int64 nRecHandle
);
367 /** Returns true, if stream was able to start a valid record
    (i.e. the stored record handle is not negative). */
368 inline bool isInRecord() const { return mnRecHandle
>= 0; }
370 /** Returns true, if the passed ID is real or alternative continuation record ID. */
371 bool isContinueId( sal_uInt16 nRecId
) const;
372 /** Goes to start of the next CONTINUE record.
373 @descr Stream must be located at the end of a raw record, and handling
374 of CONTINUE records must be enabled.
375 @return True if next CONTINUE record has been found and initialized. */
376 bool jumpToNextContinue();
377 /** Goes to start of the next CONTINUE record while reading strings.
378 @descr Stream must be located at the end of a raw record. If reading
379 has been started in a CONTINUE record, jumps to an existing following
380 CONTINUE record, even if handling of CONTINUE records is disabled (this
381 is a special handling for TXO string data). Reads additional Unicode
382 flag byte at start of the new raw record and sets or resets rb16BitChars.
383 @return True if next CONTINUE record has been found and initialized. */
384 bool jumpToNextStringContinue( bool& rb16BitChars
);
385 /** Calculates the complete length of the current record including CONTINUE
386 records, stores the length in mnComplRecSize. */
387 void calcRecordLength();
389 /** Ensures that reading nBytes bytes is possible with next stream access.
390 @descr Stream must be located at the end of a raw record, and handling
391 of CONTINUE records must be enabled.
392 @return True if nBytes can be read from stream. */
393 bool ensureRawReadSize( sal_uInt16 nBytes
);
394 /** Returns the maximum size of raw data possible to read in one block. */
395 sal_uInt16
getMaxRawReadSize( sal_Int32 nBytes
) const;
397 /** Reads an array of Unicode characters and appends them to the passed buffer. */
398 void appendUnicodeArray( ::rtl::OUStringBuffer
& orBuffer
, sal_uInt16 nChars
, bool b16BitChars
, bool bAllowNulChars
);
399 /** Reads the BIFF8 Unicode string header fields. */
400 void readUniStringHeader( bool& orb16BitChars
, sal_Int32
& ornAddSize
);
403 prv::BiffInputRecordBuffer maRecBuffer
; /// Raw record data buffer.
405 sal_Int64 mnRecHandle
; /// Handle of current record.
406 sal_uInt16 mnRecId
; /// Identifier of current record (not the CONTINUE ID).
407 sal_uInt16 mnAltContId
; /// Alternative identifier for content continuation records.
409 sal_Int64 mnCurrRecSize
; /// Helper for record size and position.
410 sal_Int64 mnComplRecSize
; /// Size of complete record data (with CONTINUEs).
411 bool mbHasComplRec
; /// True = mnComplRecSize is valid.
413 bool mbCont
; /// True = automatic CONTINUE lookup enabled.
416 // ============================================================================
/** Remembers a position of a BIFF input stream so that it can be restored later.

    Holds a reference to the stream and a record handle. NOTE(review): the
    implementation is not visible in this header; presumably the constructor
    records the current position of rStrm and restorePosition() returns to
    it, with false signalling failure - confirm against the source file. */
418 class BiffInputStreamPos
421 explicit BiffInputStreamPos( BiffInputStream
& rStrm
);
423 bool restorePosition();
/** Returns the stream this object refers to. */
425 inline BiffInputStream
& getStream() { return mrStrm
; }
428 BiffInputStream
& mrStrm
; /// Stream returned by getStream().
429 sal_Int64 mnRecHandle
; /// Record handle; presumably that of the record current at construction (TODO confirm).
433 // ============================================================================
435 /** Stores the current position of the passed stream on construction and
436 restores it automatically on destruction.

    Derives privately from BiffInputStreamPos, so the base class members are
    not part of this class's public interface. */
437 class BiffInputStreamPosGuard
: private BiffInputStreamPos
440 explicit BiffInputStreamPosGuard( BiffInputStream
& rStrm
);
441 ~BiffInputStreamPosGuard();
444 // ============================================================================