1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: pdfentries.cxx,v $
13 * This file is part of OpenOffice.org.
15 * OpenOffice.org is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU Lesser General Public License version 3
17 * only, as published by the Free Software Foundation.
19 * OpenOffice.org is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License version 3 for more details
23 * (a copy is included in the LICENSE file that accompanied this code).
25 * You should have received a copy of the GNU Lesser General Public License
26 * version 3 along with OpenOffice.org. If not, see
27 * <http://www.openoffice.org/license.html>
28 * for a copy of the LGPLv3 License.
30 ************************************************************************/
32 // MARKER(update_precomp.py): autogen include statement, do not remove
33 #include "precompiled_sdext.hxx"
35 #include <pdfparse.hxx>
37 #include <rtl/strbuf.hxx>
38 #include <rtl/ustring.hxx>
39 #include <rtl/alloc.h>
40 #include <rtl/digest.h>
41 #include <rtl/cipher.h>
42 #include <rtl/memory.h>
46 #include <zlib/zlib.h>
// Data members of EmitImplData: the cross-reference table that insertXref()
// maintains and PDFTrailer::emit() writes out, plus the container that is
// consulted for indirect objects and for decryption.
61 // xref table: maps object number to a pair of (generation, buffer offset)
62 typedef std::map
< unsigned int, std::pair
< unsigned int, unsigned int > > XRefTable
;
63 XRefTable m_aXRefTable
;
64 // container of all indirect objects (usually a PDFFile*)
65 const PDFContainer
* m_pObjectContainer
;
67 // returns true if the xref table was updated
68 bool insertXref( unsigned int nObject
, unsigned int nGeneration
, unsigned int nOffset
)
70 XRefTable::iterator it
= m_aXRefTable
.find( nObject
);
71 if( it
== m_aXRefTable
.end() )
74 m_aXRefTable
[ nObject
] = std::pair
<unsigned int, unsigned int>(nGeneration
,nOffset
);
77 // update old entry, if generation number is higher
78 if( it
->second
.first
< nGeneration
)
80 it
->second
= std::pair
<unsigned int, unsigned int>(nGeneration
,nOffset
);
// Constructor: remembers pTopContainer in m_pObjectContainer without copying it.
// NOTE(review): the constructor body is not part of this listing.
86 EmitImplData( const PDFContainer
* pTopContainer
) :
87 m_pObjectContainer( pTopContainer
)
90 bool decrypt( const sal_uInt8
* pInBuffer
, sal_uInt32 nLen
, sal_uInt8
* pOutBuffer
,
91 unsigned int nObject
, unsigned int nGeneration
) const
93 const PDFFile
* pFile
= dynamic_cast<const PDFFile
*>(m_pObjectContainer
);
94 return pFile
? pFile
->decrypt( pInBuffer
, nLen
, pOutBuffer
, nObject
, nGeneration
) : false;
100 using namespace pdfparse
;
// Constructs an emit context; the visible statement attaches a freshly
// allocated EmitImplData for pTop to m_pImplData.
// NOTE(review): the initializer list and any condition guarding the
// allocation are not part of this listing.
102 EmitContext::EmitContext( const PDFContainer
* pTop
) :
108 m_pImplData
= new EmitImplData( pTop
);
111 EmitContext::~EmitContext()
116 PDFEntry::~PDFEntry()
120 EmitImplData
* PDFEntry::getEmitData( EmitContext
& rContext
) const
122 return rContext
.m_pImplData
;
125 void PDFEntry::setEmitData( EmitContext
& rContext
, EmitImplData
* pNewEmitData
) const
127 if( rContext
.m_pImplData
&& rContext
.m_pImplData
!= pNewEmitData
)
128 delete rContext
.m_pImplData
;
129 rContext
.m_pImplData
= pNewEmitData
;
132 PDFValue::~PDFValue()
136 PDFComment::~PDFComment()
140 bool PDFComment::emit( EmitContext
& rWriteContext
) const
142 return rWriteContext
.write( m_aComment
.getStr(), m_aComment
.getLength() );
145 PDFEntry
* PDFComment::clone() const
147 return new PDFComment( m_aComment
);
154 bool PDFName::emit( EmitContext
& rWriteContext
) const
156 if( ! rWriteContext
.write( " /", 2 ) )
158 return rWriteContext
.write( m_aName
.getStr(), m_aName
.getLength() );
161 PDFEntry
* PDFName::clone() const
163 return new PDFName( m_aName
);
166 OUString
PDFName::getFilteredName() const
168 OStringBuffer
aFilter( m_aName
.getLength() );
169 const sal_Char
* pStr
= m_aName
.getStr();
170 unsigned int nLen
= m_aName
.getLength();
171 for( unsigned int i
= 0; i
< nLen
; i
++ )
173 if( pStr
[i
] == '#' && i
< nLen
- 3 )
175 sal_Char rResult
= 0;
177 if( pStr
[i
] >= '0' && pStr
[i
] <= '9' )
178 rResult
= sal_Char( pStr
[i
]-'0' ) << 4;
179 else if( pStr
[i
] >= 'a' && pStr
[i
] <= 'f' )
180 rResult
= sal_Char( pStr
[i
]-'a' + 10 ) << 4;
181 else if( pStr
[i
] >= 'A' && pStr
[i
] <= 'F' )
182 rResult
= sal_Char( pStr
[i
]-'A' + 10 ) << 4;
184 if( pStr
[i
] >= '0' && pStr
[i
] <= '9' )
185 rResult
|= sal_Char( pStr
[i
]-'0' );
186 else if( pStr
[i
] >= 'a' && pStr
[i
] <= 'f' )
187 rResult
|= sal_Char( pStr
[i
]-'a' + 10 );
188 else if( pStr
[i
] >= 'A' && pStr
[i
] <= 'F' )
189 rResult
|= sal_Char( pStr
[i
]-'A' + 10 );
190 aFilter
.append( rResult
);
193 aFilter
.append( pStr
[i
] );
195 return OStringToOUString( aFilter
.makeStringAndClear(), RTL_TEXTENCODING_UTF8
);
198 PDFString::~PDFString()
202 bool PDFString::emit( EmitContext
& rWriteContext
) const
204 if( ! rWriteContext
.write( " ", 1 ) )
206 return rWriteContext
.write( m_aString
.getStr(), m_aString
.getLength() );
209 PDFEntry
* PDFString::clone() const
211 return new PDFString( m_aString
);
// Returns the bytes denoted by the raw string token m_aString.
// Two notations are handled in the visible code: a form with backslash
// escapes (r, t, b, f, parentheses, backslash, line ends and up to three octal
// digits) and a hexadecimal form introduced by '<' and terminated by '>'.
// NOTE(review): several lines of this function (among them the test that
// selects the first form and the buffer appends for the escapes) are not part
// of this listing.
214 OString
PDFString::getFilteredString() const
216 int nLen
= m_aString
.getLength();
217 OStringBuffer
aBuf( nLen
);
219 const sal_Char
* pStr
= m_aString
.getStr();
// scanning starts behind the first character and stops before the last one
222 const sal_Char
* pRun
= pStr
+1;
223 while( pRun
- pStr
< nLen
-1 )
228 if( pRun
- pStr
< nLen
)
233 else if( *pRun
== 'r' )
235 else if( *pRun
== 't' )
237 else if( *pRun
== 'b' )
239 else if( *pRun
== 'f' )
241 else if( *pRun
== '(' )
243 else if( *pRun
== ')' )
245 else if( *pRun
== '\\' )
247 else if( *pRun
== '\n' )
252 else if( *pRun
== '\r' )
// octal escape: accumulate at most three digits in the range '0'..'7'
262 while( i
++ < 3 && *pRun
>= '0' && *pRun
<= '7' )
263 aEsc
= 8*aEsc
+ (*pRun
++ - '0');
264 // move pointer back to last character of octal sequence
271 aBuf
.append( *pRun
);
272 // move pointer to next character
// hexadecimal form: two digits make one byte, high nibble first
276 else if( *pStr
== '<' )
278 const sal_Char
* pRun
= pStr
+1;
279 while( *pRun
!= '>' && pRun
- pStr
< nLen
)
281 sal_Char rResult
= 0;
282 if( *pRun
>= '0' && *pRun
<= '9' )
283 rResult
= sal_Char( *pRun
-'0' ) << 4;
284 else if( *pRun
>= 'a' && *pRun
<= 'f' )
285 rResult
= sal_Char( *pRun
-'a' + 10 ) << 4;
286 else if( *pRun
>= 'A' && *pRun
<= 'F' )
287 rResult
= sal_Char( *pRun
-'A' + 10 ) << 4;
// the low nibble is only read if the string does not end after the high one
289 if( *pRun
!= '>' && pRun
- pStr
< nLen
)
291 if( *pRun
>= '0' && *pRun
<= '9' )
292 rResult
|= sal_Char( *pRun
-'0' );
293 else if( *pRun
>= 'a' && *pRun
<= 'f' )
294 rResult
|= sal_Char( *pRun
-'a' + 10 );
295 else if( *pRun
>= 'A' && *pRun
<= 'F' )
296 rResult
|= sal_Char( *pRun
-'A' + 10 );
299 aBuf
.append( rResult
);
303 return aBuf
.makeStringAndClear();
306 PDFNumber::~PDFNumber()
// Formats m_fValue as decimal text and writes it to rWriteContext.
// The value is split into an integer part and a fractional part; the
// fraction is scaled by 10^nPrecision and appended digit by digit.
// NOTE(review): the declarations of nPrecision, bNeg, nFrac and i, the sign
// handling and the per-digit update of nBound are not part of this listing.
310 bool PDFNumber::emit( EmitContext
& rWriteContext
) const
312 rtl::OStringBuffer
aBuf( 32 );
315 double fValue
= m_fValue
;
// split off the integer part, leaving only the fraction in fValue
324 sal_Int64 nInt
= (sal_Int64
)fValue
;
325 fValue
-= (double)nInt
;
326 // optimizing hardware may lead to a value of 1.0 after the subtraction
327 if( fValue
== 1.0 || log10( 1.0-fValue
) <= -nPrecision
)
// turn the fraction into an integer with nPrecision digits
335 fValue
*= pow( 10.0, (double)nPrecision
);
336 nFrac
= (sal_Int64
)fValue
;
338 if( bNeg
&& ( nInt
|| nFrac
) )
// nBound is the weight of the most significant fractional digit
345 sal_Int64 nBound
= (sal_Int64
)(pow( 10.0, nPrecision
- 1.0 )+0.5);
// the loop ends early once no fractional digits remain
346 for ( i
= 0; ( i
< nPrecision
) && nFrac
; i
++ )
348 sal_Int64 nNumb
= nFrac
/ nBound
;
349 nFrac
-= nNumb
* nBound
;
350 aBuf
.append( nNumb
);
355 return rWriteContext
.write( aBuf
.getStr(), aBuf
.getLength() );
358 PDFEntry
* PDFNumber::clone() const
360 return new PDFNumber( m_fValue
);
368 bool PDFBool::emit( EmitContext
& rWriteContext
) const
370 return m_bValue
? rWriteContext
.write( " true", 5 ) : rWriteContext
.write( " false", 6 );
373 PDFEntry
* PDFBool::clone() const
375 return new PDFBool( m_bValue
);
382 bool PDFNull::emit( EmitContext
& rWriteContext
) const
384 return rWriteContext
.write( " null", 5 );
387 PDFEntry
* PDFNull::clone() const
389 return new PDFNull();
393 PDFObjectRef::~PDFObjectRef()
// Writes an indirect reference: object number, generation number and the
// trailing " R" keyword are collected in a buffer and written in one call.
// NOTE(review): the statements separating the two numbers are not part of
// this listing.
397 bool PDFObjectRef::emit( EmitContext
& rWriteContext
) const
399 OStringBuffer
aBuf( 16 );
401 aBuf
.append( sal_Int32( m_nNumber
) );
403 aBuf
.append( sal_Int32( m_nGeneration
) );
404 aBuf
.append( " R", 2 );
405 return rWriteContext
.write( aBuf
.getStr(), aBuf
.getLength() );
408 PDFEntry
* PDFObjectRef::clone() const
410 return new PDFObjectRef( m_nNumber
, m_nGeneration
);
413 PDFContainer::~PDFContainer()
415 int nEle
= m_aSubElements
.size();
416 for( int i
= 0; i
< nEle
; i
++ )
417 delete m_aSubElements
[i
];
420 bool PDFContainer::emitSubElements( EmitContext
& rWriteContext
) const
422 int nEle
= m_aSubElements
.size();
423 for( int i
= 0; i
< nEle
; i
++ )
425 if( ! m_aSubElements
[i
]->emit( rWriteContext
) )
431 void PDFContainer::cloneSubElements( std::vector
<PDFEntry
*>& rNewSubElements
) const
433 int nEle
= m_aSubElements
.size();
434 for( int i
= 0; i
< nEle
; i
++ )
435 rNewSubElements
.push_back( m_aSubElements
[i
]->clone() );
438 PDFObject
* PDFContainer::findObject( unsigned int nNumber
, unsigned int nGeneration
) const
440 unsigned int nEle
= m_aSubElements
.size();
441 for( unsigned int i
= 0; i
< nEle
; i
++ )
443 PDFObject
* pObject
= dynamic_cast<PDFObject
*>(m_aSubElements
[i
]);
445 pObject
->m_nNumber
== nNumber
&&
446 pObject
->m_nGeneration
== nGeneration
)
454 PDFArray::~PDFArray()
458 bool PDFArray::emit( EmitContext
& rWriteContext
) const
460 if( ! rWriteContext
.write( "[", 1 ) )
462 if( ! emitSubElements( rWriteContext
) )
464 return rWriteContext
.write( "]", 1 );
467 PDFEntry
* PDFArray::clone() const
469 PDFArray
* pNewAr
= new PDFArray();
470 cloneSubElements( pNewAr
->m_aSubElements
);
478 bool PDFDict::emit( EmitContext
& rWriteContext
) const
480 if( ! rWriteContext
.write( "<<\n", 3 ) )
482 if( ! emitSubElements( rWriteContext
) )
484 return rWriteContext
.write( "\n>>\n", 4 );
// Sets the value stored under rName.
// An unknown name is appended to the sub elements as a new PDFName followed
// by pValue; for a known name every sub element pointing at the old value is
// redirected to pValue. In both cases the lookup map is updated.
// NOTE(review): the statements between the signature and the map lookup are
// not part of this listing.
// NOTE(review): when a value is replaced, the old entry is not deleted in the
// visible code — confirm who owns it.
487 void PDFDict::insertValue( const OString
& rName
, PDFEntry
* pValue
)
492 std::hash_map
<OString
,PDFEntry
*,OStringHash
>::iterator it
= m_aMap
.find( rName
);
493 if( it
== m_aMap
.end() )
495 // new name/value pair, append it
496 m_aSubElements
.push_back( new PDFName( rName
) );
497 m_aSubElements
.push_back( pValue
);
// existing name: swap the value pointer in place to keep the element order
501 unsigned int nSub
= m_aSubElements
.size();
502 for( unsigned int i
= 0; i
< nSub
; i
++ )
503 if( m_aSubElements
[i
] == it
->second
)
504 m_aSubElements
[i
] = pValue
;
507 m_aMap
[ rName
] = pValue
;
// Removes the entry named rName: the first PDFName sub element equal to rName
// and the next sub element after it that is not a comment are deleted and
// erased from the sub element vector.
// NOTE(review): what follows the two erase calls (leaving the loops,
// refreshing the lookup map) is not part of this listing.
510 void PDFDict::eraseValue( const OString
& rName
)
512 unsigned int nEle
= m_aSubElements
.size();
513 for( unsigned int i
= 0; i
< nEle
; i
++ )
515 PDFName
* pName
= dynamic_cast<PDFName
*>(m_aSubElements
[i
]);
516 if( pName
&& pName
->m_aName
.equals( rName
) )
// the value is the first non-comment element behind the name
518 for( unsigned int j
= i
+1; j
< nEle
; j
++ )
520 if( dynamic_cast<PDFComment
*>(m_aSubElements
[j
]) == NULL
)
522 // free name and value
523 delete m_aSubElements
[j
];
524 delete m_aSubElements
[i
];
525 // remove subelements from vector
// j > i, so erasing j first keeps index i valid
526 m_aSubElements
.erase( m_aSubElements
.begin()+j
);
527 m_aSubElements
.erase( m_aSubElements
.begin()+i
);
// Fills the lookup map m_aMap from the sub elements, skipping comments:
// a PDFName is remembered in pName and the following non-comment element is
// stored as its value.
// If a non-comment element appears where a name is expected and is not a
// PDFName, that element is returned.
// NOTE(review): the condition in front of the map assignment, the reset of
// pName and the final return are not part of this listing.
536 PDFEntry
* PDFDict::buildMap()
541 unsigned int nEle
= m_aSubElements
.size();
542 PDFName
* pName
= NULL
;
543 for( unsigned int i
= 0; i
< nEle
; i
++ )
545 if( dynamic_cast<PDFComment
*>(m_aSubElements
[i
]) == NULL
)
549 m_aMap
[ pName
->m_aName
] = m_aSubElements
[i
];
552 else if( (pName
= dynamic_cast<PDFName
*>(m_aSubElements
[i
])) == NULL
)
553 return m_aSubElements
[i
];
559 PDFEntry
* PDFDict::clone() const
561 PDFDict
* pNewDict
= new PDFDict();
562 cloneSubElements( pNewDict
->m_aSubElements
);
563 pNewDict
->buildMap();
567 PDFStream::~PDFStream()
571 bool PDFStream::emit( EmitContext
& rWriteContext
) const
573 return rWriteContext
.copyOrigBytes( m_nBeginOffset
, m_nEndOffset
-m_nBeginOffset
);
576 PDFEntry
* PDFStream::clone() const
578 return new PDFStream( m_nBeginOffset
, m_nEndOffset
, NULL
);
// Returns the value of the /Length entry of the stream dictionary.
// A direct number is used as is; otherwise, if pContainer is given, the entry
// is treated as an object reference and the matching PDFObject is searched
// among pContainer's sub elements. Returns 0 if no number is found.
// NOTE(review): the checks of m_pDict and pRef and the early return for a
// missing entry are not part of this listing.
581 unsigned int PDFStream::getDictLength( const PDFContainer
* pContainer
) const
585 // find /Length entry, can either be a direct or indirect number object
586 std::hash_map
<OString
,PDFEntry
*,OStringHash
>::const_iterator it
=
587 m_pDict
->m_aMap
.find( "Length" );
588 if( it
== m_pDict
->m_aMap
.end() )
590 PDFNumber
* pNum
= dynamic_cast<PDFNumber
*>(it
->second
);
// not a direct number: try to resolve it as an indirect reference
591 if( ! pNum
&& pContainer
)
593 PDFObjectRef
* pRef
= dynamic_cast<PDFObjectRef
*>(it
->second
);
// the search stops at the first matching object whose content is a number
596 int nEle
= pContainer
->m_aSubElements
.size();
597 for( int i
= 0; i
< nEle
&& ! pNum
; i
++ )
599 PDFObject
* pObj
= dynamic_cast<PDFObject
*>(pContainer
->m_aSubElements
[i
]);
601 pObj
->m_nNumber
== pRef
->m_nNumber
&&
602 pObj
->m_nGeneration
== pRef
->m_nGeneration
)
604 if( pObj
->m_pObject
)
605 pNum
= dynamic_cast<PDFNumber
*>(pObj
->m_pObject
);
611 return pNum
? static_cast<unsigned int>(pNum
->m_fValue
) : 0;
614 PDFObject::~PDFObject()
// Reads the raw bytes of the object's stream into a buffer allocated with
// rtl_allocateMemory and hands it out through ppStream, with the payload
// length from the stream dictionary in pBytes.
// The "stream" keyword and the line end behind it are skipped, the payload is
// moved to the start of the buffer and, if rContext.m_bDecrypt is set,
// decrypted in place. bIsDeflated records whether the /Filter entry is the
// name FlateDecode.
// On the visible fallback path *ppStream is set to NULL and *pBytes to 0.
// NOTE(review): the return statements, the arguments of the decrypt call and
// the exact branch structure are not part of this listing.
618 bool PDFObject::getDeflatedStream( char** ppStream
, unsigned int* pBytes
, const PDFContainer
* pObjectContainer
, EmitContext
& rContext
) const
620 bool bIsDeflated
= false;
621 if( m_pStream
&& m_pStream
->m_pDict
&&
622 m_pStream
->m_nEndOffset
> m_pStream
->m_nBeginOffset
+15
625 unsigned int nOuterStreamLen
= m_pStream
->m_nEndOffset
- m_pStream
->m_nBeginOffset
;
626 *ppStream
= static_cast<char*>(rtl_allocateMemory( nOuterStreamLen
));
632 unsigned int nRead
= rContext
.readOrigBytes( m_pStream
->m_nBeginOffset
, nOuterStreamLen
, *ppStream
);
// a short read releases the buffer again
633 if( nRead
!= nOuterStreamLen
)
635 rtl_freeMemory( *ppStream
);
640 // is there a filter entry ?
641 std::hash_map
<OString
,PDFEntry
*,OStringHash
>::const_iterator it
=
642 m_pStream
->m_pDict
->m_aMap
.find( "Filter" );
643 if( it
!= m_pStream
->m_pDict
->m_aMap
.end() )
645 // is the (first) filter FlateDecode ?
646 PDFName
* pFilter
= dynamic_cast<PDFName
*>(it
->second
);
647 if( pFilter
&& pFilter
->m_aName
.equals( "FlateDecode" ) )
652 // prepare compressed data section
653 char* pStream
= *ppStream
;
654 if( pStream
[0] == 's' )
655 pStream
+= 6; // skip "stream"
656 // skip line end after "stream"
657 while( *pStream
== '\r' || *pStream
== '\n' )
659 // get the compressed length
660 *pBytes
= m_pStream
->getDictLength( pObjectContainer
);
// NOTE(review): *pBytes comes from the dictionary and is not compared with
// nOuterStreamLen in the visible code before the move below — confirm.
661 if( pStream
!= *ppStream
)
662 rtl_moveMemory( *ppStream
, pStream
, *pBytes
);
663 if( rContext
.m_bDecrypt
)
665 EmitImplData
* pEData
= getEmitData( rContext
);
666 pEData
->decrypt( reinterpret_cast<const sal_uInt8
*>(*ppStream
),
668 reinterpret_cast<sal_uInt8
*>(*ppStream
),
671 ); // decrypt inplace
675 *ppStream
= NULL
, *pBytes
= 0;
676 // FIXME: one could also deflate if FlateDecode was the
677 // first filter in an array
// Inflates nLen bytes starting at pBegin with zlib.
// The output buffer *pOutBuf is (re)allocated with rtl_reallocateMemory and
// grows in steps of buf_increment_size whenever it runs full before the end
// of the compressed stream. After Z_STREAM_END, *pOutLen is reduced by the
// unused space left in the last step.
// The visible failure path frees *pOutBuf.
// NOTE(review): the declaration of aZStr and err, the inflateInit/inflateEnd
// calls and the remaining failure handling are not part of this listing.
// NOTE(review): the results of rtl_reallocateMemory are used unchecked.
681 static void unzipToBuffer( const char* pBegin
, unsigned int nLen
,
682 sal_uInt8
** pOutBuf
, sal_uInt32
* pOutLen
)
685 aZStr
.next_in
= (Bytef
*)pBegin
;
686 aZStr
.avail_in
= nLen
;
// null allocator hooks
687 aZStr
.zalloc
= ( alloc_func
)0;
688 aZStr
.zfree
= ( free_func
)0;
689 aZStr
.opaque
= ( voidpf
)0;
692 const unsigned int buf_increment_size
= 16384;
694 *pOutBuf
= (sal_uInt8
*)rtl_reallocateMemory( *pOutBuf
, buf_increment_size
);
695 aZStr
.next_out
= (Bytef
*)*pOutBuf
;
696 aZStr
.avail_out
= buf_increment_size
;
698 *pOutLen
= buf_increment_size
;
// inflate until the stream ends, an error occurs or the input is used up
699 while( err
!= Z_STREAM_END
&& err
>= Z_OK
&& aZStr
.avail_in
)
701 err
= inflate( &aZStr
, Z_NO_FLUSH
);
// output buffer exhausted: enlarge it and continue behind the old data
702 if( aZStr
.avail_out
== 0 )
704 if( err
!= Z_STREAM_END
)
706 const int nNewAlloc
= *pOutLen
+ buf_increment_size
;
707 *pOutBuf
= (sal_uInt8
*)rtl_reallocateMemory( *pOutBuf
, nNewAlloc
);
708 aZStr
.next_out
= (Bytef
*)(*pOutBuf
+ *pOutLen
);
709 aZStr
.avail_out
= buf_increment_size
;
710 *pOutLen
= nNewAlloc
;
714 if( err
== Z_STREAM_END
)
716 if( aZStr
.avail_out
> 0 )
717 *pOutLen
-= aZStr
.avail_out
;;
722 rtl_freeMemory( *pOutBuf
);
728 bool PDFObject::writeStream( EmitContext
& rWriteContext
, const PDFFile
* pParsedFile
) const
730 bool bSuccess
= false;
733 char* pStream
= NULL
;
734 unsigned int nBytes
= 0;
735 if( getDeflatedStream( &pStream
, &nBytes
, pParsedFile
, rWriteContext
) && nBytes
)
737 sal_uInt8
* pOutBytes
= NULL
;
738 sal_uInt32 nOutBytes
= 0;
739 unzipToBuffer( pStream
, nBytes
, &pOutBytes
, &nOutBytes
);
740 rWriteContext
.write( pOutBytes
, nOutBytes
);
741 rtl_freeMemory( pOutBytes
);
743 else if( pStream
&& nBytes
)
744 rWriteContext
.write( pStream
, nBytes
);
745 rtl_freeMemory( pStream
);
// Writes this indirect object: a line break, the "<number> <generation> obj"
// header, the content and the closing "endobj". The start position is
// recorded in the xref table of the emit data.
// If rWriteContext.m_bDeflate is set and the object carries a stream that
// getDeflatedStream() can provide, the stream is written inflated instead: a
// clone of the object gets its /Length replaced by the inflated size and its
// /Filter entry removed, all sub elements except the stream are emitted, and
// the inflated bytes follow between "stream" and "endstream".
// NOTE(review): the null check before insertXref, the declaration of bRet,
// the conditions around the clone handling, the intermediate returns and the
// disposal of pClone are not part of this listing.
750 bool PDFObject::emit( EmitContext
& rWriteContext
) const
752 if( ! rWriteContext
.write( "\n", 1 ) )
755 EmitImplData
* pEData
= getEmitData( rWriteContext
);
// remember where this object starts for the xref section
757 pEData
->insertXref( m_nNumber
, m_nGeneration
, rWriteContext
.getCurPos() );
759 OStringBuffer
aBuf( 32 );
760 aBuf
.append( sal_Int32( m_nNumber
) );
762 aBuf
.append( sal_Int32( m_nGeneration
) );
763 aBuf
.append( " obj\n" );
764 if( ! rWriteContext
.write( aBuf
.getStr(), aBuf
.getLength() ) )
767 if( rWriteContext
.m_bDeflate
&& pEData
)
769 char* pStream
= NULL
;
770 unsigned int nBytes
= 0;
771 if( getDeflatedStream( &pStream
, &nBytes
, pEData
->m_pObjectContainer
, rWriteContext
)
772 && pStream
&& nBytes
)
775 sal_uInt8
* pOutBytes
= NULL
;
776 sal_uInt32 nOutBytes
= 0;
777 unzipToBuffer( pStream
, nBytes
, &pOutBytes
, &nOutBytes
);
// work on a clone so that this (const) object stays untouched
781 PDFObject
* pClone
= static_cast<PDFObject
*>(clone());
782 // set length in the dictionary to new stream length
783 PDFNumber
* pNewLen
= new PDFNumber( double(nOutBytes
) );
784 pClone
->m_pStream
->m_pDict
->insertValue( "Length", pNewLen
);
785 // delete flatedecode filter
786 pClone
->m_pStream
->m_pDict
->eraseValue( "Filter" );
788 // write sub elements except stream
790 unsigned int nEle
= pClone
->m_aSubElements
.size();
791 for( unsigned int i
= 0; i
< nEle
&& bRet
; i
++ )
793 if( pClone
->m_aSubElements
[i
] != pClone
->m_pStream
)
794 bRet
= pClone
->m_aSubElements
[i
]->emit( rWriteContext
);
799 rWriteContext
.write( "stream\n", 7 );
801 bRet
= rWriteContext
.write( pOutBytes
, nOutBytes
);
803 bRet
= rWriteContext
.write( "\nendstream\nendobj\n", 18 );
804 rtl_freeMemory( pStream
);
805 rtl_freeMemory( pOutBytes
);
808 rtl_freeMemory( pOutBytes
);
810 rtl_freeMemory( pStream
);
// default path: emit the sub elements unchanged
813 if( ! emitSubElements( rWriteContext
) )
815 return rWriteContext
.write( "\nendobj\n", 8 );
// Returns a newly allocated PDFObject with the same object and generation
// number and clones of all sub elements.
// m_pObject and m_pStream of the copy are set to the cloned elements at the
// same positions as in this object; the cloned stream's m_pDict is set from
// the clone's m_pObject cast to a PDFDict.
// NOTE(review): the check of pNewDict and the final return are not part of
// this listing.
818 PDFEntry
* PDFObject::clone() const
820 PDFObject
* pNewOb
= new PDFObject( m_nNumber
, m_nGeneration
);
821 cloneSubElements( pNewOb
->m_aSubElements
);
822 unsigned int nEle
= m_aSubElements
.size();
823 for( unsigned int i
= 0; i
< nEle
; i
++ )
825 if( m_aSubElements
[i
] == m_pObject
)
826 pNewOb
->m_pObject
= pNewOb
->m_aSubElements
[i
];
// the stream is only linked once the clone's m_pObject has been set
827 else if( m_aSubElements
[i
] == m_pStream
&& pNewOb
->m_pObject
)
829 pNewOb
->m_pStream
= dynamic_cast<PDFStream
*>(pNewOb
->m_aSubElements
[i
]);
830 PDFDict
* pNewDict
= dynamic_cast<PDFDict
*>(pNewOb
->m_pObject
);
832 pNewOb
->m_pStream
->m_pDict
= pNewDict
;
838 PDFTrailer::~PDFTrailer()
// Writes the cross-reference section and the trailer.
// The section starts with "xref" and the entry for object 0. The xref table
// collected in the emit data is then written in subsections of consecutive
// object numbers; each subsection has a "<first> <count>" header followed by
// one 20-character entry per object. After that come "trailer", the trailer's
// sub elements, "startxref" with the position of the xref section, and
// "%%EOF".
// NOTE(review): the returns after failed writes, the zero padding appends,
// the advance of section_begin and the handling of missing emit data are not
// part of this listing.
842 bool PDFTrailer::emit( EmitContext
& rWriteContext
) const
// position of the xref section, written behind "startxref" below
845 unsigned int nXRefPos
= rWriteContext
.getCurPos();
846 // begin xref section, object 0 is always free
847 if( ! rWriteContext
.write( "xref\r\n"
849 "0000000000 65535 f\r\n", 31 ) )
851 // check if we are emitting a complete PDF file
852 EmitImplData
* pEData
= getEmitData( rWriteContext
);
856 const EmitImplData::XRefTable
& rXRefs
= pEData
->m_aXRefTable
;
857 EmitImplData::XRefTable::const_iterator section_begin
, section_end
;
858 section_begin
= rXRefs
.begin();
859 while( section_begin
!= rXRefs
.end() )
861 // find end of continuous object numbers
862 section_end
= section_begin
;
863 unsigned int nLast
= section_begin
->first
;
864 while( (++section_end
) != rXRefs
.end() &&
865 section_end
->first
== nLast
+1 )
866 nLast
= section_end
->first
;
867 // write first object number and number of following entries
868 OStringBuffer
aBuf( 21 );
869 aBuf
.append( sal_Int32( section_begin
->first
) );
871 aBuf
.append( sal_Int32(nLast
- section_begin
->first
+ 1) );
872 aBuf
.append( "\r\n" );
873 if( ! rWriteContext
.write( aBuf
.getStr(), aBuf
.getLength() ) )
875 while( section_begin
!= section_end
)
877 // write 20 char entry of form
878 // 0000offset 00gen n\r\n
// the offset field is 10 characters wide
880 OString
aOffset( OString::valueOf( sal_Int64(section_begin
->second
.second
) ) );
881 int nPad
= 10 - aOffset
.getLength();
882 for( int i
= 0; i
< nPad
; i
++ )
884 aBuf
.append( aOffset
);
// the generation field is 5 characters wide
886 OString
aGeneration( OString::valueOf( sal_Int32(section_begin
->second
.first
) ) );
887 nPad
= 5 - aGeneration
.getLength();
888 for( int i
= 0; i
< nPad
; i
++ )
890 aBuf
.append( aGeneration
);
891 aBuf
.append( " n\r\n" );
892 if( ! rWriteContext
.write( aBuf
.getStr(), 20 ) )
898 if( ! rWriteContext
.write( "trailer\n", 8 ) )
900 if( ! emitSubElements( rWriteContext
) )
902 if( ! rWriteContext
.write( "startxref\n", 10 ) )
904 rtl::OString
aOffset( rtl::OString::valueOf( sal_Int32(nXRefPos
) ) );
905 if( ! rWriteContext
.write( aOffset
.getStr(), aOffset
.getLength() ) )
907 return rWriteContext
.write( "\n%%EOF\n", 7 );
// Returns a newly allocated trailer with clones of all sub elements.
// The copy's m_pDict is set to the cloned element at the same position as
// this trailer's m_pDict.
// NOTE(review): the loop exit and the final return are not part of this
// listing.
910 PDFEntry
* PDFTrailer::clone() const
912 PDFTrailer
* pNewTr
= new PDFTrailer();
913 cloneSubElements( pNewTr
->m_aSubElements
);
914 unsigned int nEle
= m_aSubElements
.size();
915 for( unsigned int i
= 0; i
< nEle
; i
++ )
917 if( m_aSubElements
[i
] == m_pDict
)
919 pNewTr
->m_pDict
= dynamic_cast<PDFDict
*>(pNewTr
->m_aSubElements
[i
]);
926 #define ENCRYPTION_KEY_LEN 16
927 #define ENCRYPTION_BUF_LEN 32
// Encryption state of a PDFFile, filled from the trailer's /Encrypt
// dictionary by PDFFile::impl_getData() and used by the decryption routines.
// The constructor clears the O/U entries and the key; the destructor releases
// the cipher and digest handles.
// NOTE(review): several members (m_bIsEncrypted, m_aCipher, m_aDigest,
// m_aDocID) and parts of the constructor and destructor are not part of this
// listing.
930 struct PDFFileImplData
933 bool m_bStandardHandler
;
934 sal_uInt32 m_nAlgoVersion
;
935 sal_uInt32 m_nStandardRevision
;
// key length in bytes
936 sal_uInt32 m_nKeyLength
;
937 sal_uInt8 m_aOEntry
[32];
938 sal_uInt8 m_aUEntry
[32];
939 sal_uInt32 m_nPEntry
;
// the 5 extra bytes take the object/generation number appended in PDFFile::decrypt
944 sal_uInt8 m_aDecryptionKey
[ENCRYPTION_KEY_LEN
+5]; // maximum handled key length
947 m_bIsEncrypted( false ),
948 m_bStandardHandler( false ),
950 m_nStandardRevision( 0 ),
956 rtl_zeroMemory( m_aOEntry
, sizeof( m_aOEntry
) );
957 rtl_zeroMemory( m_aUEntry
, sizeof( m_aUEntry
) );
958 rtl_zeroMemory( m_aDecryptionKey
, sizeof( m_aDecryptionKey
) );
964 rtl_cipher_destroyARCFOUR( m_aCipher
);
966 rtl_digest_destroyMD5( m_aDigest
);
977 bool PDFFile::isEncrypted() const
979 return impl_getData()->m_bIsEncrypted
;
// Decrypts nLen bytes from pInBuffer to pOutBuffer for the indirect object
// nObject/nGeneration with the ARCFOUR cipher.
// The per-object key is the MD5 digest of the decryption key followed by the
// low three bytes of the object number and the low two bytes of the
// generation number. Returns true if the cipher reported no error.
// NOTE(review): the early return for unencrypted files and the arguments of
// the cipher init/decode calls are not part of this listing.
982 bool PDFFile::decrypt( const sal_uInt8
* pInBuffer
, sal_uInt32 nLen
, sal_uInt8
* pOutBuffer
,
983 unsigned int nObject
, unsigned int nGeneration
) const
985 if( ! isEncrypted() )
// the cipher handle is created on first use
988 if( ! m_pData
->m_aCipher
)
989 m_pData
->m_aCipher
= rtl_cipher_createARCFOUR( rtl_Cipher_ModeStream
);
991 // modify encryption key
// NOTE(review): the five writes below start at m_nKeyLength and stay inside
// m_aDecryptionKey only if m_nKeyLength <= ENCRYPTION_KEY_LEN; no such check
// is visible here — confirm one exists before this point.
992 sal_uInt32 i
= m_pData
->m_nKeyLength
;
993 m_pData
->m_aDecryptionKey
[i
++] = sal_uInt8(nObject
&0xff);
994 m_pData
->m_aDecryptionKey
[i
++] = sal_uInt8((nObject
>>8)&0xff);
995 m_pData
->m_aDecryptionKey
[i
++] = sal_uInt8((nObject
>>16)&0xff);
996 m_pData
->m_aDecryptionKey
[i
++] = sal_uInt8(nGeneration
&0xff);
997 m_pData
->m_aDecryptionKey
[i
++] = sal_uInt8((nGeneration
>>8)&0xff);
// hash key plus object id to get the per-object key
999 sal_uInt8 aSum
[ENCRYPTION_KEY_LEN
];
1000 rtl_digest_updateMD5( m_pData
->m_aDigest
, m_pData
->m_aDecryptionKey
, i
);
1001 rtl_digest_getMD5( m_pData
->m_aDigest
, aSum
, sizeof( aSum
) );
1006 rtlCipherError aErr
= rtl_cipher_initARCFOUR( m_pData
->m_aCipher
,
1007 rtl_Cipher_DirectionDecode
,
1010 if( aErr
== rtl_Cipher_E_None
)
1011 aErr
= rtl_cipher_decodeARCFOUR( m_pData
->m_aCipher
,
1014 return aErr
== rtl_Cipher_E_None
;
// 32-byte padding string: used to fill up passwords in
// pad_or_truncate_to_32() and as the plain text in check_user_password().
1017 static const sal_uInt8 nPadString
[32] =
1019 0x28, 0xBF, 0x4E, 0x5E, 0x4E, 0x75, 0x8A, 0x41, 0x64, 0x00, 0x4E, 0x56, 0xFF, 0xFA, 0x01, 0x08,
1020 0x2E, 0x2E, 0x00, 0xB6, 0xD0, 0x68, 0x3E, 0x80, 0x2F, 0x0C, 0xA9, 0xFE, 0x64, 0x53, 0x69, 0x7A
// Copies the password rStr into pBuffer and fills the remainder with bytes
// taken from the start of nPadString.
// NOTE(review): the limiting of nLen and the loop around the padding
// statement are not part of this listing; judging by the function name and by
// the caller's buffer (ENCRYPTION_BUF_LEN bytes), pBuffer is expected to hold
// 32 bytes — confirm.
1023 static void pad_or_truncate_to_32( const OString
& rStr
, sal_Char
* pBuffer
)
1025 int nLen
= rStr
.getLength();
1028 const sal_Char
* pStr
= rStr
.getStr();
1029 rtl_copyMemory( pBuffer
, pStr
, nLen
);
1032 pBuffer
[nLen
++] = nPadString
[i
++];
// Derives the encryption key from the password rPwd and copies it to pOutKey.
// The padded password is fed into the MD5 digest; the visible code then adds
// the O entry, the P entry (low byte first) and the document id. For standard
// revision 3 the digest is hashed again 50 times. At most
// RTL_DIGEST_LENGTH_MD5 bytes are copied out.
// NOTE(review): how bComputeO changes the digest input and the return
// statement are not part of this listing; the callers use the return value as
// the key length.
1035 // pass at least pData->m_nKeyLength bytes in
1036 static sal_uInt32
password_to_key( const OString
& rPwd
, sal_uInt8
* pOutKey
, PDFFileImplData
* pData
, bool bComputeO
)
1038 // see PDF reference 1.4 Algorithm 3.2
1039 // encrypt pad string
1040 sal_Char aPadPwd
[ENCRYPTION_BUF_LEN
];
1041 pad_or_truncate_to_32( rPwd
, aPadPwd
);
1042 rtl_digest_updateMD5( pData
->m_aDigest
, aPadPwd
, sizeof( aPadPwd
) );
1045 rtl_digest_updateMD5( pData
->m_aDigest
, pData
->m_aOEntry
, 32 );
// the P entry is hashed as four bytes, least significant byte first
1046 sal_uInt8 aPEntry
[4];
1047 aPEntry
[0] = static_cast<sal_uInt8
>(pData
->m_nPEntry
& 0xff);
1048 aPEntry
[1] = static_cast<sal_uInt8
>((pData
->m_nPEntry
>> 8 ) & 0xff);
1049 aPEntry
[2] = static_cast<sal_uInt8
>((pData
->m_nPEntry
>> 16) & 0xff);
1050 aPEntry
[3] = static_cast<sal_uInt8
>((pData
->m_nPEntry
>> 24) & 0xff);
1051 rtl_digest_updateMD5( pData
->m_aDigest
, aPEntry
, sizeof(aPEntry
) );
1052 rtl_digest_updateMD5( pData
->m_aDigest
, pData
->m_aDocID
.getStr(), pData
->m_aDocID
.getLength() );
1054 sal_uInt8 nSum
[RTL_DIGEST_LENGTH_MD5
];
1055 rtl_digest_getMD5( pData
->m_aDigest
, nSum
, sizeof(nSum
) );
// revision 3 re-hashes the digest 50 times
1056 if( pData
->m_nStandardRevision
== 3 )
1058 for( int i
= 0; i
< 50; i
++ )
1060 rtl_digest_updateMD5( pData
->m_aDigest
, nSum
, sizeof(nSum
) );
1061 rtl_digest_getMD5( pData
->m_aDigest
, nSum
, sizeof(nSum
) );
// never copy more than the digest provides
1064 sal_uInt32 nLen
= pData
->m_nKeyLength
;
1065 if( nLen
> RTL_DIGEST_LENGTH_MD5
)
1066 nLen
= RTL_DIGEST_LENGTH_MD5
;
1067 rtl_copyMemory( pOutKey
, nSum
, nLen
);
// Checks whether rPwd is the user password of the document.
// The key derived from rPwd is stored in pData->m_aDecryptionKey in any case.
// Revision 2: the pad string is encrypted with the key and compared with all
// 32 bytes of the U entry.
// Revision 3: the MD5 digest of pad string and document id is encrypted with
// the key and then 19 more times with the key XORed with the round number;
// the first 16 bytes are compared with the U entry.
// NOTE(review): the arguments of the first cipher init call and the final
// return are not part of this listing.
1071 static bool check_user_password( const OString
& rPwd
, PDFFileImplData
* pData
)
1073 // see PDF reference 1.4 Algorithm 3.6
1074 bool bValid
= false;
1075 sal_uInt8 aKey
[ENCRYPTION_KEY_LEN
];
1076 sal_uInt8 nEncryptedEntry
[ENCRYPTION_BUF_LEN
];
1077 rtl_zeroMemory( nEncryptedEntry
, sizeof(nEncryptedEntry
) );
1078 sal_uInt32 nKeyLen
= password_to_key( rPwd
, aKey
, pData
, false );
1079 // save (at this time potential) decryption key for later use
1080 rtl_copyMemory( pData
->m_aDecryptionKey
, aKey
, nKeyLen
);
1081 if( pData
->m_nStandardRevision
== 2 )
1083 // see PDF reference 1.4 Algorithm 3.4
1084 // encrypt pad string
1085 rtl_cipher_initARCFOUR( pData
->m_aCipher
, rtl_Cipher_DirectionEncode
,
1088 rtl_cipher_encodeARCFOUR( pData
->m_aCipher
, nPadString
, sizeof( nPadString
),
1089 nEncryptedEntry
, sizeof( nEncryptedEntry
) );
1090 bValid
= (rtl_compareMemory( nEncryptedEntry
, pData
->m_aUEntry
, 32 ) == 0);
1092 else if( pData
->m_nStandardRevision
== 3 )
1094 // see PDF reference 1.4 Algorithm 3.5
1095 rtl_digest_updateMD5( pData
->m_aDigest
, nPadString
, sizeof( nPadString
) );
1096 rtl_digest_updateMD5( pData
->m_aDigest
, pData
->m_aDocID
.getStr(), pData
->m_aDocID
.getLength() );
1097 rtl_digest_getMD5( pData
->m_aDigest
, nEncryptedEntry
, sizeof(nEncryptedEntry
) );
1098 rtl_cipher_initARCFOUR( pData
->m_aCipher
, rtl_Cipher_DirectionEncode
,
1099 aKey
, sizeof(aKey
), NULL
, 0 );
1100 rtl_cipher_encodeARCFOUR( pData
->m_aCipher
,
1101 nEncryptedEntry
, 16,
1102 nEncryptedEntry
, 16 ); // encrypt in place
1103 for( int i
= 1; i
<= 19; i
++ ) // do it 19 times, start with 1
// round key: every key byte XORed with the round number
1105 sal_uInt8 aTempKey
[ENCRYPTION_KEY_LEN
];
1106 for( sal_uInt32 j
= 0; j
< sizeof(aTempKey
); j
++ )
1107 aTempKey
[j
] = static_cast<sal_uInt8
>( aKey
[j
] ^ i
);
1109 rtl_cipher_initARCFOUR( pData
->m_aCipher
, rtl_Cipher_DirectionEncode
,
1110 aTempKey
, sizeof(aTempKey
), NULL
, 0 );
1111 rtl_cipher_encodeARCFOUR( pData
->m_aCipher
,
1112 nEncryptedEntry
, 16,
1113 nEncryptedEntry
, 16 ); // encrypt in place
// only the first 16 bytes of the U entry are compared for revision 3
1115 bValid
= (rtl_compareMemory( nEncryptedEntry
, pData
->m_aUEntry
, 16 ) == 0);
// Prepares decryption with the password rPwd.
// For an unencrypted file the result is true only for an empty password.
// Only the standard security handler with algorithm version 1 or 2 and
// standard revision 2 or 3 is handled. rPwd is first tried as the user
// password; in the owner-password path the O entry is decrypted with the key
// derived from rPwd and the result is checked as the user password.
// NOTE(review): the return statements, the condition leading into the owner
// password attempt and some cipher call arguments are not part of this
// listing.
1120 bool PDFFile::setupDecryptionData( const OString
& rPwd
) const
1122 if( !impl_getData()->m_bIsEncrypted
)
1123 return rPwd
.getLength() == 0;
1125 // check if we can handle this encryption at all
1126 if( ! m_pData
->m_bStandardHandler
||
1127 m_pData
->m_nAlgoVersion
< 1 ||
1128 m_pData
->m_nAlgoVersion
> 2 ||
1129 m_pData
->m_nStandardRevision
< 2 ||
1130 m_pData
->m_nStandardRevision
> 3 )
// cipher and digest handles are created on first use
1133 if( ! m_pData
->m_aCipher
)
1134 m_pData
->m_aCipher
= rtl_cipher_createARCFOUR(rtl_Cipher_ModeStream
);
1135 if( ! m_pData
->m_aDigest
)
1136 m_pData
->m_aDigest
= rtl_digest_createMD5();
1138 // first try user password
1139 bool bValid
= check_user_password( rPwd
, m_pData
);
1143 // try owner password
1144 // see PDF reference 1.4 Algorithm 3.7
1145 sal_uInt8 aKey
[ENCRYPTION_KEY_LEN
];
1146 sal_uInt8 nPwd
[ENCRYPTION_BUF_LEN
];
1147 rtl_zeroMemory( nPwd
, sizeof(nPwd
) );
1148 sal_uInt32 nKeyLen
= password_to_key( rPwd
, aKey
, m_pData
, true );
1149 if( m_pData
->m_nStandardRevision
== 2 )
1151 rtl_cipher_initARCFOUR( m_pData
->m_aCipher
, rtl_Cipher_DirectionDecode
,
1152 aKey
, nKeyLen
, NULL
, 0 );
1153 rtl_cipher_decodeARCFOUR( m_pData
->m_aCipher
,
1154 m_pData
->m_aOEntry
, 32,
1157 else if( m_pData
->m_nStandardRevision
== 3 )
1159 rtl_copyMemory( nPwd
, m_pData
->m_aOEntry
, 32 );
// 20 rounds, counting down from 19 to 0, each with the key XORed with i
1160 for( int i
= 19; i
>= 0; i
-- )
1162 sal_uInt8 nTempKey
[ENCRYPTION_KEY_LEN
];
1163 for( unsigned int j
= 0; j
< sizeof(nTempKey
); j
++ )
1164 nTempKey
[j
] = sal_uInt8(aKey
[j
] ^ i
);
1165 rtl_cipher_initARCFOUR( m_pData
->m_aCipher
, rtl_Cipher_DirectionDecode
,
1166 nTempKey
, nKeyLen
, NULL
, 0 );
1167 rtl_cipher_decodeARCFOUR( m_pData
->m_aCipher
,
1169 nPwd
, 32 ); // decrypt inplace
// the decrypted O entry is checked as the user password
1172 bValid
= check_user_password( OString( (sal_Char
*)nPwd
, 32 ), m_pData
);
// Creates the PDFFileImplData for this file and fills it from the trailers.
// The sub elements are searched from the last to the first for PDFTrailer
// objects with a dictionary. The first element of the /ID array gives the
// document id. The /Encrypt dictionary, direct or through an object
// reference, provides Filter, V, Length, O, U, R and P.
// A present Filter entry marks the file as encrypted. The key length defaults
// to 5 bytes, is 16 for algorithm version 3 and above, and is overridden by
// /Length divided by 8.
// NOTE(review): the check whether m_pData already exists, the null checks on
// the dynamic_cast results, the loop exit and the final return are not part
// of this listing.
1178 PDFFileImplData
* PDFFile::impl_getData() const
1182 m_pData
= new PDFFileImplData();
1183 // check for encryption dict in a trailer
1184 unsigned int nElements
= m_aSubElements
.size();
1185 while( nElements
-- > 0 )
1187 PDFTrailer
* pTrailer
= dynamic_cast<PDFTrailer
*>(m_aSubElements
[nElements
]);
1188 if( pTrailer
&& pTrailer
->m_pDict
)
// document id: first string of the /ID array
1191 PDFDict::Map::iterator doc_id
= pTrailer
->m_pDict
->m_aMap
.find( "ID" );
1192 if( doc_id
!= pTrailer
->m_pDict
->m_aMap
.end() )
1194 PDFArray
* pArr
= dynamic_cast<PDFArray
*>(doc_id
->second
);
1195 if( pArr
&& pArr
->m_aSubElements
.size() > 0 )
1197 PDFString
* pStr
= dynamic_cast<PDFString
*>(pArr
->m_aSubElements
[0]);
1199 m_pData
->m_aDocID
= pStr
->getFilteredString();
1200 #if OSL_DEBUG_LEVEL > 1
1201 fprintf( stderr
, "DocId is <" );
1202 for( int i
= 0; i
< m_pData
->m_aDocID
.getLength(); i
++ )
1203 fprintf( stderr
, "%.2x", sal_uInt32(sal_uInt8(m_pData
->m_aDocID
.getStr()[i
])) );
1204 fprintf( stderr
, ">\n" );
1208 // search Encrypt entry
1209 PDFDict::Map::iterator enc
=
1210 pTrailer
->m_pDict
->m_aMap
.find( "Encrypt" );
1211 if( enc
!= pTrailer
->m_pDict
->m_aMap
.end() )
// the entry is either the dictionary itself or a reference to it
1213 PDFDict
* pDict
= dynamic_cast<PDFDict
*>(enc
->second
);
1216 PDFObjectRef
* pRef
= dynamic_cast<PDFObjectRef
*>(enc
->second
);
1219 PDFObject
* pObj
= findObject( pRef
);
1220 if( pObj
&& pObj
->m_pObject
)
1221 pDict
= dynamic_cast<PDFDict
*>(pObj
->m_pObject
);
1226 PDFDict::Map::iterator filter
= pDict
->m_aMap
.find( "Filter" );
1227 PDFDict::Map::iterator version
= pDict
->m_aMap
.find( "V" );
1228 PDFDict::Map::iterator len
= pDict
->m_aMap
.find( "Length" );
1229 PDFDict::Map::iterator o_ent
= pDict
->m_aMap
.find( "O" );
1230 PDFDict::Map::iterator u_ent
= pDict
->m_aMap
.find( "U" );
1231 PDFDict::Map::iterator r_ent
= pDict
->m_aMap
.find( "R" );
1232 PDFDict::Map::iterator p_ent
= pDict
->m_aMap
.find( "P" );
1233 if( filter
!= pDict
->m_aMap
.end() )
1235 m_pData
->m_bIsEncrypted
= true;
// default key length of 5 bytes
1236 m_pData
->m_nKeyLength
= 5;
1237 if( version
!= pDict
->m_aMap
.end() )
1239 PDFNumber
* pNum
= dynamic_cast<PDFNumber
*>(version
->second
);
1241 m_pData
->m_nAlgoVersion
= static_cast<sal_uInt32
>(pNum
->m_fValue
);
1243 if( m_pData
->m_nAlgoVersion
>= 3 )
1244 m_pData
->m_nKeyLength
= 16;
1245 if( len
!= pDict
->m_aMap
.end() )
1247 PDFNumber
* pNum
= dynamic_cast<PDFNumber
*>(len
->second
);
// /Length is divided by 8 to get the key length in bytes
// NOTE(review): the result is not limited to ENCRYPTION_KEY_LEN in the
// visible code — confirm against the users of m_nKeyLength.
1249 m_pData
->m_nKeyLength
= static_cast<sal_uInt32
>(pNum
->m_fValue
) / 8;
1251 PDFName
* pFilter
= dynamic_cast<PDFName
*>(filter
->second
);
1252 if( pFilter
&& pFilter
->getFilteredName().equalsAscii( "Standard" ) )
1253 m_pData
->m_bStandardHandler
= true;
1254 if( o_ent
!= pDict
->m_aMap
.end() )
1256 PDFString
* pString
= dynamic_cast<PDFString
*>(o_ent
->second
);
// the O entry is only taken over if it is exactly 32 bytes long
1259 OString aEnt
= pString
->getFilteredString();
1260 if( aEnt
.getLength() == 32 )
1261 rtl_copyMemory( m_pData
->m_aOEntry
, aEnt
.getStr(), 32 );
1262 #if OSL_DEBUG_LEVEL > 1
1265 fprintf( stderr
, "O entry has length %d, should be 32 <", aEnt
.getLength() );
1266 for( int i
= 0; i
< aEnt
.getLength(); i
++ )
1267 fprintf( stderr
, " %.2X", sal_uInt32(sal_uInt8(aEnt
.getStr()[i
])) );
1268 fprintf( stderr
, ">\n" );
1273 if( u_ent
!= pDict
->m_aMap
.end() )
1275 PDFString
* pString
= dynamic_cast<PDFString
*>(u_ent
->second
);
// the U entry is only taken over if it is exactly 32 bytes long
1278 OString aEnt
= pString
->getFilteredString();
1279 if( aEnt
.getLength() == 32 )
1280 rtl_copyMemory( m_pData
->m_aUEntry
, aEnt
.getStr(), 32 );
1281 #if OSL_DEBUG_LEVEL > 1
1284 fprintf( stderr
, "U entry has length %d, should be 32 <", aEnt
.getLength() );
1285 for( int i
= 0; i
< aEnt
.getLength(); i
++ )
1286 fprintf( stderr
, " %.2X", sal_uInt32(sal_uInt8(aEnt
.getStr()[i
])) );
1287 fprintf( stderr
, ">\n" );
1292 if( r_ent
!= pDict
->m_aMap
.end() )
1294 PDFNumber
* pNum
= dynamic_cast<PDFNumber
*>(r_ent
->second
);
1296 m_pData
->m_nStandardRevision
= static_cast<sal_uInt32
>(pNum
->m_fValue
);
1298 if( p_ent
!= pDict
->m_aMap
.end() )
1300 PDFNumber
* pNum
= dynamic_cast<PDFNumber
*>(p_ent
->second
);
// converted through sal_Int32 first, then stored as an unsigned value
1302 m_pData
->m_nPEntry
= static_cast<sal_uInt32
>(static_cast<sal_Int32
>(pNum
->m_fValue
));
1303 #if OSL_DEBUG_LEVEL > 1
1304 fprintf( stderr
, "p entry is 0x%x\n", m_pData
->m_nPEntry
);
1307 #if OSL_DEBUG_LEVEL > 1
1308 fprintf( stderr
, "Encryption dict: sec handler: %s, version = %d, revision = %d, key length = %d\n",
1309 pFilter
? OUStringToOString( pFilter
->getFilteredName(), RTL_TEXTENCODING_UTF8
).getStr() : "<unknown>",
1310 m_pData
->m_nAlgoVersion
, m_pData
->m_nStandardRevision
, m_pData
->m_nKeyLength
);
// Writes the whole file: fresh emit data for this file is attached to the
// context, the "%PDF-" header with major and minor version is written, and
// all sub elements follow.
// NOTE(review): the separator between the two version numbers and the return
// after a failed header write are not part of this listing.
1322 bool PDFFile::emit( EmitContext
& rWriteContext
) const
// replaces any emit data previously attached to the context
1324 setEmitData( rWriteContext
, new EmitImplData( this ) );
1326 OStringBuffer
aBuf( 32 );
1327 aBuf
.append( "%PDF-" );
1328 aBuf
.append( sal_Int32( m_nMajor
) );
1330 aBuf
.append( sal_Int32( m_nMinor
) );
1331 aBuf
.append( "\n" );
1332 if( ! rWriteContext
.write( aBuf
.getStr(), aBuf
.getLength() ) )
1334 return emitSubElements( rWriteContext
);
1337 PDFEntry
* PDFFile::clone() const
1339 PDFFile
* pNewFl
= new PDFFile();
1340 pNewFl
->m_nMajor
= m_nMajor
;
1341 pNewFl
->m_nMinor
= m_nMinor
;
1342 cloneSubElements( pNewFl
->m_aSubElements
);
1350 bool PDFPart::emit( EmitContext
& rWriteContext
) const
1352 return emitSubElements( rWriteContext
);
1355 PDFEntry
* PDFPart::clone() const
1357 PDFPart
* pNewPt
= new PDFPart();
1358 cloneSubElements( pNewPt
->m_aSubElements
);