Update ooo320-m1
[ooovba.git] / sdext / source / pdfimport / test / pdfunzip.cxx
bloba18cd62917117e296defdcdd0ac3f56c32e77a71
1 /*************************************************************************
3 * DO NOT ALTER OR REMOVE COPYRIGHT NOTICES OR THIS FILE HEADER.
4 *
5 * Copyright 2008 by Sun Microsystems, Inc.
7 * OpenOffice.org - a multi-platform office productivity suite
9 * $RCSfile: pdfunzip.cxx,v $
11 * $Revision: 1.2 $
13 * This file is part of OpenOffice.org.
15 * OpenOffice.org is free software: you can redistribute it and/or modify
16 * it under the terms of the GNU Lesser General Public License version 3
17 * only, as published by the Free Software Foundation.
19 * OpenOffice.org is distributed in the hope that it will be useful,
20 * but WITHOUT ANY WARRANTY; without even the implied warranty of
21 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
22 * GNU Lesser General Public License version 3 for more details
23 * (a copy is included in the LICENSE file that accompanied this code).
25 * You should have received a copy of the GNU Lesser General Public License
26 * version 3 along with OpenOffice.org. If not, see
27 * <http://www.openoffice.org/license.html>
28 * for a copy of the LGPLv3 License.
30 ************************************************************************/
32 // MARKER(update_precomp.py): autogen include statement, do not remove
33 #include "precompiled_sdext.hxx"
35 #include <stdio.h>
36 #include <sal/main.h>
37 #include <osl/file.h>
38 #include <osl/thread.h>
39 #include <rtl/alloc.h>
40 #include <rtl/ustring.hxx>
41 #include <rtl/strbuf.hxx>
43 #include "pdfparse.hxx"
45 using namespace rtl;
46 using namespace pdfparse;
/** Print the command line synopsis and the option summary to stdout.

    @param pExe
    name of the executable to show in the usage lines; it is inserted
    five times via "%s" and must therefore not be NULL
*/
void printHelp( const char* pExe )
{
    // the five %s placeholders below correspond to the five pExe arguments
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:<g1>][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " -h, --help: show help\n"
    " -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    " and prints the mimetype found to stdout\n"
    " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    " object numbers, where object number and generation number are separated by \':\'\n"
    " an omitted generation number defaults to 0\n"
    " -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}
69 class FileEmitContext : public EmitContext
71 oslFileHandle m_aHandle;
72 oslFileHandle m_aReadHandle;
73 unsigned int m_nReadLen;
75 void openReadFile( const char* pOrigName );
77 public:
78 FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
79 virtual ~FileEmitContext();
81 virtual bool write( const void* pBuf, unsigned int nLen ) throw();
82 virtual unsigned int getCurPos() throw();
83 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw();
84 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw();
87 FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
88 : EmitContext( pTop ),
89 m_aHandle( NULL ),
90 m_aReadHandle( NULL ),
91 m_nReadLen( 0 )
93 OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) );
94 OUString aURL;
95 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
97 fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
98 return;
101 if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
103 if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
105 fprintf( stderr, "could not truncate %s\n", pFileName );
106 osl_closeFile( m_aHandle );
107 m_aHandle = NULL;
110 else if( osl_openFile( aURL.pData, &m_aHandle,
111 osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
113 fprintf( stderr, "could not open %s\n", pFileName );
114 return;
116 m_bDeflate = true;
118 openReadFile( pOrigName );
121 FileEmitContext::~FileEmitContext()
123 if( m_aHandle )
124 osl_closeFile( m_aHandle );
125 if( m_aReadHandle )
126 osl_closeFile( m_aReadHandle );
129 void FileEmitContext::openReadFile( const char* pInFile )
131 OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) );
132 OUString aURL;
133 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
135 fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
136 return;
139 if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
141 fprintf( stderr, "could not open %s\n", pInFile );
142 return;
145 if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
147 fprintf( stderr, "could not seek to end of %s\n", pInFile );
148 osl_closeFile( m_aReadHandle );
149 return;
152 sal_uInt64 nFileSize = 0;
153 if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
155 fprintf( stderr, "could not get end pos of %s\n", pInFile );
156 osl_closeFile( m_aReadHandle );
157 return;
160 m_nReadLen = static_cast<unsigned int>(nFileSize);
163 bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw()
165 if( ! m_aHandle )
166 return false;
168 sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
169 sal_uInt64 nWritten = 0;
170 return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
171 && nWrite == nWritten;
174 unsigned int FileEmitContext::getCurPos() throw()
176 sal_uInt64 nFileSize = 0;
177 if( m_aHandle )
179 if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
180 nFileSize = 0;
182 return static_cast<unsigned int>(nFileSize);
185 bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
187 if( nOrigOffset + nLen > m_nReadLen )
188 return false;
190 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
192 fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
193 return false;
195 void* pBuf = rtl_allocateMemory( nLen );
196 if( ! pBuf )
197 return false;
198 sal_uInt64 nBytesRead = 0;
199 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
200 || nBytesRead != static_cast<sal_uInt64>(nLen) )
202 fprintf( stderr, "could not read %u bytes\n", nLen );
203 rtl_freeMemory( pBuf );
204 return false;
206 bool bRet = write( pBuf, nLen );
207 rtl_freeMemory( pBuf );
208 return bRet;
211 unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
213 if( nOrigOffset + nLen > m_nReadLen )
214 return 0;
216 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
218 fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
219 return 0;
221 sal_uInt64 nBytesRead = 0;
222 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
223 return 0;
224 return static_cast<unsigned int>(nBytesRead);
227 typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
229 int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
232 PDFReader aParser;
233 int nRet = 0;
234 PDFEntry* pEntry = aParser.read( pInFile );
235 if( pEntry )
237 PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry);
238 if( pPDFFile )
240 fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
241 if( pPassword )
242 fprintf( stdout, "password %s\n",
243 pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
244 nRet = pHdl( pInFile, pOutFile, pPDFFile );
246 else
247 nRet = 20;
248 delete pEntry;
250 return nRet;
253 int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
255 FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
256 aContext.m_bDecrypt = pPDFFile->isEncrypted();
257 pPDFFile->emit(aContext);
258 return 0;
261 int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
263 int nRet = 0;
264 unsigned int nArrayElements = pStreams->m_aSubElements.size();
265 for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
267 PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]);
268 PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]);
269 if( ! pMimeType )
270 fprintf( stderr, "error: no mimetype element\n" );
271 if( ! pStreamRef )
272 fprintf( stderr, "error: no stream ref element\n" );
273 if( pMimeType && pStreamRef )
275 fprintf( stdout, "found stream %d %d with mimetype %s\n",
276 pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
277 pMimeType->m_aName.getStr() );
278 PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
279 if( pObject )
281 rtl::OStringBuffer aOutStream( pOutFile );
282 aOutStream.append( "_stream_" );
283 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
284 aOutStream.append( "_" );
285 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
286 FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
287 aContext.m_bDecrypt = pPDFFile->isEncrypted();
288 pObject->writeStream( aContext, pPDFFile );
290 else
292 fprintf( stderr, "object not found\n" );
293 nRet = 121;
296 else
297 nRet = 120;
299 return nRet;
302 int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
304 // find all trailers
305 int nRet = 0;
306 unsigned int nElements = pPDFFile->m_aSubElements.size();
307 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
309 PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]);
310 if( pTrailer && pTrailer->m_pDict )
312 // search for AdditionalStreams entry
313 std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator add_stream;
314 add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" );
315 if( add_stream != pTrailer->m_pDict->m_aMap.end() )
317 PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
318 if( pStreams )
319 nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
323 return nRet;
326 int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
328 int nRet = 0;
329 unsigned int nElements = i_pPDFFile->m_aSubElements.size();
330 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
332 // search FontDescriptors
333 PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
334 if( ! pObj )
335 continue;
336 PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
337 if( ! pDict )
338 continue;
340 std::hash_map<rtl::OString,PDFEntry*,rtl::OStringHash>::iterator map_it =
341 pDict->m_aMap.find( "Type" );
342 if( map_it == pDict->m_aMap.end() )
343 continue;
345 PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
346 if( ! pName )
347 continue;
348 if( ! pName->m_aName.equals( "FontDescriptor" ) )
349 continue;
351 // the font name will be helpful, also there must be one in
352 // a font descriptor
353 map_it = pDict->m_aMap.find( "FontName" );
354 if( map_it == pDict->m_aMap.end() )
355 continue;
356 pName = dynamic_cast<PDFName*>(map_it->second);
357 if( ! pName )
358 continue;
359 rtl::OString aFontName( pName->m_aName );
361 PDFObjectRef* pStreamRef = 0;
362 const char* pFileType = NULL;
363 // we have a font descriptor, try for a type 1 font
364 map_it = pDict->m_aMap.find( "FontFile" );
365 if( map_it != pDict->m_aMap.end() )
367 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
368 if( pStreamRef )
369 pFileType = "pfa";
372 // perhaps it's a truetype file ?
373 if( ! pStreamRef )
375 map_it = pDict->m_aMap.find( "FontFile2" );
376 if( map_it != pDict->m_aMap.end() )
378 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
379 if( pStreamRef )
380 pFileType = "ttf";
384 if( ! pStreamRef )
385 continue;
387 PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
388 if( ! pStream )
389 continue;
391 rtl::OStringBuffer aOutStream( i_pOutFile );
392 aOutStream.append( "_font_" );
393 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
394 aOutStream.append( "_" );
395 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
396 aOutStream.append( "_" );
397 aOutStream.append( aFontName );
398 if( pFileType )
400 aOutStream.append( "." );
401 aOutStream.append( pFileType );
403 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
404 aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
405 pStream->writeStream( aContext, i_pPDFFile );
407 return nRet;
410 std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;
412 int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
414 int nRet = 0;
415 unsigned int nElements = s_aEmitObjects.size();
416 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
418 sal_Int32 nObject = s_aEmitObjects[i].first;
419 sal_Int32 nGeneration = s_aEmitObjects[i].second;
420 PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
421 if( ! pStream )
423 fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
424 continue;
427 rtl::OStringBuffer aOutStream( i_pOutFile );
428 aOutStream.append( "_stream_" );
429 aOutStream.append( nObject );
430 aOutStream.append( "_" );
431 aOutStream.append( nGeneration );
432 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
433 aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
434 pStream->writeStream( aContext, i_pPDFFile );
436 return nRet;
439 SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
441 const char* pInFile = NULL;
442 const char* pOutFile = NULL;
443 const char* pPassword = NULL;
444 OStringBuffer aOutFile( 256 );
445 PDFFileHdl aHdl = write_unzipFile;
447 for( int nArg = 1; nArg < argc; nArg++ )
449 if( argv[nArg][0] == '-' )
451 if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
452 ! rtl_str_compare( "--password" , argv[nArg] ) )
454 if( nArg == argc-1 )
456 fprintf( stderr, "no password given\n" );
457 return 1;
459 nArg++;
460 pPassword = argv[nArg];
462 else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
463 ! rtl_str_compare( "--help", argv[nArg] ) )
465 printHelp( argv[0] );
466 return 0;
468 else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
469 ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
471 aHdl = write_addStreams;
473 else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
474 ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
476 aHdl = write_fonts;
478 else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
479 ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
481 aHdl = write_objects;
482 nArg++;
483 if( nArg < argc )
485 rtl::OString aObjs( argv[nArg] );
486 sal_Int32 nIndex = 0;
487 while( nIndex != -1 )
489 rtl::OString aToken( aObjs.getToken( 0, ',', nIndex ) );
490 sal_Int32 nObject = 0;
491 sal_Int32 nGeneration = 0;
492 sal_Int32 nGenIndex = 0;
493 nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
494 if( nGenIndex != -1 )
495 nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
496 s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
500 else
502 fprintf( stderr, "unrecognized option \"%s\"\n",
503 argv[nArg] );
504 printHelp( argv[0] );
505 return 1;
508 else if( pInFile == NULL )
509 pInFile = argv[nArg];
510 else if( pOutFile == NULL )
511 pOutFile = argv[nArg];
513 if( ! pInFile )
515 fprintf( stderr, "no input file given\n" );
516 return 10;
518 if( ! pOutFile )
520 OString aFile( pInFile );
521 if( aFile.getLength() > 0 )
523 if( aFile.getLength() > 4 )
525 if( aFile.matchIgnoreAsciiCase( OString( ".pdf" ), aFile.getLength()-4 ) )
526 aOutFile.append( pInFile, aFile.getLength() - 4 );
527 else
528 aOutFile.append( aFile );
530 aOutFile.append( "_unzip.pdf" );
531 pOutFile = aOutFile.getStr();
533 else
535 fprintf( stderr, "no output file given\n" );
536 return 11;
540 return handleFile( pInFile, pOutFile, pPassword, aHdl );