bump product version to 5.0.4.1
[LibreOffice.git] / sdext / source / pdfimport / test / pdfunzip.cxx
blob1aa5a6cd8d4107643dcccd0e16feab4881043191
/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
/*
 * This file is part of the LibreOffice project.
 *
 * This Source Code Form is subject to the terms of the Mozilla Public
 * License, v. 2.0. If a copy of the MPL was not distributed with this
 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
 *
 * This file incorporates work covered by the following license notice:
 *
 *   Licensed to the Apache Software Foundation (ASF) under one or more
 *   contributor license agreements. See the NOTICE file distributed
 *   with this work for additional information regarding copyright
 *   ownership. The ASF licenses this file to you under the Apache
 *   License, Version 2.0 (the "License"); you may not use this file
 *   except in compliance with the License. You may obtain a copy of
 *   the License at http://www.apache.org/licenses/LICENSE-2.0 .
 */
21 #include <stdio.h>
22 #include <sal/main.h>
23 #include <osl/file.h>
24 #include <osl/thread.h>
25 #include <rtl/alloc.h>
26 #include <rtl/ustring.hxx>
27 #include <rtl/strbuf.hxx>
29 #include "pdfparse.hxx"
31 using namespace pdfparse;
/** Print the command line synopsis and the option summary to stdout.

    @param pExe
    name of the executable as it should appear in the usage lines;
    it is inserted once per usage line
*/
void printHelp( const char* pExe )
{
    fprintf( stdout,
    "USAGE: %s [-h,--help]\n"
    " %s [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-a, --extract-add-streams> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-f, --extract-fonts> [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " %s <-o, --extract-objects> <o0>[:<g0>][,<o1>[:g1][,...]] [-pw, --password <password>] <inputfile> [<outputfile>]\n"
    " -h, --help: show help\n"
    " -a, --extract-add-streams: extracts additional streams to outputfile_object\n"
    " and prints the mimetype found to stdout\n"
    // the closing parenthesis was missing in the emitted text
    " -f, --extract-fonts: extracts fonts (currently only type1 and truetype are supported)\n"
    " -o, --extract-objects: extracts object streams, the syntax of the argument is comma separated\n"
    " object numbers, where object number and generation number are separated by \':\'\n"
    " an omitted generation number defaults to 0\n"
    " -pw, --password: use password for decryption\n"
    "\n"
    "note: -f, -a, -o and normal unzip operation are mutually exclusive\n"
    , pExe, pExe, pExe, pExe, pExe );
}
55 class FileEmitContext : public EmitContext
57 oslFileHandle m_aHandle;
58 oslFileHandle m_aReadHandle;
59 unsigned int m_nReadLen;
61 void openReadFile( const char* pOrigName );
63 public:
64 FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop );
65 virtual ~FileEmitContext();
67 virtual bool write( const void* pBuf, unsigned int nLen ) throw() SAL_OVERRIDE;
68 virtual unsigned int getCurPos() throw() SAL_OVERRIDE;
69 virtual bool copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw() SAL_OVERRIDE;
70 virtual unsigned int readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw() SAL_OVERRIDE;
73 FileEmitContext::FileEmitContext( const char* pFileName, const char* pOrigName, const PDFContainer* pTop )
74 : EmitContext( pTop ),
75 m_aHandle( NULL ),
76 m_aReadHandle( NULL ),
77 m_nReadLen( 0 )
79 OUString aSysFile( OStringToOUString( OString( pFileName ), osl_getThreadTextEncoding() ) );
80 OUString aURL;
81 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
83 fprintf( stderr, "filename conversion \"%s\" failed\n", pFileName );
84 return;
87 if( osl_openFile( aURL.pData, &m_aHandle, osl_File_OpenFlag_Write ) == osl_File_E_None )
89 if( osl_setFileSize( m_aHandle, 0 ) != osl_File_E_None )
91 fprintf( stderr, "could not truncate %s\n", pFileName );
92 osl_closeFile( m_aHandle );
93 m_aHandle = NULL;
96 else if( osl_openFile( aURL.pData, &m_aHandle,
97 osl_File_OpenFlag_Write |osl_File_OpenFlag_Create ) != osl_File_E_None )
99 fprintf( stderr, "could not open %s\n", pFileName );
100 return;
102 m_bDeflate = true;
104 openReadFile( pOrigName );
107 FileEmitContext::~FileEmitContext()
109 if( m_aHandle )
110 osl_closeFile( m_aHandle );
111 if( m_aReadHandle )
112 osl_closeFile( m_aReadHandle );
115 void FileEmitContext::openReadFile( const char* pInFile )
117 OUString aSysFile( OStringToOUString( OString( pInFile ), osl_getThreadTextEncoding() ) );
118 OUString aURL;
119 if( osl_getFileURLFromSystemPath( aSysFile.pData, &aURL.pData ) != osl_File_E_None )
121 fprintf( stderr, "filename conversion \"%s\" failed\n", pInFile );
122 return;
125 if( osl_openFile( aURL.pData, &m_aReadHandle, osl_File_OpenFlag_Read ) != osl_File_E_None )
127 fprintf( stderr, "could not open %s\n", pInFile );
128 return;
131 if( osl_setFilePos( m_aReadHandle, osl_Pos_End, 0 ) != osl_File_E_None )
133 fprintf( stderr, "could not seek to end of %s\n", pInFile );
134 osl_closeFile( m_aReadHandle );
135 return;
138 sal_uInt64 nFileSize = 0;
139 if( osl_getFilePos( m_aReadHandle, &nFileSize ) != osl_File_E_None )
141 fprintf( stderr, "could not get end pos of %s\n", pInFile );
142 osl_closeFile( m_aReadHandle );
143 return;
146 m_nReadLen = static_cast<unsigned int>(nFileSize);
149 bool FileEmitContext::write( const void* pBuf, unsigned int nLen ) throw()
151 if( ! m_aHandle )
152 return false;
154 sal_uInt64 nWrite = static_cast<sal_uInt64>(nLen);
155 sal_uInt64 nWritten = 0;
156 return (osl_writeFile( m_aHandle, pBuf, nWrite, &nWritten ) == osl_File_E_None)
157 && nWrite == nWritten;
160 unsigned int FileEmitContext::getCurPos() throw()
162 sal_uInt64 nFileSize = 0;
163 if( m_aHandle )
165 if( osl_getFilePos( m_aHandle, &nFileSize ) != osl_File_E_None )
166 nFileSize = 0;
168 return static_cast<unsigned int>(nFileSize);
171 bool FileEmitContext::copyOrigBytes( unsigned int nOrigOffset, unsigned int nLen ) throw()
173 if( nOrigOffset + nLen > m_nReadLen )
174 return false;
176 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
178 fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
179 return false;
181 void* pBuf = rtl_allocateMemory( nLen );
182 if( ! pBuf )
183 return false;
184 sal_uInt64 nBytesRead = 0;
185 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None
186 || nBytesRead != static_cast<sal_uInt64>(nLen) )
188 fprintf( stderr, "could not read %u bytes\n", nLen );
189 rtl_freeMemory( pBuf );
190 return false;
192 bool bRet = write( pBuf, nLen );
193 rtl_freeMemory( pBuf );
194 return bRet;
197 unsigned int FileEmitContext::readOrigBytes( unsigned int nOrigOffset, unsigned int nLen, void* pBuf ) throw()
199 if( nOrigOffset + nLen > m_nReadLen )
200 return 0;
202 if( osl_setFilePos( m_aReadHandle, osl_Pos_Absolut, nOrigOffset ) != osl_File_E_None )
204 fprintf( stderr, "could not seek to offset %u\n", nOrigOffset );
205 return 0;
207 sal_uInt64 nBytesRead = 0;
208 if( osl_readFile( m_aReadHandle, pBuf, nLen, &nBytesRead ) != osl_File_E_None )
209 return 0;
210 return static_cast<unsigned int>(nBytesRead);
213 typedef int(*PDFFileHdl)(const char*, const char*, PDFFile*);
215 int handleFile( const char* pInFile, const char* pOutFile, const char* pPassword, PDFFileHdl pHdl )
218 PDFReader aParser;
219 int nRet = 0;
220 PDFEntry* pEntry = pdfparse::PDFReader::read( pInFile );
221 if( pEntry )
223 PDFFile* pPDFFile = dynamic_cast<PDFFile*>(pEntry);
224 if( pPDFFile )
226 fprintf( stdout, "have a %s PDF file\n", pPDFFile->isEncrypted() ? "encrypted" : "unencrypted" );
227 if( pPassword )
228 fprintf( stdout, "password %s\n",
229 pPDFFile->setupDecryptionData( pPassword ) ? "matches" : "does not match" );
230 nRet = pHdl( pInFile, pOutFile, pPDFFile );
232 else
233 nRet = 20;
234 delete pEntry;
236 return nRet;
239 int write_unzipFile( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
241 FileEmitContext aContext( pOutFile, pInFile, pPDFFile );
242 aContext.m_bDecrypt = pPDFFile->isEncrypted();
243 pPDFFile->emit(aContext);
244 return 0;
247 int write_addStreamArray( const char* pOutFile, PDFArray* pStreams, PDFFile* pPDFFile, const char* pInFile )
249 int nRet = 0;
250 unsigned int nArrayElements = pStreams->m_aSubElements.size();
251 for( unsigned int i = 0; i < nArrayElements-1 && nRet == 0; i++ )
253 PDFName* pMimeType = dynamic_cast<PDFName*>(pStreams->m_aSubElements[i]);
254 PDFObjectRef* pStreamRef = dynamic_cast<PDFObjectRef*>(pStreams->m_aSubElements[i+1]);
255 if( ! pMimeType )
256 fprintf( stderr, "error: no mimetype element\n" );
257 if( ! pStreamRef )
258 fprintf( stderr, "error: no stream ref element\n" );
259 if( pMimeType && pStreamRef )
261 fprintf( stdout, "found stream %d %d with mimetype %s\n",
262 pStreamRef->m_nNumber, pStreamRef->m_nGeneration,
263 pMimeType->m_aName.getStr() );
264 PDFObject* pObject = pPDFFile->findObject( pStreamRef->m_nNumber, pStreamRef->m_nGeneration );
265 if( pObject )
267 OStringBuffer aOutStream( pOutFile );
268 aOutStream.append( "_stream_" );
269 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
270 aOutStream.append( "_" );
271 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
272 FileEmitContext aContext( aOutStream.getStr(), pInFile, pPDFFile );
273 aContext.m_bDecrypt = pPDFFile->isEncrypted();
274 pObject->writeStream( aContext, pPDFFile );
276 else
278 fprintf( stderr, "object not found\n" );
279 nRet = 121;
282 else
283 nRet = 120;
285 return nRet;
288 int write_addStreams( const char* pInFile, const char* pOutFile, PDFFile* pPDFFile )
290 // find all trailers
291 int nRet = 0;
292 unsigned int nElements = pPDFFile->m_aSubElements.size();
293 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
295 PDFTrailer* pTrailer = dynamic_cast<PDFTrailer*>(pPDFFile->m_aSubElements[i]);
296 if( pTrailer && pTrailer->m_pDict )
298 // search for AdditionalStreams entry
299 std::unordered_map<OString,PDFEntry*,OStringHash>::iterator add_stream;
300 add_stream = pTrailer->m_pDict->m_aMap.find( "AdditionalStreams" );
301 if( add_stream != pTrailer->m_pDict->m_aMap.end() )
303 PDFArray* pStreams = dynamic_cast<PDFArray*>(add_stream->second);
304 if( pStreams )
305 nRet = write_addStreamArray( pOutFile, pStreams, pPDFFile, pInFile );
309 return nRet;
312 int write_fonts( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
314 int nRet = 0;
315 unsigned int nElements = i_pPDFFile->m_aSubElements.size();
316 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
318 // search FontDescriptors
319 PDFObject* pObj = dynamic_cast<PDFObject*>(i_pPDFFile->m_aSubElements[i]);
320 if( ! pObj )
321 continue;
322 PDFDict* pDict = dynamic_cast<PDFDict*>(pObj->m_pObject);
323 if( ! pDict )
324 continue;
326 std::unordered_map<OString,PDFEntry*,OStringHash>::iterator map_it =
327 pDict->m_aMap.find( "Type" );
328 if( map_it == pDict->m_aMap.end() )
329 continue;
331 PDFName* pName = dynamic_cast<PDFName*>(map_it->second);
332 if( ! pName )
333 continue;
334 if( ! pName->m_aName.equals( "FontDescriptor" ) )
335 continue;
337 // the font name will be helpful, also there must be one in
338 // a font descriptor
339 map_it = pDict->m_aMap.find( "FontName" );
340 if( map_it == pDict->m_aMap.end() )
341 continue;
342 pName = dynamic_cast<PDFName*>(map_it->second);
343 if( ! pName )
344 continue;
345 OString aFontName( pName->m_aName );
347 PDFObjectRef* pStreamRef = 0;
348 const char* pFileType = NULL;
349 // we have a font descriptor, try for a type 1 font
350 map_it = pDict->m_aMap.find( "FontFile" );
351 if( map_it != pDict->m_aMap.end() )
353 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
354 if( pStreamRef )
355 pFileType = "pfa";
358 // perhaps it's a truetype file ?
359 if( ! pStreamRef )
361 map_it = pDict->m_aMap.find( "FontFile2" );
362 if( map_it != pDict->m_aMap.end() )
364 pStreamRef = dynamic_cast<PDFObjectRef*>(map_it->second);
365 if( pStreamRef )
366 pFileType = "ttf";
370 if( ! pStreamRef )
371 continue;
373 PDFObject* pStream = i_pPDFFile->findObject( pStreamRef );
374 if( ! pStream )
375 continue;
377 OStringBuffer aOutStream( i_pOutFile );
378 aOutStream.append( "_font_" );
379 aOutStream.append( sal_Int32(pStreamRef->m_nNumber) );
380 aOutStream.append( "_" );
381 aOutStream.append( sal_Int32(pStreamRef->m_nGeneration) );
382 aOutStream.append( "_" );
383 aOutStream.append( aFontName );
384 if( pFileType )
386 aOutStream.append( "." );
387 aOutStream.append( pFileType );
389 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
390 aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
391 pStream->writeStream( aContext, i_pPDFFile );
393 return nRet;
396 std::vector< std::pair< sal_Int32, sal_Int32 > > s_aEmitObjects;
398 int write_objects( const char* i_pInFile, const char* i_pOutFile, PDFFile* i_pPDFFile )
400 int nRet = 0;
401 unsigned int nElements = s_aEmitObjects.size();
402 for( unsigned i = 0; i < nElements && nRet == 0; i++ )
404 sal_Int32 nObject = s_aEmitObjects[i].first;
405 sal_Int32 nGeneration = s_aEmitObjects[i].second;
406 PDFObject* pStream = i_pPDFFile->findObject( nObject, nGeneration );
407 if( ! pStream )
409 fprintf( stderr, "object %d %d not found !\n", (int)nObject, (int)nGeneration );
410 continue;
413 OStringBuffer aOutStream( i_pOutFile );
414 aOutStream.append( "_stream_" );
415 aOutStream.append( nObject );
416 aOutStream.append( "_" );
417 aOutStream.append( nGeneration );
418 FileEmitContext aContext( aOutStream.getStr(), i_pInFile, i_pPDFFile );
419 aContext.m_bDecrypt = i_pPDFFile->isEncrypted();
420 pStream->writeStream( aContext, i_pPDFFile );
422 return nRet;
425 SAL_IMPLEMENT_MAIN_WITH_ARGS( argc, argv )
427 const char* pInFile = NULL;
428 const char* pOutFile = NULL;
429 const char* pPassword = NULL;
430 OStringBuffer aOutFile( 256 );
431 PDFFileHdl aHdl = write_unzipFile;
433 for( int nArg = 1; nArg < argc; nArg++ )
435 if( argv[nArg][0] == '-' )
437 if( ! rtl_str_compare( "-pw", argv[nArg] ) ||
438 ! rtl_str_compare( "--password" , argv[nArg] ) )
440 if( nArg == argc-1 )
442 fprintf( stderr, "no password given\n" );
443 return 1;
445 nArg++;
446 pPassword = argv[nArg];
448 else if( ! rtl_str_compare( "-h", argv[nArg] ) ||
449 ! rtl_str_compare( "--help", argv[nArg] ) )
451 printHelp( argv[0] );
452 return 0;
454 else if( ! rtl_str_compare( "-a", argv[nArg] ) ||
455 ! rtl_str_compare( "--extract-add-streams", argv[nArg] ) )
457 aHdl = write_addStreams;
459 else if( ! rtl_str_compare( "-f", argv[nArg] ) ||
460 ! rtl_str_compare( "--extract-fonts", argv[nArg] ) )
462 aHdl = write_fonts;
464 else if( ! rtl_str_compare( "-o", argv[nArg] ) ||
465 ! rtl_str_compare( "--extract-objects", argv[nArg] ) )
467 aHdl = write_objects;
468 nArg++;
469 if( nArg < argc )
471 OString aObjs( argv[nArg] );
472 sal_Int32 nIndex = 0;
473 while( nIndex != -1 )
475 OString aToken( aObjs.getToken( 0, ',', nIndex ) );
476 sal_Int32 nObject = 0;
477 sal_Int32 nGeneration = 0;
478 sal_Int32 nGenIndex = 0;
479 nObject = aToken.getToken( 0, ':', nGenIndex ).toInt32();
480 if( nGenIndex != -1 )
481 nGeneration = aToken.getToken( 0, ':', nGenIndex ).toInt32();
482 s_aEmitObjects.push_back( std::pair<sal_Int32,sal_Int32>(nObject,nGeneration) );
486 else
488 fprintf( stderr, "unrecognized option \"%s\"\n",
489 argv[nArg] );
490 printHelp( argv[0] );
491 return 1;
494 else if( pInFile == NULL )
495 pInFile = argv[nArg];
496 else if( pOutFile == NULL )
497 pOutFile = argv[nArg];
499 if( ! pInFile )
501 fprintf( stderr, "no input file given\n" );
502 return 10;
504 if( ! pOutFile )
506 OString aFile( pInFile );
507 if( aFile.getLength() > 0 )
509 if( aFile.getLength() > 4 )
511 if( aFile.matchIgnoreAsciiCase( OString( ".pdf" ), aFile.getLength()-4 ) )
512 aOutFile.append( pInFile, aFile.getLength() - 4 );
513 else
514 aOutFile.append( aFile );
516 aOutFile.append( "_unzip.pdf" );
517 pOutFile = aOutFile.getStr();
519 else
521 fprintf( stderr, "no output file given\n" );
522 return 11;
526 return handleFile( pInFile, pOutFile, pPassword, aHdl );
529 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */