cid#1640468 Dereference after null check
[LibreOffice.git] / sdext / source / pdfimport / wrapper / wrapper.cxx
blob547ab148f9543a998e04bbe0b672283747bea47f
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_folders.h>
22 #include <contentsink.hxx>
23 #include <pdfparse.hxx>
24 #include <pdfihelper.hxx>
25 #include <wrapper.hxx>
27 #include <o3tl/string_view.hxx>
28 #include <osl/file.h>
29 #include <osl/file.hxx>
30 #include <osl/thread.h>
31 #include <osl/process.h>
32 #include <osl/diagnose.h>
33 #include <rtl/bootstrap.hxx>
34 #include <rtl/ustring.hxx>
35 #include <rtl/strbuf.hxx>
36 #include <sal/log.hxx>
38 #include <comphelper/propertysequence.hxx>
39 #include <comphelper/string.hxx>
40 #include <com/sun/star/io/XInputStream.hpp>
41 #include <com/sun/star/uno/XComponentContext.hpp>
42 #include <com/sun/star/rendering/PathCapType.hpp>
43 #include <com/sun/star/rendering/XPolyPolygon2D.hpp>
44 #include <com/sun/star/geometry/Matrix2D.hpp>
45 #include <com/sun/star/geometry/AffineMatrix2D.hpp>
46 #include <com/sun/star/geometry/RealRectangle2D.hpp>
47 #include <com/sun/star/geometry/RealSize2D.hpp>
48 #include <com/sun/star/task/XInteractionHandler.hpp>
50 #include <basegfx/point/b2dpoint.hxx>
51 #include <basegfx/polygon/b2dpolypolygon.hxx>
52 #include <basegfx/polygon/b2dpolygon.hxx>
53 #include <basegfx/utils/unopolypolygon.hxx>
54 #include <basegfx/vector/b2enums.hxx>
56 #include <vcl/metric.hxx>
57 #include <vcl/font.hxx>
58 #include <vcl/virdev.hxx>
60 #include <cstddef>
61 #include <memory>
62 #include <string_view>
63 #include <unordered_map>
64 #include <vector>
65 #include <string.h>
67 using namespace com::sun::star;
69 namespace pdfi
72 namespace
// Keys identifying the command keywords that arrive, one per line,
// from the out-of-process xpdf converter. NONE is the catch-all
// for "no/unknown command".
enum parseKey
{
    CLIPPATH,
    CLIPTOSTROKEPATH,
    DRAWCHAR,
    DRAWIMAGE,
    DRAWLINK,
    DRAWMASK,
    DRAWMASKEDIMAGE,
    DRAWSOFTMASKEDIMAGE,
    ENDPAGE,
    ENDTEXTOBJECT,
    EOCLIPPATH,
    EOFILLPATH,
    FILLPATH,
    HYPERLINK,
    INTERSECTCLIP,
    INTERSECTEOCLIP,
    POPSTATE,
    PUSHSTATE,
    RESTORESTATE,
    SAVESTATE,
    SETBLENDMODE,
    SETFILLCOLOR,
    SETFONT,
    SETLINECAP,
    SETLINEDASH,
    SETLINEJOIN,
    SETLINEWIDTH,
    SETMITERLIMIT,
    SETPAGENUM,
    SETSTROKECOLOR,
    SETTEXTRENDERMODE,
    SETTRANSFORMATION,
    STARTPAGE,
    STROKEPATH,
    TILINGPATTERNFILL,
    UPDATEBLENDMODE,
    UPDATECTM,
    UPDATEFILLCOLOR,
    UPDATEFILLOPACITY,
    UPDATEFLATNESS,
    UPDATEFONT,
    UPDATELINECAP,
    UPDATELINEDASH,
    UPDATELINEJOIN,
    UPDATELINEWIDTH,
    UPDATEMITERLIMIT,
    UPDATESTROKECOLOR,
    UPDATESTROKEOPACITY,
    NONE
};
129 #if defined _MSC_VER && defined __clang__
130 #pragma clang diagnostic push
131 #pragma clang diagnostic ignored "-Wdeprecated-register"
132 #pragma clang diagnostic ignored "-Wextra-tokens"
133 #endif
134 #include <hash.cxx>
135 #if defined _MSC_VER && defined __clang__
136 #pragma clang diagnostic pop
137 #endif
139 class Parser
141 friend class LineParser;
143 typedef std::unordered_map< sal_Int64,
144 FontAttributes > FontMapType;
146 ScopedVclPtr<VirtualDevice> m_xDev;
147 const uno::Reference<uno::XComponentContext> m_xContext;
148 const ContentSinkSharedPtr m_pSink;
149 const oslFileHandle m_pErr;
150 FontMapType m_aFontMap;
152 public:
153 Parser( const ContentSinkSharedPtr& rSink,
154 oslFileHandle pErr,
155 const uno::Reference<uno::XComponentContext>& xContext ) :
156 m_xContext(xContext),
157 m_pSink(rSink),
158 m_pErr(pErr),
159 m_aFontMap(101)
162 void parseLine( std::string_view aLine );
165 class LineParser {
166 Parser & m_parser;
167 std::string_view m_aLine;
169 static void parseFontFamilyName( FontAttributes& aResult );
170 void readInt32( sal_Int32& o_Value );
171 void readInt64( sal_Int64& o_Value );
172 void readDouble( double& o_Value );
173 void readBinaryData( uno::Sequence<sal_Int8>& rBuf );
175 uno::Sequence<beans::PropertyValue> readImageImpl();
177 public:
178 std::size_t m_nCharIndex = 0;
180 LineParser(Parser & parser, std::string_view line): m_parser(parser), m_aLine(line) {}
182 std::string_view readNextToken();
183 sal_Int32 readInt32();
184 double readDouble();
186 uno::Reference<rendering::XPolyPolygon2D> readPath();
188 void readChar();
189 void readLineCap();
190 void readLineDash();
191 void readLineJoin();
192 void readTransformation();
193 rendering::ARGBColor readColor();
194 void readFont();
196 void readImage();
197 void readMask();
198 void readLink();
199 void readMaskedImage();
200 void readSoftMaskedImage();
201 void readTilingPatternFill();
204 /** Unescapes line-ending characters in input string. These
205 characters are encoded as pairs of characters: '\\' 'n', resp.
206 '\\' 'r'. This function converts them back to '\n', resp. '\r'.
208 OString lcl_unescapeLineFeeds(std::string_view i_rStr)
210 const size_t nOrigLen(i_rStr.size());
211 const char* const pOrig(i_rStr.data());
212 std::unique_ptr<char[]> pBuffer(new char[nOrigLen + 1]);
214 const char* pRead(pOrig);
215 char* pWrite(pBuffer.get());
216 const char* pCur(pOrig);
217 while ((pCur = strchr(pCur, '\\')) != nullptr)
219 const char cNext(pCur[1]);
220 if (cNext == 'n' || cNext == 'r' || cNext == '\\')
222 const size_t nLen(pCur - pRead);
223 strncpy(pWrite, pRead, nLen);
224 pWrite += nLen;
225 *pWrite = cNext == 'n' ? '\n' : (cNext == 'r' ? '\r' : '\\');
226 ++pWrite;
227 pCur = pRead = pCur + 2;
229 else
231 // Just continue on the next character. The current
232 // block will be copied the next time it goes through the
233 // 'if' branch.
234 ++pCur;
237 // maybe there are some data to copy yet
238 if (sal::static_int_cast<size_t>(pRead - pOrig) < nOrigLen)
240 const size_t nLen(nOrigLen - (pRead - pOrig));
241 strncpy(pWrite, pRead, nLen);
242 pWrite += nLen;
244 *pWrite = '\0';
246 OString aResult(pBuffer.get());
247 return aResult;
250 std::string_view LineParser::readNextToken()
252 if (m_nCharIndex == std::string_view::npos) {
253 SAL_WARN("sdext.pdfimport", "insufficient input");
254 return {};
256 return o3tl::getToken(m_aLine,' ',m_nCharIndex);
259 void LineParser::readInt32( sal_Int32& o_Value )
261 std::string_view tok = readNextToken();
262 o_Value = o3tl::toInt32(tok);
265 sal_Int32 LineParser::readInt32()
267 std::string_view tok = readNextToken();
268 return o3tl::toInt32(tok);
271 void LineParser::readInt64( sal_Int64& o_Value )
273 std::string_view tok = readNextToken();
274 o_Value = o3tl::toInt64(tok);
277 void LineParser::readDouble( double& o_Value )
279 std::string_view tok = readNextToken();
280 o_Value = rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0,
281 nullptr, nullptr);
284 double LineParser::readDouble()
286 std::string_view tok = readNextToken();
287 return rtl_math_stringToDouble(tok.data(), tok.data() + tok.size(), '.', 0,
288 nullptr, nullptr);
291 void LineParser::readBinaryData( uno::Sequence<sal_Int8>& rBuf )
293 sal_Int32 nFileLen( rBuf.getLength() );
294 sal_Int8* pBuf( rBuf.getArray() );
295 sal_uInt64 nBytesRead(0);
296 oslFileError nRes=osl_File_E_None;
297 while( nFileLen )
299 nRes = osl_readFile( m_parser.m_pErr, pBuf, nFileLen, &nBytesRead );
300 if (osl_File_E_None != nRes )
301 break;
302 pBuf += nBytesRead;
303 nFileLen -= sal::static_int_cast<sal_Int32>(nBytesRead);
306 OSL_PRECOND(nRes==osl_File_E_None, "inconsistent data");
309 uno::Reference<rendering::XPolyPolygon2D> LineParser::readPath()
311 static const std::string_view aSubPathMarker( "subpath" );
313 if( readNextToken() != aSubPathMarker )
314 OSL_PRECOND(false, "broken path");
316 basegfx::B2DPolyPolygon aResult;
317 while( m_nCharIndex != std::string_view::npos )
319 basegfx::B2DPolygon aSubPath;
321 sal_Int32 nClosedFlag;
322 readInt32( nClosedFlag );
323 aSubPath.setClosed( nClosedFlag != 0 );
325 sal_Int32 nContiguousControlPoints(0);
327 while( m_nCharIndex != std::string_view::npos )
329 std::size_t nDummy=m_nCharIndex;
330 if (o3tl::getToken(m_aLine,' ',nDummy) == aSubPathMarker) {
331 break;
334 sal_Int32 nCurveFlag;
335 double nX, nY;
336 readDouble( nX );
337 readDouble( nY );
338 readInt32( nCurveFlag );
340 aSubPath.append(basegfx::B2DPoint(nX,nY));
341 if( nCurveFlag )
343 ++nContiguousControlPoints;
345 else if( nContiguousControlPoints )
347 OSL_PRECOND(nContiguousControlPoints==2,"broken bezier path");
349 // have two control points before us. the current one
350 // is a normal point - thus, convert previous points
351 // into bezier segment
352 const sal_uInt32 nPoints( aSubPath.count() );
353 const basegfx::B2DPoint aCtrlA( aSubPath.getB2DPoint(nPoints-3) );
354 const basegfx::B2DPoint aCtrlB( aSubPath.getB2DPoint(nPoints-2) );
355 const basegfx::B2DPoint aEnd( aSubPath.getB2DPoint(nPoints-1) );
356 aSubPath.remove(nPoints-3, 3);
357 aSubPath.appendBezierSegment(aCtrlA, aCtrlB, aEnd);
359 nContiguousControlPoints=0;
363 aResult.append( aSubPath );
364 if( m_nCharIndex != std::string_view::npos )
365 readNextToken();
368 return static_cast<rendering::XLinePolyPolygon2D*>(
369 new basegfx::unotools::UnoPolyPolygon(std::move(aResult)));
372 void LineParser::readChar()
374 double fontSize;
375 geometry::Matrix2D aUnoMatrix;
376 geometry::RealRectangle2D aRect;
378 readDouble(aRect.X1);
379 readDouble(aRect.Y1);
380 readDouble(aRect.X2);
381 readDouble(aRect.Y2);
382 readDouble(aUnoMatrix.m00);
383 readDouble(aUnoMatrix.m01);
384 readDouble(aUnoMatrix.m10);
385 readDouble(aUnoMatrix.m11);
386 readDouble(fontSize);
388 OString aChars;
390 if (m_nCharIndex != std::string_view::npos)
391 aChars = lcl_unescapeLineFeeds( m_aLine.substr( m_nCharIndex ) );
393 // chars gobble up rest of line
394 m_nCharIndex = std::string_view::npos;
396 m_parser.m_pSink->drawGlyphs(OStringToOUString(aChars, RTL_TEXTENCODING_UTF8),
397 aRect, aUnoMatrix, fontSize);
400 void LineParser::readLineCap()
402 sal_Int8 nCap(rendering::PathCapType::BUTT);
403 switch( readInt32() )
405 default:
406 case 0: nCap = rendering::PathCapType::BUTT; break;
407 case 1: nCap = rendering::PathCapType::ROUND; break;
408 case 2: nCap = rendering::PathCapType::SQUARE; break;
410 m_parser.m_pSink->setLineCap(nCap);
413 void LineParser::readLineDash()
415 if( m_nCharIndex == std::string_view::npos )
417 m_parser.m_pSink->setLineDash( uno::Sequence<double>(), 0.0 );
418 return;
421 const double nOffset(readDouble());
422 const sal_Int32 nLen(readInt32());
424 uno::Sequence<double> aDashArray(nLen);
425 double* pArray=aDashArray.getArray();
426 for( sal_Int32 i=0; i<nLen; ++i )
427 *pArray++ = readDouble();
429 m_parser.m_pSink->setLineDash( aDashArray, nOffset );
432 void LineParser::readLineJoin()
434 basegfx::B2DLineJoin nJoin(basegfx::B2DLineJoin::Miter);
435 switch( readInt32() )
437 default:
438 case 0: nJoin = basegfx::B2DLineJoin::Miter; break;
439 case 1: nJoin = basegfx::B2DLineJoin::Round; break;
440 case 2: nJoin = basegfx::B2DLineJoin::Bevel; break;
442 m_parser.m_pSink->setLineJoin(nJoin);
445 void LineParser::readTransformation()
447 geometry::AffineMatrix2D aMat;
448 readDouble(aMat.m00);
449 readDouble(aMat.m10);
450 readDouble(aMat.m01);
451 readDouble(aMat.m11);
452 readDouble(aMat.m02);
453 readDouble(aMat.m12);
454 m_parser.m_pSink->setTransformation( aMat );
457 rendering::ARGBColor LineParser::readColor()
459 rendering::ARGBColor aRes;
460 readDouble(aRes.Red);
461 readDouble(aRes.Green);
462 readDouble(aRes.Blue);
463 readDouble(aRes.Alpha);
464 return aRes;
467 /* Parse and convert the font family name (passed from xpdfimport) to correct font names
468 e.g. TimesNewRomanPSMT -> TimesNewRoman
469 TimesNewRomanPS-BoldMT -> TimesNewRoman
470 TimesNewRomanPS-BoldItalicMT -> TimesNewRoman
471 During the conversion, also apply the font features (bold italic etc) to the result.
473 TODO: Further convert the font names to real font names in the system rather than the PS names.
474 e.g., TimesNewRoman -> Times New Roman
476 void LineParser::parseFontFamilyName( FontAttributes& rResult )
478 SAL_INFO("sdext.pdfimport", "Processing " << rResult.familyName << " ---");
479 rResult.familyName = rResult.familyName.trim();
480 for (const OUString& fontAttributesSuffix: fontAttributesSuffixes)
482 if ( rResult.familyName.endsWith(fontAttributesSuffix) )
484 rResult.familyName = rResult.familyName.replaceAll(fontAttributesSuffix, "");
485 SAL_INFO("sdext.pdfimport", rResult.familyName);
486 if (fontAttributesSuffix == u"Heavy" || fontAttributesSuffix == u"Black")
488 rResult.fontWeight = u"900"_ustr;
490 else if (fontAttributesSuffix == u"ExtraBold" || fontAttributesSuffix == u"UltraBold")
492 rResult.fontWeight = u"800"_ustr;
494 else if (fontAttributesSuffix == u"Bold")
496 rResult.fontWeight = u"bold"_ustr;
498 else if (fontAttributesSuffix == u"Semibold")
500 rResult.fontWeight = u"600"_ustr;
502 else if (fontAttributesSuffix == u"Medium")
504 rResult.fontWeight = u"500"_ustr;
506 else if (fontAttributesSuffix == u"Normal" || fontAttributesSuffix == u"Regular" || fontAttributesSuffix == u"Book")
508 rResult.fontWeight = u"400"_ustr;
510 else if (fontAttributesSuffix == u"Light")
512 rResult.fontWeight = u"300"_ustr;
514 else if (fontAttributesSuffix == u"ExtraLight" || fontAttributesSuffix == u"UltraLight")
516 rResult.fontWeight = u"200"_ustr;
518 else if (fontAttributesSuffix == u"Thin")
520 rResult.fontWeight = u"100"_ustr;
523 if ( (fontAttributesSuffix == "Italic") or (fontAttributesSuffix == "Oblique") )
525 rResult.isItalic = true;
531 void LineParser::readFont()
534 xpdf line is like (separated by space):
535 updateFont <FontID> <isEmbedded> <maFontWeight> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName>
536 updateFont 14 1 4 0 0 1200.000000 23068 TimesNewRomanPSMT
538 If nEmbedSize > 0, then a fontFile is followed as a stream.
540 sal_Int64 nFontID;
541 sal_Int32 nIsEmbedded;
542 sal_Int32 nFontWeight;
543 sal_Int32 nIsItalic;
544 sal_Int32 nIsUnderline;
545 double nSize;
546 sal_Int32 nFileLen;
547 OString aFontName;
549 readInt64(nFontID); // read FontID
550 readInt32(nIsEmbedded); // read isEmbedded
551 readInt32(nFontWeight); // read maFontWeight, see GfxFont enum Weight
552 readInt32(nIsItalic); // read isItalic
553 readInt32(nIsUnderline);// read isUnderline
554 readDouble(nSize); // read TransformedFontSize
555 readInt32(nFileLen); // read nEmbedSize
557 nSize = nSize < 0.0 ? -nSize : nSize;
558 // Read FontName. From the current position to the end (any white spaces will be included).
559 aFontName = lcl_unescapeLineFeeds(m_aLine.substr(m_nCharIndex));
561 // name gobbles up rest of line
562 m_nCharIndex = std::string_view::npos;
564 // Check if this font is already in our font map list.
565 // If yes, update the font size and skip.
566 Parser::FontMapType::const_iterator pFont( m_parser.m_aFontMap.find(nFontID) );
567 if( pFont != m_parser.m_aFontMap.end() )
569 OSL_PRECOND(nFileLen==0,"font data for known font");
570 FontAttributes aRes(pFont->second);
571 aRes.size = nSize;
572 m_parser.m_pSink->setFont( aRes );
574 return;
577 // The font is not yet in the map list - get info and add to map
578 OUString sFontWeight; // font weight name per ODF specifications
579 if (nFontWeight == 0 or nFontWeight == 4) // WeightNotDefined or W400, map to normal font
580 sFontWeight = u"normal"_ustr;
581 else if (nFontWeight == 1) // W100, Thin
582 sFontWeight = u"100"_ustr;
583 else if (nFontWeight == 2) // W200, Extra-Light
584 sFontWeight = u"200"_ustr;
585 else if (nFontWeight == 3) // W300, Light
586 sFontWeight = u"300"_ustr;
587 else if (nFontWeight == 5) // W500, Medium. Is this supported by ODF?
588 sFontWeight = u"500"_ustr;
589 else if (nFontWeight == 6) // W600, Semi-Bold
590 sFontWeight = u"600"_ustr;
591 else if (nFontWeight == 7) // W700, Bold
592 sFontWeight = u"bold"_ustr;
593 else if (nFontWeight == 8) // W800, Extra-Bold
594 sFontWeight = u"800"_ustr;
595 else if (nFontWeight == 9) // W900, Black
596 sFontWeight = u"900"_ustr;
597 SAL_INFO("sdext.pdfimport", "Font weight passed from xpdfimport is: " << sFontWeight);
599 FontAttributes aResult( OStringToOUString( aFontName, RTL_TEXTENCODING_UTF8 ),
600 sFontWeight,
601 nIsItalic != 0,
602 nIsUnderline != 0,
603 nSize,
604 1.0);
606 /* The above font attributes (fontName, fontWeight, italic) are based on
607 xpdf line output and may not be reliable. To get correct attributes,
608 we do the following:
609 1. Read the embedded font file and determine the attributes based on the
610 font file.
611 2. If we failed to read the font file, or empty result is returned, then
612 determine the font attributes from the font name.
613 3. If all these attempts have failed, then use a fallback font.
615 if (nFileLen > 0)
617 uno::Sequence<sal_Int8> aFontFile(nFileLen);
618 readBinaryData(aFontFile); // Read fontFile.
620 vcl::Font aFontReadResult = vcl::Font::identifyFont(aFontFile.getArray(), nFileLen);
621 SAL_INFO("sdext.pdfimport", "familyName: " << aFontReadResult.GetFamilyName());
623 if (!aFontReadResult.GetFamilyName().isEmpty()) // font detection successful
625 // Family name
626 aResult.familyName = aFontReadResult.GetFamilyName();
627 SAL_INFO("sdext.pdfimport", aResult.familyName);
628 // tdf#143959: there are cases when the family name returned by font descriptor
629 // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name
630 // determined by parseFontFamilyName instead, but still determine the font
631 // attributes (bold italic etc) from the font descriptor.
632 if (aResult.familyName.getLength() > 7 and aResult.familyName.indexOf(u"+", 6) == 6)
634 aResult.familyName = aResult.familyName.copy(7, aResult.familyName.getLength() - 7);
635 parseFontFamilyName(aResult);
637 if (aResult.familyName.endsWithIgnoreAsciiCase("-VKana"))
639 parseFontFamilyName(aResult);
642 // Font weight
643 if (aFontReadResult.GetWeight() == WEIGHT_THIN)
644 aResult.fontWeight = u"100"_ustr;
645 else if (aFontReadResult.GetWeight() == WEIGHT_ULTRALIGHT)
646 aResult.fontWeight = u"200"_ustr;
647 else if (aFontReadResult.GetWeight() == WEIGHT_LIGHT)
648 aResult.fontWeight = u"300"_ustr;
649 else if (aFontReadResult.GetWeight() == WEIGHT_SEMILIGHT)
650 aResult.fontWeight = u"350"_ustr;
651 // no need to check "normal" here as this is default in nFontWeight above
652 else if (aFontReadResult.GetWeight() == WEIGHT_SEMIBOLD)
653 aResult.fontWeight = u"600"_ustr;
654 else if (aFontReadResult.GetWeight() == WEIGHT_BOLD)
655 aResult.fontWeight = u"bold"_ustr;
656 else if (aFontReadResult.GetWeight() == WEIGHT_ULTRABOLD)
657 aResult.fontWeight = u"800"_ustr;
658 else if (aFontReadResult.GetWeight() == WEIGHT_BLACK)
659 aResult.fontWeight = u"900"_ustr;
660 SAL_INFO("sdext.pdfimport", aResult.fontWeight);
662 // Italic
663 aResult.isItalic = (aFontReadResult.GetItalic() == ITALIC_OBLIQUE ||
664 aFontReadResult.GetItalic() == ITALIC_NORMAL);
665 } else // font detection failed
667 SAL_WARN("sdext.pdfimport",
668 "Font detection from fontFile returned empty result. Guessing font info from font name.");
669 parseFontFamilyName(aResult);
672 } else // no embedded font file - guess font attributes from font name
674 parseFontFamilyName(aResult);
677 // last fallback
678 if (aResult.familyName.isEmpty())
680 SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial.");
681 aResult.familyName = "Arial";
684 if (!m_parser.m_xDev)
685 m_parser.m_xDev.disposeAndReset(VclPtr<VirtualDevice>::Create());
687 vcl::Font font(aResult.familyName, Size(0, 1000));
688 m_parser.m_xDev->SetFont(font);
689 FontMetric metric(m_parser.m_xDev->GetFontMetric());
690 aResult.ascent = metric.GetAscent() / 1000.0;
692 m_parser.m_aFontMap[nFontID] = aResult;
694 aResult.size = nSize;
695 m_parser.m_pSink->setFont(aResult);
698 uno::Sequence<beans::PropertyValue> LineParser::readImageImpl()
700 std::string_view aToken = readNextToken();
701 const sal_Int32 nImageSize( readInt32() );
703 OUString aFileName;
704 if( aToken == "PNG" )
705 aFileName = "DUMMY.PNG";
706 else if( aToken == "JPEG" )
707 aFileName = "DUMMY.JPEG";
708 else if( aToken == "PBM" )
709 aFileName = "DUMMY.PBM";
710 else
712 SAL_WARN_IF(aToken != "PPM","sdext.pdfimport","Invalid bitmap format");
713 aFileName = "DUMMY.PPM";
716 uno::Sequence<sal_Int8> aDataSequence(nImageSize);
717 readBinaryData( aDataSequence );
719 uno::Sequence< uno::Any > aStreamCreationArgs{ uno::Any(aDataSequence) };
721 uno::Reference< uno::XComponentContext > xContext( m_parser.m_xContext, uno::UNO_SET_THROW );
722 uno::Reference< lang::XMultiComponentFactory > xFactory( xContext->getServiceManager(), uno::UNO_SET_THROW );
723 uno::Reference< io::XInputStream > xDataStream(
724 xFactory->createInstanceWithArgumentsAndContext( u"com.sun.star.io.SequenceInputStream"_ustr, aStreamCreationArgs, m_parser.m_xContext ),
725 uno::UNO_QUERY_THROW );
727 uno::Sequence<beans::PropertyValue> aSequence( comphelper::InitPropertySequence({
728 { "URL", uno::Any(aFileName) },
729 { "InputStream", uno::Any( xDataStream ) },
730 { "InputSequence", uno::Any(aDataSequence) }
731 }));
733 return aSequence;
736 void LineParser::readImage()
738 sal_Int32 nWidth, nHeight,nMaskColors;
739 readInt32(nWidth);
740 readInt32(nHeight);
741 readInt32(nMaskColors);
743 uno::Sequence<beans::PropertyValue> aImg( readImageImpl() );
745 if( nMaskColors )
747 uno::Sequence<sal_Int8> aDataSequence(nMaskColors);
748 readBinaryData( aDataSequence );
750 uno::Sequence<double> aMinRange(nMaskColors/2);
751 auto pMinRange = aMinRange.getArray();
752 uno::Sequence<double> aMaxRange(nMaskColors/2);
753 auto pMaxRange = aMaxRange.getArray();
754 for( sal_Int32 i=0; i<nMaskColors/2; ++i )
756 pMinRange[i] = aDataSequence[i] / 255.0;
757 pMaxRange[i] = aDataSequence[i+nMaskColors/2] / 255.0;
760 uno::Sequence<uno::Any> aMaskRanges{ uno::Any(aMinRange), uno::Any(aMaxRange) };
761 m_parser.m_pSink->drawColorMaskedImage( aImg, aMaskRanges );
763 else
764 m_parser.m_pSink->drawImage( aImg );
767 void LineParser::readMask()
769 sal_Int32 nWidth, nHeight, nInvert;
770 readInt32(nWidth);
771 readInt32(nHeight);
772 readInt32(nInvert);
774 m_parser.m_pSink->drawMask( readImageImpl(), nInvert != 0);
777 void LineParser::readLink()
779 geometry::RealRectangle2D aBounds;
780 readDouble(aBounds.X1);
781 readDouble(aBounds.Y1);
782 readDouble(aBounds.X2);
783 readDouble(aBounds.Y2);
785 m_parser.m_pSink->hyperLink( aBounds,
786 OStringToOUString( lcl_unescapeLineFeeds(
787 m_aLine.substr(m_nCharIndex) ),
788 RTL_TEXTENCODING_UTF8 ) );
789 // name gobbles up rest of line
790 m_nCharIndex = std::string_view::npos;
793 void LineParser::readMaskedImage()
795 sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight, nMaskInvert;
796 readInt32(nWidth);
797 readInt32(nHeight);
798 readInt32(nMaskWidth);
799 readInt32(nMaskHeight);
800 readInt32(nMaskInvert);
802 const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() );
803 const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() );
804 m_parser.m_pSink->drawMaskedImage( aImage, aMask, nMaskInvert != 0 );
807 void LineParser::readSoftMaskedImage()
809 sal_Int32 nWidth, nHeight, nMaskWidth, nMaskHeight;
810 readInt32(nWidth);
811 readInt32(nHeight);
812 readInt32(nMaskWidth);
813 readInt32(nMaskHeight);
815 const uno::Sequence<beans::PropertyValue> aImage( readImageImpl() );
816 const uno::Sequence<beans::PropertyValue> aMask ( readImageImpl() );
817 m_parser.m_pSink->drawAlphaMaskedImage( aImage, aMask );
820 void LineParser::readTilingPatternFill()
822 sal_Int32 nX0, nY0, nX1, nY1, nPaintType;
823 double nXStep, nYStep;
824 geometry::AffineMatrix2D aMat;
825 readInt32(nX0);
826 readInt32(nY0);
827 readInt32(nX1);
828 readInt32(nY1);
830 readDouble(nXStep);
831 readDouble(nYStep);
833 readInt32(nPaintType);
835 readDouble(aMat.m00);
836 readDouble(aMat.m10);
837 readDouble(aMat.m01);
838 readDouble(aMat.m11);
839 readDouble(aMat.m02);
840 readDouble(aMat.m12);
842 // The tile is an image with alpha
843 const uno::Sequence<beans::PropertyValue> aTile ( readImageImpl() );
845 m_parser.m_pSink->tilingPatternFill( nX0, nY0, nX1, nY1,
846 nXStep, nYStep,
847 nPaintType,
848 aMat,
849 aTile );
852 void Parser::parseLine( std::string_view aLine )
854 OSL_PRECOND( m_pSink, "Invalid sink" );
855 OSL_PRECOND( m_pErr, "Invalid filehandle" );
856 OSL_PRECOND( m_xContext.is(), "Invalid service factory" );
858 LineParser lp(*this, aLine);
859 const std::string_view rCmd = lp.readNextToken();
860 const hash_entry* pEntry = PdfKeywordHash::in_word_set( rCmd.data(),
861 rCmd.size() );
862 assert(pEntry);
863 switch( pEntry->eKey )
865 case CLIPPATH:
866 m_pSink->intersectClip(lp.readPath()); break;
867 case CLIPTOSTROKEPATH:
868 m_pSink->intersectClipToStroke(lp.readPath()); break;
869 case DRAWCHAR:
870 lp.readChar(); break;
871 case DRAWIMAGE:
872 lp.readImage(); break;
873 case DRAWLINK:
874 lp.readLink(); break;
875 case DRAWMASK:
876 lp.readMask(); break;
877 case DRAWMASKEDIMAGE:
878 lp.readMaskedImage(); break;
879 case DRAWSOFTMASKEDIMAGE:
880 lp.readSoftMaskedImage(); break;
881 case ENDPAGE:
882 m_pSink->endPage(); break;
883 case ENDTEXTOBJECT:
884 m_pSink->endText(); break;
885 case EOCLIPPATH:
886 m_pSink->intersectEoClip(lp.readPath()); break;
887 case EOFILLPATH:
888 m_pSink->eoFillPath(lp.readPath()); break;
889 case FILLPATH:
890 m_pSink->fillPath(lp.readPath()); break;
891 case RESTORESTATE:
892 m_pSink->popState(); break;
893 case SAVESTATE:
894 m_pSink->pushState(); break;
895 case SETPAGENUM:
896 m_pSink->setPageNum( lp.readInt32() ); break;
897 case STARTPAGE:
899 const double nWidth ( lp.readDouble() );
900 const double nHeight( lp.readDouble() );
901 m_pSink->startPage( geometry::RealSize2D( nWidth, nHeight ) );
902 break;
904 case STROKEPATH:
905 m_pSink->strokePath(lp.readPath()); break;
906 case TILINGPATTERNFILL:
907 lp.readTilingPatternFill(); break;
908 case UPDATECTM:
909 lp.readTransformation(); break;
910 case UPDATEFILLCOLOR:
911 m_pSink->setFillColor( lp.readColor() ); break;
912 case UPDATEFLATNESS:
913 m_pSink->setFlatness( lp.readDouble( ) ); break;
914 case UPDATEFONT:
915 lp.readFont(); break;
916 case UPDATELINECAP:
917 lp.readLineCap(); break;
918 case UPDATELINEDASH:
919 lp.readLineDash(); break;
920 case UPDATELINEJOIN:
921 lp.readLineJoin(); break;
922 case UPDATELINEWIDTH:
923 m_pSink->setLineWidth( lp.readDouble() );break;
924 case UPDATEMITERLIMIT:
925 m_pSink->setMiterLimit( lp.readDouble() ); break;
926 case UPDATESTROKECOLOR:
927 m_pSink->setStrokeColor( lp.readColor() ); break;
928 case UPDATESTROKEOPACITY:
929 break;
930 case SETTEXTRENDERMODE:
931 m_pSink->setTextRenderMode( lp.readInt32() ); break;
933 case NONE:
934 default:
935 OSL_PRECOND(false,"Unknown input");
936 break;
939 // all consumed?
940 SAL_WARN_IF(
941 lp.m_nCharIndex!=std::string_view::npos, "sdext.pdfimport", "leftover scanner input");
944 } // namespace
946 static bool checkEncryption( std::u16string_view i_rPath,
947 const uno::Reference< task::XInteractionHandler >& i_xIHdl,
948 OUString& io_rPwd,
949 bool& o_rIsEncrypted,
950 const OUString& i_rDocName
953 bool bSuccess = false;
955 std::unique_ptr<pdfparse::PDFEntry> pEntry(pdfparse::PDFReader::read(i_rPath));
956 if( pEntry )
958 pdfparse::PDFFile* pPDFFile = dynamic_cast<pdfparse::PDFFile*>(pEntry.get());
959 if( pPDFFile )
961 o_rIsEncrypted = pPDFFile->isEncrypted();
962 if( o_rIsEncrypted )
964 if( pPDFFile->usesSupportedEncryptionFormat() )
966 bool bAuthenticated = false;
967 if( !io_rPwd.isEmpty() )
969 OString aIsoPwd = OUStringToOString( io_rPwd,
970 RTL_TEXTENCODING_ISO_8859_1 );
971 bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd );
973 if( bAuthenticated )
974 bSuccess = true;
975 else
977 if( i_xIHdl.is() )
979 bool bEntered = false;
982 bEntered = getPassword( i_xIHdl, io_rPwd, ! bEntered, i_rDocName );
983 OString aIsoPwd = OUStringToOString( io_rPwd,
984 RTL_TEXTENCODING_ISO_8859_1 );
985 bAuthenticated = pPDFFile->setupDecryptionData( aIsoPwd );
986 } while( bEntered && ! bAuthenticated );
989 bSuccess = bAuthenticated;
992 else if( i_xIHdl.is() )
994 reportUnsupportedEncryptionFormat( i_xIHdl );
995 //TODO: this should either be handled further down the
996 // call stack, or else information that this has already
997 // been handled should be passed down the call stack, so
998 // that SfxBaseModel::load does not show an additional
999 // "General Error" message box
1002 else
1003 bSuccess = true;
1006 return bSuccess;
1009 namespace {
1011 class Buffering
1013 static const int SIZE = 64*1024;
1014 std::unique_ptr<char[]> aBuffer;
1015 oslFileHandle& pOut;
1016 size_t pos;
1017 sal_uInt64 left;
1019 public:
1020 explicit Buffering(oslFileHandle& out) : aBuffer(new char[SIZE]), pOut(out), pos(0), left(0) {}
1022 oslFileError read(char *pChar, short count, sal_uInt64* pBytesRead)
1024 oslFileError nRes = osl_File_E_None;
1025 sal_uInt64 nBytesRead = 0;
1026 while (count > 0)
1028 if (left == 0)
1030 nRes = osl_readFile(pOut, aBuffer.get(), SIZE, &left);
1031 if (nRes != osl_File_E_None || left == 0)
1033 *pBytesRead = nBytesRead;
1034 return nRes;
1036 pos = 0;
1038 *pChar = aBuffer.get()[pos];
1039 --count;
1040 ++pos;
1041 --left;
1042 ++pChar;
1043 ++nBytesRead;
1045 *pBytesRead = nBytesRead;
1046 return osl_File_E_None;
1052 bool xpdf_ImportFromFile(const OUString& rURL,
1053 const ContentSinkSharedPtr& rSink,
1054 const uno::Reference<task::XInteractionHandler>& xIHdl,
1055 const OUString& rPwd,
1056 const uno::Reference<uno::XComponentContext>& xContext,
1057 const OUString& rFilterOptions)
1059 OSL_ASSERT(rSink);
1061 OUString aSysUPath;
1062 if( osl_getSystemPathFromFileURL( rURL.pData, &aSysUPath.pData ) != osl_File_E_None )
1064 SAL_WARN(
1065 "sdext.pdfimport",
1066 "getSystemPathFromFileURL(" << rURL << ") failed");
1067 return false;
1069 OUString aDocName( rURL.copy( rURL.lastIndexOf( '/' )+1 ) );
1071 // check for encryption, if necessary get password
1072 OUString aPwd( rPwd );
1073 bool bIsEncrypted = false;
1074 if( !checkEncryption( aSysUPath, xIHdl, aPwd, bIsEncrypted, aDocName ) )
1076 SAL_INFO(
1077 "sdext.pdfimport",
1078 "checkEncryption(" << aSysUPath << ") failed");
1079 return false;
1082 // Determine xpdfimport executable URL:
1083 OUString converterURL(u"$BRAND_BASE_DIR/" LIBO_BIN_FOLDER "/xpdfimport"_ustr);
1084 rtl::Bootstrap::expandMacros(converterURL); //TODO: detect failure
1086 // spawn separate process to keep LGPL/GPL code apart.
1088 static constexpr OUString aOptFlag(u"-o"_ustr);
1089 std::vector<rtl_uString*> args({ aSysUPath.pData });
1090 if (!rFilterOptions.isEmpty())
1092 args.push_back(aOptFlag.pData);
1093 args.push_back(rFilterOptions.pData);
1096 oslProcess aProcess;
1097 oslFileHandle pIn = nullptr;
1098 oslFileHandle pOut = nullptr;
1099 oslFileHandle pErr = nullptr;
1100 oslSecurity pSecurity = osl_getCurrentSecurity ();
1101 oslProcessError eErr =
1102 osl_executeProcess_WithRedirectedIO(converterURL.pData,
1103 args.data(),
1104 args.size(),
1105 osl_Process_SEARCHPATH|osl_Process_HIDDEN,
1106 pSecurity,
1107 nullptr, nullptr, 0,
1108 &aProcess, &pIn, &pOut, &pErr);
1109 osl_freeSecurityHandle(pSecurity);
1111 bool bRet=true;
1114 if( eErr!=osl_Process_E_None )
1116 SAL_WARN(
1117 "sdext.pdfimport",
1118 "executeProcess of " << converterURL << " failed with "
1119 << +eErr);
1120 return false;
1123 if( pIn )
1125 OStringBuffer aBuf(256);
1126 if( bIsEncrypted )
1127 aBuf.append( OUStringToOString( aPwd, RTL_TEXTENCODING_ISO_8859_1 ) );
1128 aBuf.append( '\n' );
1130 sal_uInt64 nWritten = 0;
1131 osl_writeFile( pIn, aBuf.getStr(), sal_uInt64(aBuf.getLength()), &nWritten );
1134 if( pOut && pErr )
1136 // read results of PDF parser. One line - one call to
1137 // OutputDev. stderr is used for alternate streams, like
1138 // embedded fonts and bitmaps
1139 Parser aParser(rSink,pErr,xContext);
1140 Buffering aBuffering(pOut);
1141 OStringBuffer line;
1142 for( ;; )
1144 char aChar('\n');
1145 sal_uInt64 nBytesRead;
1146 oslFileError nRes;
1148 // skip garbage \r \n at start of line
1149 for (;;)
1151 nRes = aBuffering.read(&aChar, 1, &nBytesRead);
1152 if (osl_File_E_None != nRes || nBytesRead != 1 || (aChar != '\n' && aChar != '\r') )
1153 break;
1155 if ( osl_File_E_None != nRes )
1156 break;
1158 if( aChar != '\n' && aChar != '\r' )
1159 line.append( aChar );
1161 for (;;)
1163 nRes = aBuffering.read(&aChar, 1, &nBytesRead);
1164 if ( osl_File_E_None != nRes || nBytesRead != 1 || aChar == '\n' || aChar == '\r' )
1165 break;
1166 line.append( aChar );
1168 if ( osl_File_E_None != nRes )
1169 break;
1170 if ( line.isEmpty() )
1171 break;
1173 aParser.parseLine(line);
1174 line.setLength(0);
1178 catch( uno::Exception& )
1180 // crappy C file interface. need manual resource dealloc
1181 bRet = false;
1184 if( pIn )
1185 osl_closeFile(pIn);
1186 if( pOut )
1187 osl_closeFile(pOut);
1188 if( pErr )
1189 osl_closeFile(pErr);
1190 eErr = osl_joinProcess(aProcess);
1191 if (eErr == osl_Process_E_None)
1193 oslProcessInfo info;
1194 info.Size = sizeof info;
1195 eErr = osl_getProcessInfo(aProcess, osl_Process_EXITCODE, &info);
1196 if (eErr == osl_Process_E_None)
1198 if (info.Code != 0)
1200 SAL_WARN(
1201 "sdext.pdfimport",
1202 "getProcessInfo of " << converterURL
1203 << " failed with exit code " << info.Code);
1204 // TODO: use xIHdl and/or exceptions to inform the user; see poppler/ErrorCodes.h
1205 bRet = false;
1208 else
1210 SAL_WARN(
1211 "sdext.pdfimport",
1212 "getProcessInfo of " << converterURL << " failed with "
1213 << +eErr);
1214 bRet = false;
1217 else
1219 SAL_WARN(
1220 "sdext.pdfimport",
1221 "joinProcess of " << converterURL << " failed with " << +eErr);
1222 bRet = false;
1224 osl_freeProcessHandle(aProcess);
1225 return bRet;
1229 bool xpdf_ImportFromStream( const uno::Reference< io::XInputStream >& xInput,
1230 const ContentSinkSharedPtr& rSink,
1231 const uno::Reference<task::XInteractionHandler >& xIHdl,
1232 const OUString& rPwd,
1233 const uno::Reference< uno::XComponentContext >& xContext,
1234 const OUString& rFilterOptions )
1236 OSL_ASSERT(xInput.is());
1237 OSL_ASSERT(rSink);
1239 // convert XInputStream to local temp file
1240 oslFileHandle aFile = nullptr;
1241 OUString aURL;
1242 if( osl_createTempFile( nullptr, &aFile, &aURL.pData ) != osl_File_E_None )
1243 return false;
1245 // copy content, buffered...
1246 const sal_uInt32 nBufSize = 4096;
1247 uno::Sequence<sal_Int8> aBuf( nBufSize );
1248 sal_uInt64 nBytes = 0;
1249 sal_uInt64 nWritten = 0;
1250 bool bSuccess = true;
1255 nBytes = xInput->readBytes( aBuf, nBufSize );
1257 catch( css::uno::Exception& )
1259 osl_closeFile( aFile );
1260 throw;
1262 if( nBytes > 0 )
1264 osl_writeFile( aFile, aBuf.getConstArray(), nBytes, &nWritten );
1265 if( nWritten != nBytes )
1267 bSuccess = false;
1268 break;
1272 while( nBytes == nBufSize );
1274 osl_closeFile( aFile );
1276 if ( bSuccess )
1277 bSuccess = xpdf_ImportFromFile( aURL, rSink, xIHdl, rPwd, xContext, rFilterOptions );
1278 osl_removeFile( aURL.pData );
1280 return bSuccess;
1285 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */