1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
9 * This file incorporates work covered by the following license notice:
11 * Licensed to the Apache Software Foundation (ASF) under one or more
12 * contributor license agreements. See the NOTICE file distributed
13 * with this work for additional information regarding copyright
14 * ownership. The ASF licenses this file to you under the Apache
15 * License, Version 2.0 (the "License"); you may not use this file
16 * except in compliance with the License. You may obtain a copy of
17 * the License at http://www.apache.org/licenses/LICENSE-2.0 .
20 #include <config_folders.h>
22 #include <contentsink.hxx>
23 #include <pdfparse.hxx>
24 #include <pdfihelper.hxx>
25 #include <wrapper.hxx>
27 #include <o3tl/string_view.hxx>
29 #include <osl/file.hxx>
30 #include <osl/thread.h>
31 #include <osl/process.h>
32 #include <osl/diagnose.h>
33 #include <rtl/bootstrap.hxx>
34 #include <rtl/ustring.hxx>
35 #include <rtl/strbuf.hxx>
36 #include <sal/log.hxx>
38 #include <comphelper/propertysequence.hxx>
39 #include <comphelper/string.hxx>
40 #include <com/sun/star/io/XInputStream.hpp>
41 #include <com/sun/star/uno/XComponentContext.hpp>
42 #include <com/sun/star/rendering/PathCapType.hpp>
43 #include <com/sun/star/rendering/XPolyPolygon2D.hpp>
44 #include <com/sun/star/geometry/Matrix2D.hpp>
45 #include <com/sun/star/geometry/AffineMatrix2D.hpp>
46 #include <com/sun/star/geometry/RealRectangle2D.hpp>
47 #include <com/sun/star/geometry/RealSize2D.hpp>
48 #include <com/sun/star/task/XInteractionHandler.hpp>
50 #include <basegfx/point/b2dpoint.hxx>
51 #include <basegfx/polygon/b2dpolypolygon.hxx>
52 #include <basegfx/polygon/b2dpolygon.hxx>
53 #include <basegfx/utils/unopolypolygon.hxx>
54 #include <basegfx/vector/b2enums.hxx>
56 #include <vcl/metric.hxx>
57 #include <vcl/font.hxx>
58 #include <vcl/virdev.hxx>
62 #include <string_view>
63 #include <unordered_map>
67 using namespace com::sun::star
;
75 // identifier of the strings coming from the out-of-process xpdf
129 #if defined _MSC_VER && defined __clang__
130 #pragma clang diagnostic push
131 #pragma clang diagnostic ignored "-Wdeprecated-register"
132 #pragma clang diagnostic ignored "-Wextra-tokens"
135 #if defined _MSC_VER && defined __clang__
136 #pragma clang diagnostic pop
141 friend class LineParser
;
143 typedef std::unordered_map
< sal_Int64
,
144 FontAttributes
> FontMapType
;
146 ScopedVclPtr
<VirtualDevice
> m_xDev
;
147 const uno::Reference
<uno::XComponentContext
> m_xContext
;
148 const ContentSinkSharedPtr m_pSink
;
149 const oslFileHandle m_pErr
;
150 FontMapType m_aFontMap
;
153 Parser( const ContentSinkSharedPtr
& rSink
,
155 const uno::Reference
<uno::XComponentContext
>& xContext
) :
156 m_xContext(xContext
),
162 void parseLine( std::string_view aLine
);
167 std::string_view m_aLine
;
169 static void parseFontFamilyName( FontAttributes
& aResult
);
170 void readInt32( sal_Int32
& o_Value
);
171 void readInt64( sal_Int64
& o_Value
);
172 void readDouble( double& o_Value
);
173 void readBinaryData( uno::Sequence
<sal_Int8
>& rBuf
);
175 uno::Sequence
<beans::PropertyValue
> readImageImpl();
178 std::size_t m_nCharIndex
= 0;
180 LineParser(Parser
& parser
, std::string_view line
): m_parser(parser
), m_aLine(line
) {}
182 std::string_view
readNextToken();
183 sal_Int32
readInt32();
186 uno::Reference
<rendering::XPolyPolygon2D
> readPath();
192 void readTransformation();
193 rendering::ARGBColor
readColor();
199 void readMaskedImage();
200 void readSoftMaskedImage();
201 void readTilingPatternFill();
204 /** Unescapes line-ending characters in input string. These
205 characters are encoded as pairs of characters: '\\' 'n', resp.
206 '\\' 'r'. This function converts them back to '\n', resp. '\r'.
208 OString
lcl_unescapeLineFeeds(std::string_view i_rStr
)
210 const size_t nOrigLen(i_rStr
.size());
211 const char* const pOrig(i_rStr
.data());
212 std::unique_ptr
<char[]> pBuffer(new char[nOrigLen
+ 1]);
214 const char* pRead(pOrig
);
215 char* pWrite(pBuffer
.get());
216 const char* pCur(pOrig
);
217 while ((pCur
= strchr(pCur
, '\\')) != nullptr)
219 const char cNext(pCur
[1]);
220 if (cNext
== 'n' || cNext
== 'r' || cNext
== '\\')
222 const size_t nLen(pCur
- pRead
);
223 strncpy(pWrite
, pRead
, nLen
);
225 *pWrite
= cNext
== 'n' ? '\n' : (cNext
== 'r' ? '\r' : '\\');
227 pCur
= pRead
= pCur
+ 2;
231 // Just continue on the next character. The current
232 // block will be copied the next time it goes through the
237 // maybe there are some data to copy yet
238 if (sal::static_int_cast
<size_t>(pRead
- pOrig
) < nOrigLen
)
240 const size_t nLen(nOrigLen
- (pRead
- pOrig
));
241 strncpy(pWrite
, pRead
, nLen
);
246 OString
aResult(pBuffer
.get());
250 std::string_view
LineParser::readNextToken()
252 if (m_nCharIndex
== std::string_view::npos
) {
253 SAL_WARN("sdext.pdfimport", "insufficient input");
256 return o3tl::getToken(m_aLine
,' ',m_nCharIndex
);
259 void LineParser::readInt32( sal_Int32
& o_Value
)
261 std::string_view tok
= readNextToken();
262 o_Value
= o3tl::toInt32(tok
);
265 sal_Int32
LineParser::readInt32()
267 std::string_view tok
= readNextToken();
268 return o3tl::toInt32(tok
);
271 void LineParser::readInt64( sal_Int64
& o_Value
)
273 std::string_view tok
= readNextToken();
274 o_Value
= o3tl::toInt64(tok
);
277 void LineParser::readDouble( double& o_Value
)
279 std::string_view tok
= readNextToken();
280 o_Value
= rtl_math_stringToDouble(tok
.data(), tok
.data() + tok
.size(), '.', 0,
284 double LineParser::readDouble()
286 std::string_view tok
= readNextToken();
287 return rtl_math_stringToDouble(tok
.data(), tok
.data() + tok
.size(), '.', 0,
291 void LineParser::readBinaryData( uno::Sequence
<sal_Int8
>& rBuf
)
293 sal_Int32
nFileLen( rBuf
.getLength() );
294 sal_Int8
* pBuf( rBuf
.getArray() );
295 sal_uInt64
nBytesRead(0);
296 oslFileError nRes
=osl_File_E_None
;
299 nRes
= osl_readFile( m_parser
.m_pErr
, pBuf
, nFileLen
, &nBytesRead
);
300 if (osl_File_E_None
!= nRes
)
303 nFileLen
-= sal::static_int_cast
<sal_Int32
>(nBytesRead
);
306 OSL_PRECOND(nRes
==osl_File_E_None
, "inconsistent data");
309 uno::Reference
<rendering::XPolyPolygon2D
> LineParser::readPath()
311 static const std::string_view
aSubPathMarker( "subpath" );
313 if( readNextToken() != aSubPathMarker
)
314 OSL_PRECOND(false, "broken path");
316 basegfx::B2DPolyPolygon aResult
;
317 while( m_nCharIndex
!= std::string_view::npos
)
319 basegfx::B2DPolygon aSubPath
;
321 sal_Int32 nClosedFlag
;
322 readInt32( nClosedFlag
);
323 aSubPath
.setClosed( nClosedFlag
!= 0 );
325 sal_Int32
nContiguousControlPoints(0);
327 while( m_nCharIndex
!= std::string_view::npos
)
329 std::size_t nDummy
=m_nCharIndex
;
330 if (o3tl::getToken(m_aLine
,' ',nDummy
) == aSubPathMarker
) {
334 sal_Int32 nCurveFlag
;
338 readInt32( nCurveFlag
);
340 aSubPath
.append(basegfx::B2DPoint(nX
,nY
));
343 ++nContiguousControlPoints
;
345 else if( nContiguousControlPoints
)
347 OSL_PRECOND(nContiguousControlPoints
==2,"broken bezier path");
349 // have two control points before us. the current one
350 // is a normal point - thus, convert previous points
351 // into bezier segment
352 const sal_uInt32
nPoints( aSubPath
.count() );
353 const basegfx::B2DPoint
aCtrlA( aSubPath
.getB2DPoint(nPoints
-3) );
354 const basegfx::B2DPoint
aCtrlB( aSubPath
.getB2DPoint(nPoints
-2) );
355 const basegfx::B2DPoint
aEnd( aSubPath
.getB2DPoint(nPoints
-1) );
356 aSubPath
.remove(nPoints
-3, 3);
357 aSubPath
.appendBezierSegment(aCtrlA
, aCtrlB
, aEnd
);
359 nContiguousControlPoints
=0;
363 aResult
.append( aSubPath
);
364 if( m_nCharIndex
!= std::string_view::npos
)
368 return static_cast<rendering::XLinePolyPolygon2D
*>(
369 new basegfx::unotools::UnoPolyPolygon(std::move(aResult
)));
372 void LineParser::readChar()
375 geometry::Matrix2D aUnoMatrix
;
376 geometry::RealRectangle2D aRect
;
378 readDouble(aRect
.X1
);
379 readDouble(aRect
.Y1
);
380 readDouble(aRect
.X2
);
381 readDouble(aRect
.Y2
);
382 readDouble(aUnoMatrix
.m00
);
383 readDouble(aUnoMatrix
.m01
);
384 readDouble(aUnoMatrix
.m10
);
385 readDouble(aUnoMatrix
.m11
);
386 readDouble(fontSize
);
390 if (m_nCharIndex
!= std::string_view::npos
)
391 aChars
= lcl_unescapeLineFeeds( m_aLine
.substr( m_nCharIndex
) );
393 // chars gobble up rest of line
394 m_nCharIndex
= std::string_view::npos
;
396 m_parser
.m_pSink
->drawGlyphs(OStringToOUString(aChars
, RTL_TEXTENCODING_UTF8
),
397 aRect
, aUnoMatrix
, fontSize
);
400 void LineParser::readLineCap()
402 sal_Int8
nCap(rendering::PathCapType::BUTT
);
403 switch( readInt32() )
406 case 0: nCap
= rendering::PathCapType::BUTT
; break;
407 case 1: nCap
= rendering::PathCapType::ROUND
; break;
408 case 2: nCap
= rendering::PathCapType::SQUARE
; break;
410 m_parser
.m_pSink
->setLineCap(nCap
);
413 void LineParser::readLineDash()
415 if( m_nCharIndex
== std::string_view::npos
)
417 m_parser
.m_pSink
->setLineDash( uno::Sequence
<double>(), 0.0 );
421 const double nOffset(readDouble());
422 const sal_Int32
nLen(readInt32());
424 uno::Sequence
<double> aDashArray(nLen
);
425 double* pArray
=aDashArray
.getArray();
426 for( sal_Int32 i
=0; i
<nLen
; ++i
)
427 *pArray
++ = readDouble();
429 m_parser
.m_pSink
->setLineDash( aDashArray
, nOffset
);
432 void LineParser::readLineJoin()
434 basegfx::B2DLineJoin
nJoin(basegfx::B2DLineJoin::Miter
);
435 switch( readInt32() )
438 case 0: nJoin
= basegfx::B2DLineJoin::Miter
; break;
439 case 1: nJoin
= basegfx::B2DLineJoin::Round
; break;
440 case 2: nJoin
= basegfx::B2DLineJoin::Bevel
; break;
442 m_parser
.m_pSink
->setLineJoin(nJoin
);
445 void LineParser::readTransformation()
447 geometry::AffineMatrix2D aMat
;
448 readDouble(aMat
.m00
);
449 readDouble(aMat
.m10
);
450 readDouble(aMat
.m01
);
451 readDouble(aMat
.m11
);
452 readDouble(aMat
.m02
);
453 readDouble(aMat
.m12
);
454 m_parser
.m_pSink
->setTransformation( aMat
);
457 rendering::ARGBColor
LineParser::readColor()
459 rendering::ARGBColor aRes
;
460 readDouble(aRes
.Red
);
461 readDouble(aRes
.Green
);
462 readDouble(aRes
.Blue
);
463 readDouble(aRes
.Alpha
);
467 /* Parse and convert the font family name (passed from xpdfimport) to correct font names
468 e.g. TimesNewRomanPSMT -> TimesNewRoman
469 TimesNewRomanPS-BoldMT -> TimesNewRoman
470 TimesNewRomanPS-BoldItalicMT -> TimesNewRoman
471 During the conversion, also apply the font features (bold italic etc) to the result.
473 TODO: Further convert the font names to real font names in the system rather than the PS names.
474 e.g., TimesNewRoman -> Times New Roman
476 void LineParser::parseFontFamilyName( FontAttributes
& rResult
)
478 SAL_INFO("sdext.pdfimport", "Processing " << rResult
.familyName
<< " ---");
479 rResult
.familyName
= rResult
.familyName
.trim();
480 for (const OUString
& fontAttributesSuffix
: fontAttributesSuffixes
)
482 if ( rResult
.familyName
.endsWith(fontAttributesSuffix
) )
484 rResult
.familyName
= rResult
.familyName
.replaceAll(fontAttributesSuffix
, "");
485 SAL_INFO("sdext.pdfimport", rResult
.familyName
);
486 if (fontAttributesSuffix
== u
"Heavy" || fontAttributesSuffix
== u
"Black")
488 rResult
.fontWeight
= u
"900"_ustr
;
490 else if (fontAttributesSuffix
== u
"ExtraBold" || fontAttributesSuffix
== u
"UltraBold")
492 rResult
.fontWeight
= u
"800"_ustr
;
494 else if (fontAttributesSuffix
== u
"Bold")
496 rResult
.fontWeight
= u
"bold"_ustr
;
498 else if (fontAttributesSuffix
== u
"Semibold")
500 rResult
.fontWeight
= u
"600"_ustr
;
502 else if (fontAttributesSuffix
== u
"Medium")
504 rResult
.fontWeight
= u
"500"_ustr
;
506 else if (fontAttributesSuffix
== u
"Normal" || fontAttributesSuffix
== u
"Regular" || fontAttributesSuffix
== u
"Book")
508 rResult
.fontWeight
= u
"400"_ustr
;
510 else if (fontAttributesSuffix
== u
"Light")
512 rResult
.fontWeight
= u
"300"_ustr
;
514 else if (fontAttributesSuffix
== u
"ExtraLight" || fontAttributesSuffix
== u
"UltraLight")
516 rResult
.fontWeight
= u
"200"_ustr
;
518 else if (fontAttributesSuffix
== u
"Thin")
520 rResult
.fontWeight
= u
"100"_ustr
;
523 if ( (fontAttributesSuffix
== "Italic") or (fontAttributesSuffix
== "Oblique") )
525 rResult
.isItalic
= true;
531 void LineParser::readFont()
534 xpdf line is like (separated by space):
535 updateFont <FontID> <isEmbedded> <maFontWeight> <isItalic> <isUnderline> <TransformedFontSize> <nEmbedSize> <FontName>
536 updateFont 14 1 4 0 0 1200.000000 23068 TimesNewRomanPSMT
538 If nEmbedSize > 0, then a fontFile is followed as a stream.
541 sal_Int32 nIsEmbedded
;
542 sal_Int32 nFontWeight
;
544 sal_Int32 nIsUnderline
;
549 readInt64(nFontID
); // read FontID
550 readInt32(nIsEmbedded
); // read isEmbedded
551 readInt32(nFontWeight
); // read maFontWeight, see GfxFont enum Weight
552 readInt32(nIsItalic
); // read isItalic
553 readInt32(nIsUnderline
);// read isUnderline
554 readDouble(nSize
); // read TransformedFontSize
555 readInt32(nFileLen
); // read nEmbedSize
557 nSize
= nSize
< 0.0 ? -nSize
: nSize
;
558 // Read FontName. From the current position to the end (any white spaces will be included).
559 aFontName
= lcl_unescapeLineFeeds(m_aLine
.substr(m_nCharIndex
));
561 // name gobbles up rest of line
562 m_nCharIndex
= std::string_view::npos
;
564 // Check if this font is already in our font map list.
565 // If yes, update the font size and skip.
566 Parser::FontMapType::const_iterator
pFont( m_parser
.m_aFontMap
.find(nFontID
) );
567 if( pFont
!= m_parser
.m_aFontMap
.end() )
569 OSL_PRECOND(nFileLen
==0,"font data for known font");
570 FontAttributes
aRes(pFont
->second
);
572 m_parser
.m_pSink
->setFont( aRes
);
577 // The font is not yet in the map list - get info and add to map
578 OUString sFontWeight
; // font weight name per ODF specifications
579 if (nFontWeight
== 0 or nFontWeight
== 4) // WeightNotDefined or W400, map to normal font
580 sFontWeight
= u
"normal"_ustr
;
581 else if (nFontWeight
== 1) // W100, Thin
582 sFontWeight
= u
"100"_ustr
;
583 else if (nFontWeight
== 2) // W200, Extra-Light
584 sFontWeight
= u
"200"_ustr
;
585 else if (nFontWeight
== 3) // W300, Light
586 sFontWeight
= u
"300"_ustr
;
587 else if (nFontWeight
== 5) // W500, Medium. Is this supported by ODF?
588 sFontWeight
= u
"500"_ustr
;
589 else if (nFontWeight
== 6) // W600, Semi-Bold
590 sFontWeight
= u
"600"_ustr
;
591 else if (nFontWeight
== 7) // W700, Bold
592 sFontWeight
= u
"bold"_ustr
;
593 else if (nFontWeight
== 8) // W800, Extra-Bold
594 sFontWeight
= u
"800"_ustr
;
595 else if (nFontWeight
== 9) // W900, Black
596 sFontWeight
= u
"900"_ustr
;
597 SAL_INFO("sdext.pdfimport", "Font weight passed from xpdfimport is: " << sFontWeight
);
599 FontAttributes
aResult( OStringToOUString( aFontName
, RTL_TEXTENCODING_UTF8
),
606 /* The above font attributes (fontName, fontWeight, italic) are based on
607 xpdf line output and may not be reliable. To get correct attributes,
609 1. Read the embedded font file and determine the attributes based on the
611 2. If we failed to read the font file, or empty result is returned, then
612 determine the font attributes from the font name.
613 3. If all these attempts have failed, then use a fallback font.
617 uno::Sequence
<sal_Int8
> aFontFile(nFileLen
);
618 readBinaryData(aFontFile
); // Read fontFile.
620 vcl::Font aFontReadResult
= vcl::Font::identifyFont(aFontFile
.getArray(), nFileLen
);
621 SAL_INFO("sdext.pdfimport", "familyName: " << aFontReadResult
.GetFamilyName());
623 if (!aFontReadResult
.GetFamilyName().isEmpty()) // font detection successful
626 aResult
.familyName
= aFontReadResult
.GetFamilyName();
627 SAL_INFO("sdext.pdfimport", aResult
.familyName
);
628 // tdf#143959: there are cases when the family name returned by font descriptor
629 // is like "AAAAAA+TimesNewRoman,Bold". In this case, use the font name
630 // determined by parseFontFamilyName instead, but still determine the font
631 // attributes (bold italic etc) from the font descriptor.
632 if (aResult
.familyName
.getLength() > 7 and aResult
.familyName
.indexOf(u
"+", 6) == 6)
634 aResult
.familyName
= aResult
.familyName
.copy(7, aResult
.familyName
.getLength() - 7);
635 parseFontFamilyName(aResult
);
637 if (aResult
.familyName
.endsWithIgnoreAsciiCase("-VKana"))
639 parseFontFamilyName(aResult
);
643 if (aFontReadResult
.GetWeight() == WEIGHT_THIN
)
644 aResult
.fontWeight
= u
"100"_ustr
;
645 else if (aFontReadResult
.GetWeight() == WEIGHT_ULTRALIGHT
)
646 aResult
.fontWeight
= u
"200"_ustr
;
647 else if (aFontReadResult
.GetWeight() == WEIGHT_LIGHT
)
648 aResult
.fontWeight
= u
"300"_ustr
;
649 else if (aFontReadResult
.GetWeight() == WEIGHT_SEMILIGHT
)
650 aResult
.fontWeight
= u
"350"_ustr
;
651 // no need to check "normal" here as this is default in nFontWeight above
652 else if (aFontReadResult
.GetWeight() == WEIGHT_SEMIBOLD
)
653 aResult
.fontWeight
= u
"600"_ustr
;
654 else if (aFontReadResult
.GetWeight() == WEIGHT_BOLD
)
655 aResult
.fontWeight
= u
"bold"_ustr
;
656 else if (aFontReadResult
.GetWeight() == WEIGHT_ULTRABOLD
)
657 aResult
.fontWeight
= u
"800"_ustr
;
658 else if (aFontReadResult
.GetWeight() == WEIGHT_BLACK
)
659 aResult
.fontWeight
= u
"900"_ustr
;
660 SAL_INFO("sdext.pdfimport", aResult
.fontWeight
);
663 aResult
.isItalic
= (aFontReadResult
.GetItalic() == ITALIC_OBLIQUE
||
664 aFontReadResult
.GetItalic() == ITALIC_NORMAL
);
665 } else // font detection failed
667 SAL_WARN("sdext.pdfimport",
668 "Font detection from fontFile returned empty result. Guessing font info from font name.");
669 parseFontFamilyName(aResult
);
672 } else // no embedded font file - guess font attributes from font name
674 parseFontFamilyName(aResult
);
678 if (aResult
.familyName
.isEmpty())
680 SAL_WARN("sdext.pdfimport", "Failed to determine the font, using a fallback font Arial.");
681 aResult
.familyName
= "Arial";
684 if (!m_parser
.m_xDev
)
685 m_parser
.m_xDev
.disposeAndReset(VclPtr
<VirtualDevice
>::Create());
687 vcl::Font
font(aResult
.familyName
, Size(0, 1000));
688 m_parser
.m_xDev
->SetFont(font
);
689 FontMetric
metric(m_parser
.m_xDev
->GetFontMetric());
690 aResult
.ascent
= metric
.GetAscent() / 1000.0;
692 m_parser
.m_aFontMap
[nFontID
] = aResult
;
694 aResult
.size
= nSize
;
695 m_parser
.m_pSink
->setFont(aResult
);
698 uno::Sequence
<beans::PropertyValue
> LineParser::readImageImpl()
700 std::string_view aToken
= readNextToken();
701 const sal_Int32
nImageSize( readInt32() );
704 if( aToken
== "PNG" )
705 aFileName
= "DUMMY.PNG";
706 else if( aToken
== "JPEG" )
707 aFileName
= "DUMMY.JPEG";
708 else if( aToken
== "PBM" )
709 aFileName
= "DUMMY.PBM";
712 SAL_WARN_IF(aToken
!= "PPM","sdext.pdfimport","Invalid bitmap format");
713 aFileName
= "DUMMY.PPM";
716 uno::Sequence
<sal_Int8
> aDataSequence(nImageSize
);
717 readBinaryData( aDataSequence
);
719 uno::Sequence
< uno::Any
> aStreamCreationArgs
{ uno::Any(aDataSequence
) };
721 uno::Reference
< uno::XComponentContext
> xContext( m_parser
.m_xContext
, uno::UNO_SET_THROW
);
722 uno::Reference
< lang::XMultiComponentFactory
> xFactory( xContext
->getServiceManager(), uno::UNO_SET_THROW
);
723 uno::Reference
< io::XInputStream
> xDataStream(
724 xFactory
->createInstanceWithArgumentsAndContext( u
"com.sun.star.io.SequenceInputStream"_ustr
, aStreamCreationArgs
, m_parser
.m_xContext
),
725 uno::UNO_QUERY_THROW
);
727 uno::Sequence
<beans::PropertyValue
> aSequence( comphelper::InitPropertySequence({
728 { "URL", uno::Any(aFileName
) },
729 { "InputStream", uno::Any( xDataStream
) },
730 { "InputSequence", uno::Any(aDataSequence
) }
736 void LineParser::readImage()
738 sal_Int32 nWidth
, nHeight
,nMaskColors
;
741 readInt32(nMaskColors
);
743 uno::Sequence
<beans::PropertyValue
> aImg( readImageImpl() );
747 uno::Sequence
<sal_Int8
> aDataSequence(nMaskColors
);
748 readBinaryData( aDataSequence
);
750 uno::Sequence
<double> aMinRange(nMaskColors
/2);
751 auto pMinRange
= aMinRange
.getArray();
752 uno::Sequence
<double> aMaxRange(nMaskColors
/2);
753 auto pMaxRange
= aMaxRange
.getArray();
754 for( sal_Int32 i
=0; i
<nMaskColors
/2; ++i
)
756 pMinRange
[i
] = aDataSequence
[i
] / 255.0;
757 pMaxRange
[i
] = aDataSequence
[i
+nMaskColors
/2] / 255.0;
760 uno::Sequence
<uno::Any
> aMaskRanges
{ uno::Any(aMinRange
), uno::Any(aMaxRange
) };
761 m_parser
.m_pSink
->drawColorMaskedImage( aImg
, aMaskRanges
);
764 m_parser
.m_pSink
->drawImage( aImg
);
767 void LineParser::readMask()
769 sal_Int32 nWidth
, nHeight
, nInvert
;
774 m_parser
.m_pSink
->drawMask( readImageImpl(), nInvert
!= 0);
777 void LineParser::readLink()
779 geometry::RealRectangle2D aBounds
;
780 readDouble(aBounds
.X1
);
781 readDouble(aBounds
.Y1
);
782 readDouble(aBounds
.X2
);
783 readDouble(aBounds
.Y2
);
785 m_parser
.m_pSink
->hyperLink( aBounds
,
786 OStringToOUString( lcl_unescapeLineFeeds(
787 m_aLine
.substr(m_nCharIndex
) ),
788 RTL_TEXTENCODING_UTF8
) );
789 // name gobbles up rest of line
790 m_nCharIndex
= std::string_view::npos
;
793 void LineParser::readMaskedImage()
795 sal_Int32 nWidth
, nHeight
, nMaskWidth
, nMaskHeight
, nMaskInvert
;
798 readInt32(nMaskWidth
);
799 readInt32(nMaskHeight
);
800 readInt32(nMaskInvert
);
802 const uno::Sequence
<beans::PropertyValue
> aImage( readImageImpl() );
803 const uno::Sequence
<beans::PropertyValue
> aMask ( readImageImpl() );
804 m_parser
.m_pSink
->drawMaskedImage( aImage
, aMask
, nMaskInvert
!= 0 );
807 void LineParser::readSoftMaskedImage()
809 sal_Int32 nWidth
, nHeight
, nMaskWidth
, nMaskHeight
;
812 readInt32(nMaskWidth
);
813 readInt32(nMaskHeight
);
815 const uno::Sequence
<beans::PropertyValue
> aImage( readImageImpl() );
816 const uno::Sequence
<beans::PropertyValue
> aMask ( readImageImpl() );
817 m_parser
.m_pSink
->drawAlphaMaskedImage( aImage
, aMask
);
820 void LineParser::readTilingPatternFill()
822 sal_Int32 nX0
, nY0
, nX1
, nY1
, nPaintType
;
823 double nXStep
, nYStep
;
824 geometry::AffineMatrix2D aMat
;
833 readInt32(nPaintType
);
835 readDouble(aMat
.m00
);
836 readDouble(aMat
.m10
);
837 readDouble(aMat
.m01
);
838 readDouble(aMat
.m11
);
839 readDouble(aMat
.m02
);
840 readDouble(aMat
.m12
);
842 // The tile is an image with alpha
843 const uno::Sequence
<beans::PropertyValue
> aTile ( readImageImpl() );
845 m_parser
.m_pSink
->tilingPatternFill( nX0
, nY0
, nX1
, nY1
,
852 void Parser::parseLine( std::string_view aLine
)
854 OSL_PRECOND( m_pSink
, "Invalid sink" );
855 OSL_PRECOND( m_pErr
, "Invalid filehandle" );
856 OSL_PRECOND( m_xContext
.is(), "Invalid service factory" );
858 LineParser
lp(*this, aLine
);
859 const std::string_view rCmd
= lp
.readNextToken();
860 const hash_entry
* pEntry
= PdfKeywordHash::in_word_set( rCmd
.data(),
863 switch( pEntry
->eKey
)
866 m_pSink
->intersectClip(lp
.readPath()); break;
867 case CLIPTOSTROKEPATH
:
868 m_pSink
->intersectClipToStroke(lp
.readPath()); break;
870 lp
.readChar(); break;
872 lp
.readImage(); break;
874 lp
.readLink(); break;
876 lp
.readMask(); break;
877 case DRAWMASKEDIMAGE
:
878 lp
.readMaskedImage(); break;
879 case DRAWSOFTMASKEDIMAGE
:
880 lp
.readSoftMaskedImage(); break;
882 m_pSink
->endPage(); break;
884 m_pSink
->endText(); break;
886 m_pSink
->intersectEoClip(lp
.readPath()); break;
888 m_pSink
->eoFillPath(lp
.readPath()); break;
890 m_pSink
->fillPath(lp
.readPath()); break;
892 m_pSink
->popState(); break;
894 m_pSink
->pushState(); break;
896 m_pSink
->setPageNum( lp
.readInt32() ); break;
899 const double nWidth ( lp
.readDouble() );
900 const double nHeight( lp
.readDouble() );
901 m_pSink
->startPage( geometry::RealSize2D( nWidth
, nHeight
) );
905 m_pSink
->strokePath(lp
.readPath()); break;
906 case TILINGPATTERNFILL
:
907 lp
.readTilingPatternFill(); break;
909 lp
.readTransformation(); break;
910 case UPDATEFILLCOLOR
:
911 m_pSink
->setFillColor( lp
.readColor() ); break;
913 m_pSink
->setFlatness( lp
.readDouble( ) ); break;
915 lp
.readFont(); break;
917 lp
.readLineCap(); break;
919 lp
.readLineDash(); break;
921 lp
.readLineJoin(); break;
922 case UPDATELINEWIDTH
:
923 m_pSink
->setLineWidth( lp
.readDouble() );break;
924 case UPDATEMITERLIMIT
:
925 m_pSink
->setMiterLimit( lp
.readDouble() ); break;
926 case UPDATESTROKECOLOR
:
927 m_pSink
->setStrokeColor( lp
.readColor() ); break;
928 case UPDATESTROKEOPACITY
:
930 case SETTEXTRENDERMODE
:
931 m_pSink
->setTextRenderMode( lp
.readInt32() ); break;
935 OSL_PRECOND(false,"Unknown input");
941 lp
.m_nCharIndex
!=std::string_view::npos
, "sdext.pdfimport", "leftover scanner input");
946 static bool checkEncryption( std::u16string_view i_rPath
,
947 const uno::Reference
< task::XInteractionHandler
>& i_xIHdl
,
949 bool& o_rIsEncrypted
,
950 const OUString
& i_rDocName
953 bool bSuccess
= false;
955 std::unique_ptr
<pdfparse::PDFEntry
> pEntry(pdfparse::PDFReader::read(i_rPath
));
958 pdfparse::PDFFile
* pPDFFile
= dynamic_cast<pdfparse::PDFFile
*>(pEntry
.get());
961 o_rIsEncrypted
= pPDFFile
->isEncrypted();
964 if( pPDFFile
->usesSupportedEncryptionFormat() )
966 bool bAuthenticated
= false;
967 if( !io_rPwd
.isEmpty() )
969 OString aIsoPwd
= OUStringToOString( io_rPwd
,
970 RTL_TEXTENCODING_ISO_8859_1
);
971 bAuthenticated
= pPDFFile
->setupDecryptionData( aIsoPwd
);
979 bool bEntered
= false;
982 bEntered
= getPassword( i_xIHdl
, io_rPwd
, ! bEntered
, i_rDocName
);
983 OString aIsoPwd
= OUStringToOString( io_rPwd
,
984 RTL_TEXTENCODING_ISO_8859_1
);
985 bAuthenticated
= pPDFFile
->setupDecryptionData( aIsoPwd
);
986 } while( bEntered
&& ! bAuthenticated
);
989 bSuccess
= bAuthenticated
;
992 else if( i_xIHdl
.is() )
994 reportUnsupportedEncryptionFormat( i_xIHdl
);
995 //TODO: this should either be handled further down the
996 // call stack, or else information that this has already
997 // been handled should be passed down the call stack, so
998 // that SfxBaseModel::load does not show an additional
999 // "General Error" message box
1013 static const int SIZE
= 64*1024;
1014 std::unique_ptr
<char[]> aBuffer
;
1015 oslFileHandle
& pOut
;
1020 explicit Buffering(oslFileHandle
& out
) : aBuffer(new char[SIZE
]), pOut(out
), pos(0), left(0) {}
1022 oslFileError
read(char *pChar
, short count
, sal_uInt64
* pBytesRead
)
1024 oslFileError nRes
= osl_File_E_None
;
1025 sal_uInt64 nBytesRead
= 0;
1030 nRes
= osl_readFile(pOut
, aBuffer
.get(), SIZE
, &left
);
1031 if (nRes
!= osl_File_E_None
|| left
== 0)
1033 *pBytesRead
= nBytesRead
;
1038 *pChar
= aBuffer
.get()[pos
];
1045 *pBytesRead
= nBytesRead
;
1046 return osl_File_E_None
;
1052 bool xpdf_ImportFromFile(const OUString
& rURL
,
1053 const ContentSinkSharedPtr
& rSink
,
1054 const uno::Reference
<task::XInteractionHandler
>& xIHdl
,
1055 const OUString
& rPwd
,
1056 const uno::Reference
<uno::XComponentContext
>& xContext
,
1057 const OUString
& rFilterOptions
)
1062 if( osl_getSystemPathFromFileURL( rURL
.pData
, &aSysUPath
.pData
) != osl_File_E_None
)
1066 "getSystemPathFromFileURL(" << rURL
<< ") failed");
1069 OUString
aDocName( rURL
.copy( rURL
.lastIndexOf( '/' )+1 ) );
1071 // check for encryption, if necessary get password
1072 OUString
aPwd( rPwd
);
1073 bool bIsEncrypted
= false;
1074 if( !checkEncryption( aSysUPath
, xIHdl
, aPwd
, bIsEncrypted
, aDocName
) )
1078 "checkEncryption(" << aSysUPath
<< ") failed");
1082 // Determine xpdfimport executable URL:
1083 OUString
converterURL(u
"$BRAND_BASE_DIR/" LIBO_BIN_FOLDER
"/xpdfimport"_ustr
);
1084 rtl::Bootstrap::expandMacros(converterURL
); //TODO: detect failure
1086 // spawn separate process to keep LGPL/GPL code apart.
1088 static constexpr OUString
aOptFlag(u
"-o"_ustr
);
1089 std::vector
<rtl_uString
*> args({ aSysUPath
.pData
});
1090 if (!rFilterOptions
.isEmpty())
1092 args
.push_back(aOptFlag
.pData
);
1093 args
.push_back(rFilterOptions
.pData
);
1096 oslProcess aProcess
;
1097 oslFileHandle pIn
= nullptr;
1098 oslFileHandle pOut
= nullptr;
1099 oslFileHandle pErr
= nullptr;
1100 oslSecurity pSecurity
= osl_getCurrentSecurity ();
1101 oslProcessError eErr
=
1102 osl_executeProcess_WithRedirectedIO(converterURL
.pData
,
1105 osl_Process_SEARCHPATH
|osl_Process_HIDDEN
,
1107 nullptr, nullptr, 0,
1108 &aProcess
, &pIn
, &pOut
, &pErr
);
1109 osl_freeSecurityHandle(pSecurity
);
1114 if( eErr
!=osl_Process_E_None
)
1118 "executeProcess of " << converterURL
<< " failed with "
1125 OStringBuffer
aBuf(256);
1127 aBuf
.append( OUStringToOString( aPwd
, RTL_TEXTENCODING_ISO_8859_1
) );
1128 aBuf
.append( '\n' );
1130 sal_uInt64 nWritten
= 0;
1131 osl_writeFile( pIn
, aBuf
.getStr(), sal_uInt64(aBuf
.getLength()), &nWritten
);
1136 // read results of PDF parser. One line - one call to
1137 // OutputDev. stderr is used for alternate streams, like
1138 // embedded fonts and bitmaps
1139 Parser
aParser(rSink
,pErr
,xContext
);
1140 Buffering
aBuffering(pOut
);
1145 sal_uInt64 nBytesRead
;
1148 // skip garbage \r \n at start of line
1151 nRes
= aBuffering
.read(&aChar
, 1, &nBytesRead
);
1152 if (osl_File_E_None
!= nRes
|| nBytesRead
!= 1 || (aChar
!= '\n' && aChar
!= '\r') )
1155 if ( osl_File_E_None
!= nRes
)
1158 if( aChar
!= '\n' && aChar
!= '\r' )
1159 line
.append( aChar
);
1163 nRes
= aBuffering
.read(&aChar
, 1, &nBytesRead
);
1164 if ( osl_File_E_None
!= nRes
|| nBytesRead
!= 1 || aChar
== '\n' || aChar
== '\r' )
1166 line
.append( aChar
);
1168 if ( osl_File_E_None
!= nRes
)
1170 if ( line
.isEmpty() )
1173 aParser
.parseLine(line
);
1178 catch( uno::Exception
& )
1180 // crappy C file interface. need manual resource dealloc
1187 osl_closeFile(pOut
);
1189 osl_closeFile(pErr
);
1190 eErr
= osl_joinProcess(aProcess
);
1191 if (eErr
== osl_Process_E_None
)
1193 oslProcessInfo info
;
1194 info
.Size
= sizeof info
;
1195 eErr
= osl_getProcessInfo(aProcess
, osl_Process_EXITCODE
, &info
);
1196 if (eErr
== osl_Process_E_None
)
1202 "getProcessInfo of " << converterURL
1203 << " failed with exit code " << info
.Code
);
1204 // TODO: use xIHdl and/or exceptions to inform the user; see poppler/ErrorCodes.h
1212 "getProcessInfo of " << converterURL
<< " failed with "
1221 "joinProcess of " << converterURL
<< " failed with " << +eErr
);
1224 osl_freeProcessHandle(aProcess
);
1229 bool xpdf_ImportFromStream( const uno::Reference
< io::XInputStream
>& xInput
,
1230 const ContentSinkSharedPtr
& rSink
,
1231 const uno::Reference
<task::XInteractionHandler
>& xIHdl
,
1232 const OUString
& rPwd
,
1233 const uno::Reference
< uno::XComponentContext
>& xContext
,
1234 const OUString
& rFilterOptions
)
1236 OSL_ASSERT(xInput
.is());
1239 // convert XInputStream to local temp file
1240 oslFileHandle aFile
= nullptr;
1242 if( osl_createTempFile( nullptr, &aFile
, &aURL
.pData
) != osl_File_E_None
)
1245 // copy content, buffered...
1246 const sal_uInt32 nBufSize
= 4096;
1247 uno::Sequence
<sal_Int8
> aBuf( nBufSize
);
1248 sal_uInt64 nBytes
= 0;
1249 sal_uInt64 nWritten
= 0;
1250 bool bSuccess
= true;
1255 nBytes
= xInput
->readBytes( aBuf
, nBufSize
);
1257 catch( css::uno::Exception
& )
1259 osl_closeFile( aFile
);
1264 osl_writeFile( aFile
, aBuf
.getConstArray(), nBytes
, &nWritten
);
1265 if( nWritten
!= nBytes
)
1272 while( nBytes
== nBufSize
);
1274 osl_closeFile( aFile
);
1277 bSuccess
= xpdf_ImportFromFile( aURL
, rSink
, xIHdl
, rPwd
, xContext
, rFilterOptions
);
1278 osl_removeFile( aURL
.pData
);
1285 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */