bump product version to 7.2.5.1
[LibreOffice.git] / vcl / source / filter / ipdf / pdfread.cxx
blobd97259d528077916b1e87280b7bbdbe564986fc9
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <vcl/pdfread.hxx>
12 #include <tools/UnitConversion.hxx>
14 #include <vcl/graph.hxx>
15 #include <bitmap/BitmapWriteAccess.hxx>
16 #include <unotools/ucbstreamhelper.hxx>
17 #include <unotools/datetime.hxx>
19 #include <vcl/filter/PDFiumLibrary.hxx>
20 #include <sal/log.hxx>
22 using namespace com::sun::star;
24 namespace
/// Converts a length given in points (1/72 inch) into pixels.
///
/// @param fPoint         length in points.
/// @param fResolutionDPI resolution, in dots per inch, to convert with.
/// @return fPoint * fResolutionDPI / 72; the result is not rounded.
inline double pointToPixel(const double fPoint, const double fResolutionDPI)
{
    constexpr double fPointsPerInch = 72.;
    // Multiply first, then divide: keeps the exact evaluation order callers rely on.
    const double fScaled = fPoint * fResolutionDPI;
    return fScaled / fPointsPerInch;
}
32 /// Decide if PDF data is old enough to be compatible.
33 bool isCompatible(SvStream& rInStream, sal_uInt64 nPos, sal_uInt64 nSize)
35 if (nSize < 8)
36 return false;
38 // %PDF-x.y
39 sal_uInt8 aFirstBytes[8];
40 rInStream.Seek(nPos);
41 sal_uLong nRead = rInStream.ReadBytes(aFirstBytes, 8);
42 if (nRead < 8)
43 return false;
45 if (aFirstBytes[0] != '%' || aFirstBytes[1] != 'P' || aFirstBytes[2] != 'D'
46 || aFirstBytes[3] != 'F' || aFirstBytes[4] != '-')
47 return false;
49 sal_Int32 nMajor = OString(char(aFirstBytes[5])).toInt32();
50 sal_Int32 nMinor = OString(char(aFirstBytes[7])).toInt32();
51 return !(nMajor > 1 || (nMajor == 1 && nMinor > 6));
54 /// Takes care of transparently downgrading the version of the PDF stream in
55 /// case it's too new for our PDF export.
56 bool getCompatibleStream(SvStream& rInStream, SvStream& rOutStream)
58 sal_uInt64 nPos = STREAM_SEEK_TO_BEGIN;
59 sal_uInt64 nSize = STREAM_SEEK_TO_END;
60 bool bCompatible = isCompatible(rInStream, nPos, nSize);
61 rInStream.Seek(nPos);
62 if (bCompatible)
63 // Not converting.
64 rOutStream.WriteStream(rInStream, nSize);
65 else
67 // Downconvert to PDF-1.6.
68 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
69 if (!pPdfium)
70 return false;
72 // Read input into a buffer.
73 SvMemoryStream aInBuffer;
74 aInBuffer.WriteStream(rInStream, nSize);
76 SvMemoryStream aSaved;
78 // Load the buffer using pdfium.
79 std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument
80 = pPdfium->openDocument(aInBuffer.GetData(), aInBuffer.GetSize());
81 if (!pPdfDocument)
82 return false;
84 // 16 means PDF-1.6.
85 if (!pPdfDocument->saveWithVersion(aSaved, 16))
86 return false;
89 aSaved.Seek(STREAM_SEEK_TO_BEGIN);
90 rOutStream.WriteStream(aSaved);
93 return rOutStream.good();
96 BinaryDataContainer createBinaryDataContainer(SvStream& rStream)
98 // Save the original PDF stream for later use.
99 SvMemoryStream aMemoryStream;
100 if (!getCompatibleStream(rStream, aMemoryStream))
101 return BinaryDataContainer();
103 const sal_uInt32 nStreamLength = aMemoryStream.TellEnd();
105 auto aPdfData = std::make_unique<std::vector<sal_uInt8>>(nStreamLength);
107 aMemoryStream.Seek(STREAM_SEEK_TO_BEGIN);
108 aMemoryStream.ReadBytes(aPdfData->data(), aPdfData->size());
109 if (aMemoryStream.GetError())
110 return BinaryDataContainer();
112 return BinaryDataContainer(std::move(aPdfData));
115 } // end anonymous namespace
117 namespace vcl
/// Get the default PDF rendering resolution in DPI.
///
/// The PDFIMPORT_RESOLUTION_DPI environment variable overrides the built-in
/// value when atof() parses it to a number greater than zero.
///
/// @return the override if one is set and positive, otherwise 96.
static double getDefaultPdfResolutionDpi()
{
    // Fallback to a sensible default.
    constexpr double fFallbackDpi = 96.;

    const char* const pOverride = ::getenv("PDFIMPORT_RESOLUTION_DPI");
    if (!pOverride)
        return fFallbackDpi;

    // If an overriding default is set, use it -- but only when it is positive.
    const double fOverrideDpi = atof(pOverride);
    return fOverrideDpi > 0 ? fOverrideDpi : fFallbackDpi;
}
135 size_t RenderPDFBitmaps(const void* pBuffer, int nSize, std::vector<BitmapEx>& rBitmaps,
136 const size_t nFirstPage, int nPages, const basegfx::B2DTuple* pSizeHint)
138 static const double fResolutionDPI = getDefaultPdfResolutionDpi();
139 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
140 if (!pPdfium)
142 return 0;
145 // Load the buffer using pdfium.
146 std::unique_ptr<vcl::pdf::PDFiumDocument> pPdfDocument = pPdfium->openDocument(pBuffer, nSize);
147 if (!pPdfDocument)
148 return 0;
150 const int nPageCount = pPdfDocument->getPageCount();
151 if (nPages <= 0)
152 nPages = nPageCount;
153 const size_t nLastPage = std::min<int>(nPageCount, nFirstPage + nPages) - 1;
154 for (size_t nPageIndex = nFirstPage; nPageIndex <= nLastPage; ++nPageIndex)
156 // Render next page.
157 std::unique_ptr<vcl::pdf::PDFiumPage> pPdfPage = pPdfDocument->openPage(nPageIndex);
158 if (!pPdfPage)
159 break;
161 // Calculate the bitmap size in points.
162 size_t nPageWidthPoints = pPdfPage->getWidth();
163 size_t nPageHeightPoints = pPdfPage->getHeight();
164 if (pSizeHint && pSizeHint->getX() && pSizeHint->getY())
166 // Have a size hint, prefer that over the logic size from the PDF.
167 nPageWidthPoints = convertMm100ToTwip(pSizeHint->getX()) / 20;
168 nPageHeightPoints = convertMm100ToTwip(pSizeHint->getY()) / 20;
171 // Returned unit is points, convert that to pixel.
172 const size_t nPageWidth = pointToPixel(nPageWidthPoints, fResolutionDPI);
173 const size_t nPageHeight = pointToPixel(nPageHeightPoints, fResolutionDPI);
174 std::unique_ptr<vcl::pdf::PDFiumBitmap> pPdfBitmap
175 = pPdfium->createBitmap(nPageWidth, nPageHeight, /*alpha=*/1);
176 if (!pPdfBitmap)
177 break;
179 bool bTransparent = pPdfPage->hasTransparency();
180 if (pSizeHint)
182 // This is the PDF-in-EMF case: force transparency, even in case pdfium would tell us
183 // the PDF is not transparent.
184 bTransparent = true;
186 const sal_uInt32 nColor = bTransparent ? 0x00000000 : 0xFFFFFFFF;
187 pPdfBitmap->fillRect(0, 0, nPageWidth, nPageHeight, nColor);
188 pPdfBitmap->renderPageBitmap(pPdfDocument.get(), pPdfPage.get(), /*start_x=*/0,
189 /*start_y=*/0, nPageWidth, nPageHeight);
191 // Save the buffer as a bitmap.
192 Bitmap aBitmap(Size(nPageWidth, nPageHeight), vcl::PixelFormat::N24_BPP);
193 AlphaMask aMask(Size(nPageWidth, nPageHeight));
195 BitmapScopedWriteAccess pWriteAccess(aBitmap);
196 AlphaScopedWriteAccess pMaskAccess(aMask);
197 ConstScanline pPdfBuffer = pPdfBitmap->getBuffer();
198 const int nStride = pPdfBitmap->getStride();
199 std::vector<sal_uInt8> aScanlineAlpha(nPageWidth);
200 for (size_t nRow = 0; nRow < nPageHeight; ++nRow)
202 ConstScanline pPdfLine = pPdfBuffer + (nStride * nRow);
203 // pdfium byte order is BGRA.
204 pWriteAccess->CopyScanline(nRow, pPdfLine, ScanlineFormat::N32BitTcBgra, nStride);
205 for (size_t nCol = 0; nCol < nPageWidth; ++nCol)
207 // Invert alpha (source is alpha, target is opacity).
208 aScanlineAlpha[nCol] = ~pPdfLine[3];
209 pPdfLine += 4;
211 pMaskAccess->CopyScanline(nRow, aScanlineAlpha.data(), ScanlineFormat::N8BitPal,
212 nPageWidth);
216 if (bTransparent)
218 rBitmaps.emplace_back(aBitmap, aMask);
220 else
222 rBitmaps.emplace_back(std::move(aBitmap));
226 return rBitmaps.size();
229 bool importPdfVectorGraphicData(SvStream& rStream,
230 std::shared_ptr<VectorGraphicData>& rVectorGraphicData)
232 BinaryDataContainer aDataContainer = createBinaryDataContainer(rStream);
233 if (aDataContainer.isEmpty())
235 SAL_WARN("vcl.filter", "ImportPDF: empty PDF data array");
236 return false;
239 rVectorGraphicData
240 = std::make_shared<VectorGraphicData>(aDataContainer, VectorGraphicDataType::Pdf);
242 return true;
245 bool ImportPDF(SvStream& rStream, Graphic& rGraphic)
247 std::shared_ptr<VectorGraphicData> pVectorGraphicData;
248 if (!importPdfVectorGraphicData(rStream, pVectorGraphicData))
249 return false;
250 rGraphic = Graphic(pVectorGraphicData);
251 return true;
254 namespace
256 basegfx::B2DPoint convertFromPDFInternalToHMM(basegfx::B2DSize const& rInputPoint,
257 basegfx::B2DSize const& rPageSize)
259 double x = convertPointToMm100(rInputPoint.getX());
260 double y = convertPointToMm100(rPageSize.getY() - rInputPoint.getY());
261 return basegfx::B2DPoint(x, y);
264 std::vector<PDFGraphicAnnotation>
265 findAnnotations(const std::unique_ptr<vcl::pdf::PDFiumPage>& pPage, basegfx::B2DSize aPageSize)
267 std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations;
268 for (int nAnnotation = 0; nAnnotation < pPage->getAnnotationCount(); nAnnotation++)
270 auto pAnnotation = pPage->getAnnotation(nAnnotation);
271 if (pAnnotation)
273 auto eSubtype = pAnnotation->getSubType();
275 if (eSubtype == vcl::pdf::PDFAnnotationSubType::Text
276 || eSubtype == vcl::pdf::PDFAnnotationSubType::Polygon
277 || eSubtype == vcl::pdf::PDFAnnotationSubType::Circle
278 || eSubtype == vcl::pdf::PDFAnnotationSubType::Square
279 || eSubtype == vcl::pdf::PDFAnnotationSubType::Ink
280 || eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight
281 || eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
283 OUString sAuthor = pAnnotation->getString(vcl::pdf::constDictionaryKeyTitle);
284 OUString sText = pAnnotation->getString(vcl::pdf::constDictionaryKeyContents);
286 basegfx::B2DRectangle rRectangle = pAnnotation->getRectangle();
287 basegfx::B2DRectangle rRectangleHMM(
288 convertPointToMm100(rRectangle.getMinX()),
289 convertPointToMm100(aPageSize.getY() - rRectangle.getMinY()),
290 convertPointToMm100(rRectangle.getMaxX()),
291 convertPointToMm100(aPageSize.getY() - rRectangle.getMaxY()));
293 OUString sDateTimeString
294 = pAnnotation->getString(vcl::pdf::constDictionaryKeyModificationDate);
295 OUString sISO8601String = vcl::pdf::convertPdfDateToISO8601(sDateTimeString);
297 css::util::DateTime aDateTime;
298 if (!sISO8601String.isEmpty())
300 utl::ISO8601parseDateTime(sISO8601String, aDateTime);
303 Color aColor = pAnnotation->getColor();
305 aPDFGraphicAnnotations.emplace_back();
307 auto& rPDFGraphicAnnotation = aPDFGraphicAnnotations.back();
308 rPDFGraphicAnnotation.maRectangle = rRectangleHMM;
309 rPDFGraphicAnnotation.maAuthor = sAuthor;
310 rPDFGraphicAnnotation.maText = sText;
311 rPDFGraphicAnnotation.maDateTime = aDateTime;
312 rPDFGraphicAnnotation.meSubType = eSubtype;
313 rPDFGraphicAnnotation.maColor = aColor;
315 if (eSubtype == vcl::pdf::PDFAnnotationSubType::Polygon)
317 auto const& rVertices = pAnnotation->getVertices();
318 if (!rVertices.empty())
320 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerPolygon>();
321 rPDFGraphicAnnotation.mpMarker = pMarker;
322 for (auto const& rVertex : rVertices)
324 auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
325 pMarker->maPolygon.append(aPoint);
327 pMarker->maPolygon.setClosed(true);
328 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
329 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
330 pMarker->maFillColor = pAnnotation->getInteriorColor();
333 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Square)
335 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerSquare>();
336 rPDFGraphicAnnotation.mpMarker = pMarker;
337 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
338 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
339 pMarker->maFillColor = pAnnotation->getInteriorColor();
341 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Circle)
343 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerCircle>();
344 rPDFGraphicAnnotation.mpMarker = pMarker;
345 pMarker->mnWidth = convertPointToMm100(pAnnotation->getBorderWidth());
346 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
347 pMarker->maFillColor = pAnnotation->getInteriorColor();
349 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Ink)
351 auto const& rStrokesList = pAnnotation->getInkStrokes();
352 if (!rStrokesList.empty())
354 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerInk>();
355 rPDFGraphicAnnotation.mpMarker = pMarker;
356 for (auto const& rStrokes : rStrokesList)
358 basegfx::B2DPolygon aPolygon;
359 for (auto const& rVertex : rStrokes)
361 auto aPoint = convertFromPDFInternalToHMM(rVertex, aPageSize);
362 aPolygon.append(aPoint);
364 pMarker->maStrokes.push_back(aPolygon);
366 float fWidth = pAnnotation->getBorderWidth();
367 pMarker->mnWidth = convertPointToMm100(fWidth);
368 if (pAnnotation->hasKey(vcl::pdf::constDictionaryKeyInteriorColor))
369 pMarker->maFillColor = pAnnotation->getInteriorColor();
372 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Highlight)
374 size_t nCount = pAnnotation->getAttachmentPointsCount();
375 if (nCount > 0)
377 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerHighlight>(
378 vcl::pdf::PDFTextMarkerType::Highlight);
379 rPDFGraphicAnnotation.mpMarker = pMarker;
380 for (size_t i = 0; i < nCount; ++i)
382 auto aAttachmentPoints = pAnnotation->getAttachmentPoints(i);
383 if (!aAttachmentPoints.empty())
385 basegfx::B2DPolygon aPolygon;
386 aPolygon.setClosed(true);
388 auto aPoint1
389 = convertFromPDFInternalToHMM(aAttachmentPoints[0], aPageSize);
390 aPolygon.append(aPoint1);
391 auto aPoint2
392 = convertFromPDFInternalToHMM(aAttachmentPoints[1], aPageSize);
393 aPolygon.append(aPoint2);
394 auto aPoint3
395 = convertFromPDFInternalToHMM(aAttachmentPoints[3], aPageSize);
396 aPolygon.append(aPoint3);
397 auto aPoint4
398 = convertFromPDFInternalToHMM(aAttachmentPoints[2], aPageSize);
399 aPolygon.append(aPoint4);
401 pMarker->maQuads.push_back(aPolygon);
406 else if (eSubtype == vcl::pdf::PDFAnnotationSubType::Line)
408 auto const& rLineGeometry = pAnnotation->getLineGeometry();
409 if (!rLineGeometry.empty())
411 auto pMarker = std::make_shared<vcl::pdf::PDFAnnotationMarkerLine>();
412 rPDFGraphicAnnotation.mpMarker = pMarker;
414 auto aPoint1 = convertFromPDFInternalToHMM(rLineGeometry[0], aPageSize);
415 pMarker->maLineStart = aPoint1;
417 auto aPoint2 = convertFromPDFInternalToHMM(rLineGeometry[1], aPageSize);
418 pMarker->maLineEnd = aPoint2;
420 float fWidth = pAnnotation->getBorderWidth();
421 pMarker->mnWidth = convertPointToMm100(fWidth);
427 return aPDFGraphicAnnotations;
430 } // end anonymous namespace
432 size_t ImportPDFUnloaded(const OUString& rURL, std::vector<PDFGraphicResult>& rGraphics)
434 std::unique_ptr<SvStream> xStream(
435 ::utl::UcbStreamHelper::CreateStream(rURL, StreamMode::READ | StreamMode::SHARE_DENYNONE));
437 // Save the original PDF stream for later use.
438 BinaryDataContainer aDataContainer = createBinaryDataContainer(*xStream);
439 if (aDataContainer.isEmpty())
440 return 0;
442 // Prepare the link with the PDF stream.
443 auto pGfxLink = std::make_shared<GfxLink>(aDataContainer, GfxLinkType::NativePdf);
445 auto pPdfium = vcl::pdf::PDFiumLibrary::get();
446 if (!pPdfium)
448 return 0;
451 // Load the buffer using pdfium.
452 auto pPdfDocument = pPdfium->openDocument(pGfxLink->GetData(), pGfxLink->GetDataSize());
454 if (!pPdfDocument)
455 return 0;
457 const int nPageCount = pPdfDocument->getPageCount();
458 if (nPageCount <= 0)
459 return 0;
461 for (int nPageIndex = 0; nPageIndex < nPageCount; ++nPageIndex)
463 basegfx::B2DSize aPageSize = pPdfDocument->getPageSize(nPageIndex);
464 if (aPageSize.getX() <= 0.0 || aPageSize.getY() <= 0.0)
465 continue;
467 // Returned unit is points, convert that to twip
468 // 1 pt = 20 twips
469 constexpr double pointToTwipconversionRatio = 20;
471 tools::Long nPageWidth = convertTwipToMm100(aPageSize.getX() * pointToTwipconversionRatio);
472 tools::Long nPageHeight = convertTwipToMm100(aPageSize.getY() * pointToTwipconversionRatio);
474 // Create the Graphic with the VectorGraphicDataPtr and link the original PDF stream.
475 // We swap out this Graphic as soon as possible, and a later swap in
476 // actually renders the correct Bitmap on demand.
477 Graphic aGraphic(pGfxLink, nPageIndex);
479 auto pPage = pPdfDocument->openPage(nPageIndex);
481 std::vector<PDFGraphicAnnotation> aPDFGraphicAnnotations
482 = findAnnotations(pPage, aPageSize);
484 rGraphics.emplace_back(std::move(aGraphic), Size(nPageWidth, nPageHeight),
485 aPDFGraphicAnnotations);
488 return rGraphics.size();
492 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */