cool#10610 Ensure the parent-child relations of comments.
[LibreOffice.git] / vcl / source / gdi / pdfobjectcopier.cxx
blob3761520e3148ec4501a6983ff8e59fc60b911be5
1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
2 /*
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
8 */
10 #include <sal/log.hxx>
11 #include <sal/types.h>
12 #include <rtl/strbuf.hxx>
13 #include <tools/stream.hxx>
14 #include <tools/zcodec.hxx>
16 #include <vcl/filter/pdfdocument.hxx>
17 #include <vcl/filter/pdfobjectcontainer.hxx>
19 #include <pdf/objectcopier.hxx>
20 #include <pdf/pdfwriter_impl.hxx>
22 #include <o3tl/string_view.hxx>
24 namespace vcl
26 PDFObjectCopier::PDFObjectCopier(PDFObjectContainer& rContainer)
27 : m_rContainer(rContainer)
31 void PDFObjectCopier::copyRecursively(OStringBuffer& rLine, filter::PDFElement& rInputElement,
32 SvMemoryStream& rDocBuffer,
33 std::map<sal_Int32, sal_Int32>& rCopiedResources)
35 if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(&rInputElement))
37 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
38 if (pReferenced)
40 // Copy the referenced object.
41 sal_Int32 nRef = copyExternalResource(rDocBuffer, *pReferenced, rCopiedResources);
43 // Write the updated reference.
44 rLine.append(nRef);
45 rLine.append(" 0 R");
48 else if (auto pInputArray = dynamic_cast<filter::PDFArrayElement*>(&rInputElement))
50 rLine.append("[ ");
51 for (auto const& pElement : pInputArray->GetElements())
53 copyRecursively(rLine, *pElement, rDocBuffer, rCopiedResources);
54 rLine.append(" ");
56 rLine.append("] ");
58 else if (auto pInputDictionary = dynamic_cast<filter::PDFDictionaryElement*>(&rInputElement))
60 rLine.append("<< ");
61 for (auto const& pPair : pInputDictionary->GetItems())
63 rLine.append("/");
64 rLine.append(pPair.first);
65 rLine.append(" ");
66 copyRecursively(rLine, *pPair.second, rDocBuffer, rCopiedResources);
67 rLine.append(" ");
69 rLine.append(">> ");
71 else
73 rInputElement.writeString(rLine);
77 sal_Int32 PDFObjectCopier::copyExternalResource(SvMemoryStream& rDocBuffer,
78 filter::PDFObjectElement& rObject,
79 std::map<sal_Int32, sal_Int32>& rCopiedResources)
81 auto it = rCopiedResources.find(rObject.GetObjectValue());
82 if (it != rCopiedResources.end())
84 // This resource was already copied once, nothing to do.
85 return it->second;
88 sal_Int32 nObject = m_rContainer.createObject();
89 // Remember what is the ID of this object in our output.
90 rCopiedResources[rObject.GetObjectValue()] = nObject;
91 SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject.GetObjectValue()
92 << " -> " << nObject);
94 OStringBuffer aLine = OString::number(nObject) + " 0 obj\n";
96 if (rObject.GetDictionary())
98 aLine.append("<< ");
99 bool bFirst = true;
100 for (auto const& rPair : rObject.GetDictionaryItems())
102 if (bFirst)
103 bFirst = false;
104 else
105 aLine.append(" ");
107 aLine.append("/" + rPair.first + " ");
108 copyRecursively(aLine, *rPair.second, rDocBuffer, rCopiedResources);
111 aLine.append(" >>\n");
114 filter::PDFStreamElement* pStream = rObject.GetStream();
115 if (pStream)
117 aLine.append("stream\n");
120 if (filter::PDFArrayElement* pArray = rObject.GetArray())
122 aLine.append("[ ");
124 const std::vector<filter::PDFElement*>& rElements = pArray->GetElements();
126 bool bFirst = true;
127 for (auto const& pElement : rElements)
129 if (bFirst)
130 bFirst = false;
131 else
132 aLine.append(" ");
133 copyRecursively(aLine, *pElement, rDocBuffer, rCopiedResources);
135 aLine.append("]\n");
138 // If the object has a number element outside a dictionary or array, copy that.
139 if (filter::PDFNumberElement* pNumber = rObject.GetNumberElement())
141 pNumber->writeString(aLine);
142 aLine.append("\n");
144 // If the object has a name element outside a dictionary or array, copy that.
145 else if (filter::PDFNameElement* pName = rObject.GetNameElement())
147 // currently just handle the exact case seen in the real world
148 if (pName->GetValue() == "DeviceRGB")
150 pName->writeString(aLine);
151 aLine.append("\n");
153 else
155 SAL_INFO("vcl.pdfwriter",
156 "PDFObjectCopier::copyExternalResource: skipping: " << pName->GetValue());
160 // We have the whole object, now write it to the output.
161 if (!m_rContainer.updateObject(nObject))
162 return -1;
163 if (!m_rContainer.writeBuffer(aLine))
164 return -1;
165 aLine.setLength(0);
167 if (pStream)
169 SvMemoryStream& rStream = pStream->GetMemory();
170 m_rContainer.checkAndEnableStreamEncryption(nObject);
171 aLine.append(static_cast<const char*>(rStream.GetData()), rStream.GetSize());
172 if (!m_rContainer.writeBuffer(aLine))
173 return -1;
174 aLine.setLength(0);
175 m_rContainer.disableStreamEncryption();
177 aLine.append("\nendstream\n");
178 if (!m_rContainer.writeBuffer(aLine))
179 return -1;
180 aLine.setLength(0);
183 aLine.append("endobj\n\n");
184 if (!m_rContainer.writeBuffer(aLine))
185 return -1;
187 return nObject;
190 OString PDFObjectCopier::copyExternalResources(filter::PDFObjectElement& rPage,
191 const OString& rKind,
192 std::map<sal_Int32, sal_Int32>& rCopiedResources)
194 // A name - object ID map, IDs as they appear in our output, not the
195 // original ones.
196 std::map<OString, sal_Int32> aRet;
198 // Get the rKind subset of the resource dictionary.
199 std::map<OString, filter::PDFElement*> aItems;
200 filter::PDFObjectElement* pKindObject = nullptr;
201 if (auto pResources
202 = dynamic_cast<filter::PDFDictionaryElement*>(rPage.Lookup("Resources"_ostr)))
204 // Resources is a direct dictionary.
205 filter::PDFElement* pLookup = pResources->LookupElement(rKind);
206 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pLookup))
208 // rKind is an inline dictionary.
209 aItems = pDictionary->GetItems();
211 else if (auto pReference = dynamic_cast<filter::PDFReferenceElement*>(pLookup))
213 // rKind refers to a dictionary.
214 filter::PDFObjectElement* pReferenced = pReference->LookupObject();
215 if (!pReferenced)
217 return {};
220 pKindObject = pReferenced;
221 aItems = pReferenced->GetDictionaryItems();
224 else if (filter::PDFObjectElement* pPageResources = rPage.LookupObject("Resources"_ostr))
226 // Resources is an indirect object.
227 filter::PDFElement* pValue = pPageResources->Lookup(rKind);
228 if (auto pDictionary = dynamic_cast<filter::PDFDictionaryElement*>(pValue))
230 // Kind is a direct dictionary.
231 aItems = pDictionary->GetItems();
233 else if (filter::PDFObjectElement* pObject = pPageResources->LookupObject(rKind))
235 // Kind is an indirect object.
236 aItems = pObject->GetDictionaryItems();
237 pKindObject = pObject;
240 if (aItems.empty())
241 return {};
243 SvMemoryStream& rDocBuffer = rPage.GetDocument().GetEditBuffer();
244 bool bHasDictValue = false;
246 for (const auto& rItem : aItems)
248 // For each item copy it over to our output then insert it into aRet.
249 auto pReference = dynamic_cast<filter::PDFReferenceElement*>(rItem.second);
250 if (!pReference)
252 if (pKindObject && dynamic_cast<filter::PDFDictionaryElement*>(rItem.second))
254 bHasDictValue = true;
255 break;
258 continue;
261 filter::PDFObjectElement* pValue = pReference->LookupObject();
262 if (!pValue)
263 continue;
265 // Then copying over an object copy its dictionary and its stream.
266 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pValue, rCopiedResources);
267 aRet[rItem.first] = nObject;
270 if (bHasDictValue && pKindObject)
272 sal_Int32 nObject = copyExternalResource(rDocBuffer, *pKindObject, rCopiedResources);
273 return "/" + rKind + " " + OString::number(nObject) + " 0 R";
276 // Build the dictionary entry string.
277 OStringBuffer sRet("/" + rKind + "<<");
278 for (const auto& rPair : aRet)
280 sRet.append("/" + rPair.first + " " + OString::number(rPair.second) + " 0 R");
282 sRet.append(">>");
284 return sRet.makeStringAndClear();
287 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine)
289 // Maps from source object id (PDF image) to target object id (export result).
290 std::map<sal_Int32, sal_Int32> aCopiedResources;
291 copyPageResources(pPage, rLine, aCopiedResources);
294 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement* pPage, OStringBuffer& rLine,
295 std::map<sal_Int32, sal_Int32>& rCopiedResources)
297 rLine.append(" /Resources <<");
298 static const std::initializer_list<OString> aKeys
299 = { "ColorSpace"_ostr, "ExtGState"_ostr, "Font"_ostr,
300 "XObject"_ostr, "Shading"_ostr, "Pattern"_ostr };
301 for (const auto& rKey : aKeys)
303 rLine.append(copyExternalResources(*pPage, rKey, rCopiedResources));
305 rLine.append(">>");
308 sal_Int32 PDFObjectCopier::copyPageStreams(std::vector<filter::PDFObjectElement*>& rContentStreams,
309 SvMemoryStream& rStream, bool& rCompressed,
310 bool bIsTaggedNonReferenceXObject)
312 for (auto pContent : rContentStreams)
314 filter::PDFStreamElement* pPageStream = pContent->GetStream();
315 if (!pPageStream)
317 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
318 continue;
321 SvMemoryStream& rPageStream = pPageStream->GetMemory();
323 auto pFilter = dynamic_cast<filter::PDFNameElement*>(pContent->Lookup("Filter"_ostr));
324 auto pFilterArray = dynamic_cast<filter::PDFArrayElement*>(pContent->Lookup("Filter"_ostr));
325 if (!pFilter && pFilterArray)
327 auto& aElements = pFilterArray->GetElements();
328 if (!aElements.empty())
329 pFilter = dynamic_cast<filter::PDFNameElement*>(aElements[0]);
332 if (pFilter)
334 if (pFilter->GetValue() != "FlateDecode")
336 continue;
339 SvMemoryStream aMemoryStream;
340 ZCodec aZCodec;
341 rPageStream.Seek(0);
342 aZCodec.BeginCompression();
343 aZCodec.Decompress(rPageStream, aMemoryStream);
344 if (!aZCodec.EndCompression())
346 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
347 continue;
350 bool bHasArtifact = false;
351 if (bIsTaggedNonReferenceXObject)
353 auto pStart = static_cast<const char*>(aMemoryStream.GetData());
354 const char* const pEnd = pStart + aMemoryStream.GetSize();
355 std::string_view aStreamView(pStart, pEnd - pStart);
357 std::string_view sArtifact = "/Artifact";
358 std::size_t nPosArtifact = aStreamView.find(sArtifact);
359 if (nPosArtifact != std::string_view::npos)
361 bHasArtifact = true;
362 SvMemoryStream aTmpStream;
363 std::string_view sBMC = "BMC";
364 std::string_view sBDC = "BDC";
365 std::string_view sEMC = "EMC";
367 while (!aStreamView.empty())
369 aTmpStream.WriteOString(aStreamView.substr(0, nPosArtifact));
370 aStreamView.remove_prefix(nPosArtifact + sArtifact.size());
372 std::size_t nPosBMC = aStreamView.find(sBMC);
373 std::size_t nPosBDC = aStreamView.find(sBDC);
374 std::size_t nPos = std::min(nPosBMC, nPosBDC);
376 if (nPos != std::string_view::npos)
378 if (nPos == nPosBMC)
379 aStreamView.remove_prefix(nPos + sBMC.size() + 1);
380 else
381 aStreamView.remove_prefix(nPos + sBDC.size() + 1);
383 std::size_t nPosEMC = aStreamView.find(sEMC);
384 if (nPosEMC != std::string_view::npos)
386 aTmpStream.WriteOString(aStreamView.substr(0, nPosEMC));
387 aStreamView.remove_prefix(nPosEMC + sEMC.size() + 1);
391 nPosArtifact = aStreamView.find(sArtifact);
392 if (nPosArtifact == std::string_view::npos)
394 aTmpStream.WriteOString(aStreamView);
395 break;
398 rStream.WriteBytes(aTmpStream.GetData(), aTmpStream.GetSize());
402 if (!bHasArtifact)
403 rStream.WriteBytes(aMemoryStream.GetData(), aMemoryStream.GetSize());
405 else
407 rStream.WriteBytes(rPageStream.GetData(), rPageStream.GetSize());
411 rCompressed = PDFWriterImpl::compressStream(&rStream);
413 return rStream.Tell();
417 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */