1 /* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*- */
3 * This file is part of the LibreOffice project.
5 * This Source Code Form is subject to the terms of the Mozilla Public
6 * License, v. 2.0. If a copy of the MPL was not distributed with this
7 * file, You can obtain one at http://mozilla.org/MPL/2.0/.
10 #include <sal/log.hxx>
11 #include <sal/types.h>
12 #include <rtl/strbuf.hxx>
13 #include <tools/stream.hxx>
14 #include <tools/zcodec.hxx>
16 #include <vcl/filter/pdfdocument.hxx>
17 #include <vcl/filter/pdfobjectcontainer.hxx>
19 #include <pdf/objectcopier.hxx>
20 #include <pdf/pdfwriter_impl.hxx>
22 #include <o3tl/string_view.hxx>
// Constructs a copier bound to rContainer. The reference is stored in
// m_rContainer; the copy methods of this class use it to create objects
// (createObject), register them (updateObject) and emit bytes (writeBuffer).
26 PDFObjectCopier::PDFObjectCopier(PDFObjectContainer
& rContainer
)
27 : m_rContainer(rContainer
)
// Appends a copy of rInputElement to rLine, dispatching on the element's
// dynamic type (reference, array, dictionary, anything else).
//
// rLine            - output buffer that receives the serialized element.
// rInputElement    - element from the source document to copy.
// rDocBuffer       - passed through unchanged to copyExternalResource() and
//                    to the recursive calls.
// rCopiedResources - map of already copied objects, passed through unchanged;
//                    copyExternalResource() keys it by the source object's
//                    GetObjectValue().
31 void PDFObjectCopier::copyRecursively(OStringBuffer
& rLine
, filter::PDFElement
& rInputElement
,
32 SvMemoryStream
& rDocBuffer
,
33 std::map
<sal_Int32
, sal_Int32
>& rCopiedResources
)
// Reference: the object it points to is copied as a separate object of the
// output, and nRef is the value copyExternalResource() returns for it.
35 if (auto pReference
= dynamic_cast<filter::PDFReferenceElement
*>(&rInputElement
))
37 filter::PDFObjectElement
* pReferenced
= pReference
->LookupObject();
// NOTE(review): pReferenced is dereferenced below; a null check between the
// lookup and the use is not visible in this listing -- confirm it exists.
40 // Copy the referenced object.
41 sal_Int32 nRef
= copyExternalResource(rDocBuffer
, *pReferenced
, rCopiedResources
);
// NOTE(review): the statements that append the reference text built from nRef
// to rLine are not visible in this listing.
43 // Write the updated reference.
// Array: every element from GetElements() is copied in iteration order.
48 else if (auto pInputArray
= dynamic_cast<filter::PDFArrayElement
*>(&rInputElement
))
51 for (auto const& pElement
: pInputArray
->GetElements())
53 copyRecursively(rLine
, *pElement
, rDocBuffer
, rCopiedResources
);
// Dictionary: for every item from GetItems() the key is appended to rLine and
// the value is copied recursively.
58 else if (auto pInputDictionary
= dynamic_cast<filter::PDFDictionaryElement
*>(&rInputElement
))
61 for (auto const& pPair
: pInputDictionary
->GetItems())
64 rLine
.append(pPair
.first
);
66 copyRecursively(rLine
, *pPair
.second
, rDocBuffer
, rCopiedResources
);
// Presumably the fallback branch for all other element types: the element
// serializes itself into rLine.
73 rInputElement
.writeString(rLine
);
// Copies one object of the source document into the output managed by
// m_rContainer.
//
// rDocBuffer       - passed through to copyRecursively().
// rObject          - source object to copy.
// rCopiedResources - map from the source object's GetObjectValue() to the id
//                    created for it in the output; consulted first and updated
//                    here.
//
// NOTE(review): the return statements are not visible in this listing;
// presumably the already mapped id is returned on a map hit and nObject
// otherwise -- confirm.
77 sal_Int32
PDFObjectCopier::copyExternalResource(SvMemoryStream
& rDocBuffer
,
78 filter::PDFObjectElement
& rObject
,
79 std::map
<sal_Int32
, sal_Int32
>& rCopiedResources
)
81 auto it
= rCopiedResources
.find(rObject
.GetObjectValue());
82 if (it
!= rCopiedResources
.end())
84 // This resource was already copied once, nothing to do.
88 sal_Int32 nObject
= m_rContainer
.createObject();
// The mapping is stored before the object's content is copied, so a nested
// copyExternalResource() call for the same source object (reached through
// copyRecursively() below) hits the lookup above.
89 // Remember what is the ID of this object in our output.
90 rCopiedResources
[rObject
.GetObjectValue()] = nObject
;
91 SAL_INFO("vcl.pdfwriter", "PDFObjectCopier::copyExternalResource: " << rObject
.GetObjectValue()
92 << " -> " << nObject
);
// aLine accumulates the serialized object, starting with its "<id> 0 obj" header.
94 OStringBuffer aLine
= OString::number(nObject
) + " 0 obj\n";
// Dictionary of the object: each item is written as "/<key> " followed by the
// recursively copied value. The text that opens the dictionary is not visible
// in this listing; the closing " >>\n" is appended below.
96 if (rObject
.GetDictionary())
100 for (auto const& rPair
: rObject
.GetDictionaryItems())
107 aLine
.append("/" + rPair
.first
+ " ");
108 copyRecursively(aLine
, *rPair
.second
, rDocBuffer
, rCopiedResources
);
111 aLine
.append(" >>\n");
// pStream is used further down to append the stream bytes.
// NOTE(review): the condition guarding the "stream\n" keyword is not visible.
114 filter::PDFStreamElement
* pStream
= rObject
.GetStream();
117 aLine
.append("stream\n");
// Array of the object: every element is copied recursively.
120 if (filter::PDFArrayElement
* pArray
= rObject
.GetArray())
124 const std::vector
<filter::PDFElement
*>& rElements
= pArray
->GetElements();
127 for (auto const& pElement
: rElements
)
133 copyRecursively(aLine
, *pElement
, rDocBuffer
, rCopiedResources
);
138 // If the object has a number element outside a dictionary or array, copy that.
139 if (filter::PDFNumberElement
* pNumber
= rObject
.GetNumberElement())
141 pNumber
->writeString(aLine
);
144 // If the object has a name element outside a dictionary or array, copy that.
145 else if (filter::PDFNameElement
* pName
= rObject
.GetNameElement())
// Only the name "DeviceRGB" is written; the log statement below reports a
// name as skipped (presumably in the else branch for every other name).
147 // currently just handle the exact case seen in the real world
148 if (pName
->GetValue() == "DeviceRGB")
150 pName
->writeString(aLine
);
155 SAL_INFO("vcl.pdfwriter",
156 "PDFObjectCopier::copyExternalResource: skipping: " << pName
->GetValue());
// NOTE(review): what happens when updateObject() or writeBuffer() report
// failure is not visible in this listing.
160 // We have the whole object, now write it to the output.
161 if (!m_rContainer
.updateObject(nObject
))
163 if (!m_rContainer
.writeBuffer(aLine
))
// Stream payload: the bytes held in the source stream's memory are appended
// as they are (no decoding is visible here) and written while stream
// encryption is enabled for nObject; encryption is switched off again before
// the "endstream" keyword is written.
169 SvMemoryStream
& rStream
= pStream
->GetMemory();
170 m_rContainer
.checkAndEnableStreamEncryption(nObject
);
171 aLine
.append(static_cast<const char*>(rStream
.GetData()), rStream
.GetSize());
172 if (!m_rContainer
.writeBuffer(aLine
))
175 m_rContainer
.disableStreamEncryption();
177 aLine
.append("\nendstream\n");
178 if (!m_rContainer
.writeBuffer(aLine
))
// Terminates the object.
183 aLine
.append("endobj\n\n");
184 if (!m_rContainer
.writeBuffer(aLine
))
// Copies the rKind entries of rPage's "Resources" into the output and returns
// the text of the matching resource dictionary entry.
//
// rPage            - source page object whose resources are inspected.
// rKind            - resource category key looked up inside "Resources".
// rCopiedResources - map of already copied objects, passed through to
//                    copyExternalResource().
//
// Returns either "/<rKind> <id> 0 R" (when the rKind dictionary is an indirect
// object that has a dictionary among its values) or a string that starts with
// "/<rKind><<" followed by one "/<name> <id> 0 R" per copied item.
// NOTE(review): the text that closes the "<<" dictionary is not visible in
// this listing.
190 OString
PDFObjectCopier::copyExternalResources(filter::PDFObjectElement
& rPage
,
191 const OString
& rKind
,
192 std::map
<sal_Int32
, sal_Int32
>& rCopiedResources
)
194 // A name - object ID map, IDs as they appear in our output, not the IDs of the source document.
196 std::map
<OString
, sal_Int32
> aRet
;
198 // Get the rKind subset of the resource dictionary.
199 std::map
<OString
, filter::PDFElement
*> aItems
;
// Set only when the rKind dictionary itself is an indirect object.
200 filter::PDFObjectElement
* pKindObject
= nullptr;
// NOTE(review): the head of this condition (presumably the declaration of
// pResources used below) is not visible in this listing.
202 = dynamic_cast<filter::PDFDictionaryElement
*>(rPage
.Lookup("Resources"_ostr
)))
204 // Resources is a direct dictionary.
205 filter::PDFElement
* pLookup
= pResources
->LookupElement(rKind
);
206 if (auto pDictionary
= dynamic_cast<filter::PDFDictionaryElement
*>(pLookup
))
208 // rKind is an inline dictionary.
209 aItems
= pDictionary
->GetItems();
211 else if (auto pReference
= dynamic_cast<filter::PDFReferenceElement
*>(pLookup
))
213 // rKind refers to a dictionary.
214 filter::PDFObjectElement
* pReferenced
= pReference
->LookupObject();
// NOTE(review): pReferenced is dereferenced below; a null check is not
// visible in this listing -- confirm it exists.
220 pKindObject
= pReferenced
;
221 aItems
= pReferenced
->GetDictionaryItems();
224 else if (filter::PDFObjectElement
* pPageResources
= rPage
.LookupObject("Resources"_ostr
))
226 // Resources is an indirect object.
227 filter::PDFElement
* pValue
= pPageResources
->Lookup(rKind
);
228 if (auto pDictionary
= dynamic_cast<filter::PDFDictionaryElement
*>(pValue
))
230 // Kind is a direct dictionary.
231 aItems
= pDictionary
->GetItems();
233 else if (filter::PDFObjectElement
* pObject
= pPageResources
->LookupObject(rKind
))
235 // Kind is an indirect object.
236 aItems
= pObject
->GetDictionaryItems();
237 pKindObject
= pObject
;
243 SvMemoryStream
& rDocBuffer
= rPage
.GetDocument().GetEditBuffer();
// Becomes true when the rKind dictionary is an indirect object and at least
// one of its values is an inline dictionary; that case is handled after the
// loop by copying the whole rKind object.
244 bool bHasDictValue
= false;
246 for (const auto& rItem
: aItems
)
248 // For each item copy it over to our output then insert it into aRet.
249 auto pReference
= dynamic_cast<filter::PDFReferenceElement
*>(rItem
.second
);
252 if (pKindObject
&& dynamic_cast<filter::PDFDictionaryElement
*>(rItem
.second
))
254 bHasDictValue
= true;
// NOTE(review): pReference can be null when the item is not a reference; the
// guard before this dereference is not visible in this listing.
261 filter::PDFObjectElement
* pValue
= pReference
->LookupObject();
265 // When copying over an object, copy its dictionary and its stream.
266 sal_Int32 nObject
= copyExternalResource(rDocBuffer
, *pValue
, rCopiedResources
);
267 aRet
[rItem
.first
] = nObject
;
// The rKind dictionary is copied as one object and referenced indirectly.
270 if (bHasDictValue
&& pKindObject
)
272 sal_Int32 nObject
= copyExternalResource(rDocBuffer
, *pKindObject
, rCopiedResources
);
273 return "/" + rKind
+ " " + OString::number(nObject
) + " 0 R";
276 // Build the dictionary entry string.
277 OStringBuffer
sRet("/" + rKind
+ "<<");
278 for (const auto& rPair
: aRet
)
280 sRet
.append("/" + rPair
.first
+ " " + OString::number(rPair
.second
) + " 0 R");
284 return sRet
.makeStringAndClear();
// Convenience overload: copies the resources of pPage into rLine using a
// fresh, function-local map of copied objects, by delegating to the
// three-argument overload.
//
// pPage - source page object.
// rLine - output buffer that receives the resources text.
287 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement
* pPage
, OStringBuffer
& rLine
)
289 // Maps from source object id (PDF image) to target object id (export result).
290 std::map
<sal_Int32
, sal_Int32
> aCopiedResources
;
291 copyPageResources(pPage
, rLine
, aCopiedResources
);
// Appends " /Resources <<" to rLine, followed by the result of
// copyExternalResources() for each of the resource kinds listed in aKeys.
//
// pPage            - source page object; dereferenced without a visible null
//                    check, so callers presumably pass a non-null pointer.
// rLine            - output buffer that receives the resources text.
// rCopiedResources - map of already copied objects, shared across all kinds
//                    so an object referenced from several kinds is looked up
//                    in the same map.
//
// NOTE(review): the text that closes the "<<" dictionary is not visible in
// this listing.
294 void PDFObjectCopier::copyPageResources(filter::PDFObjectElement
* pPage
, OStringBuffer
& rLine
,
295 std::map
<sal_Int32
, sal_Int32
>& rCopiedResources
)
297 rLine
.append(" /Resources <<");
// The resource kinds that are copied, in this order.
298 static const std::initializer_list
<OString
> aKeys
299 = { "ColorSpace"_ostr
, "ExtGState"_ostr
, "Font"_ostr
,
300 "XObject"_ostr
, "Shading"_ostr
, "Pattern"_ostr
};
301 for (const auto& rKey
: aKeys
)
303 rLine
.append(copyExternalResources(*pPage
, rKey
, rCopiedResources
));
// Writes the content of every object in rContentStreams to rStream, then
// compresses rStream.
//
// rContentStreams              - content stream objects of the source page.
// rStream                      - output stream that receives the bytes.
// rCompressed                  - set to the result of
//                                PDFWriterImpl::compressStream(&rStream).
// bIsTaggedNonReferenceXObject - when true, decompressed content is scanned
//                                for "/Artifact" and rewritten (see below).
//
// Returns rStream.Tell() after the compression call.
308 sal_Int32
PDFObjectCopier::copyPageStreams(std::vector
<filter::PDFObjectElement
*>& rContentStreams
,
309 SvMemoryStream
& rStream
, bool& rCompressed
,
310 bool bIsTaggedNonReferenceXObject
)
312 for (auto pContent
: rContentStreams
)
314 filter::PDFStreamElement
* pPageStream
= pContent
->GetStream();
// Logged for content objects without a stream; the condition and what
// follows the warning are not visible in this listing.
317 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: contents has no stream");
321 SvMemoryStream
& rPageStream
= pPageStream
->GetMemory();
// "Filter" may be a single name or an array of names; for an array only the
// first element is considered.
323 auto pFilter
= dynamic_cast<filter::PDFNameElement
*>(pContent
->Lookup("Filter"_ostr
));
324 auto pFilterArray
= dynamic_cast<filter::PDFArrayElement
*>(pContent
->Lookup("Filter"_ostr
));
325 if (!pFilter
&& pFilterArray
)
327 auto& aElements
= pFilterArray
->GetElements();
328 if (!aElements
.empty())
329 pFilter
= dynamic_cast<filter::PDFNameElement
*>(aElements
[0]);
// NOTE(review): pFilter can still be null here; the guard before this
// dereference, and what happens for filters other than "FlateDecode", are not
// visible in this listing.
334 if (pFilter
->GetValue() != "FlateDecode")
// aMemoryStream receives the decompressed content of rPageStream.
// NOTE(review): the declaration of aZCodec is not visible in this listing.
339 SvMemoryStream aMemoryStream
;
342 aZCodec
.BeginCompression();
343 aZCodec
.Decompress(rPageStream
, aMemoryStream
);
344 if (!aZCodec
.EndCompression())
346 SAL_WARN("vcl.pdfwriter", "PDFObjectCopier::copyPageStreams: decompression failed");
350 bool bHasArtifact
= false;
351 if (bIsTaggedNonReferenceXObject
)
// View over the decompressed bytes; it does not own them, aMemoryStream does.
353 auto pStart
= static_cast<const char*>(aMemoryStream
.GetData());
354 const char* const pEnd
= pStart
+ aMemoryStream
.GetSize();
355 std::string_view
aStreamView(pStart
, pEnd
- pStart
);
357 std::string_view sArtifact
= "/Artifact";
358 std::size_t nPosArtifact
= aStreamView
.find(sArtifact
);
359 if (nPosArtifact
!= std::string_view::npos
)
// aTmpStream collects the rewritten content.
362 SvMemoryStream aTmpStream
;
363 std::string_view sBMC
= "BMC";
364 std::string_view sBDC
= "BDC";
365 std::string_view sEMC
= "EMC";
// Each iteration handles one "/Artifact" occurrence: the text in front of it
// is kept, the view is advanced past "/Artifact" and past the next "BMC" or
// "BDC" (whichever comes first), the text up to the next "EMC" is kept, and
// the view is advanced past that "EMC".
367 while (!aStreamView
.empty())
369 aTmpStream
.WriteOString(aStreamView
.substr(0, nPosArtifact
));
370 aStreamView
.remove_prefix(nPosArtifact
+ sArtifact
.size());
372 std::size_t nPosBMC
= aStreamView
.find(sBMC
);
373 std::size_t nPosBDC
= aStreamView
.find(sBDC
);
374 std::size_t nPos
= std::min(nPosBMC
, nPosBDC
);
376 if (nPos
!= std::string_view::npos
)
// NOTE(review): the conditions selecting between the next two statements are
// not visible; both keywords have the same length, so both skip the same
// number of bytes. The extra "+ 1" skips one byte after the keyword;
// std::string_view::remove_prefix has undefined behavior when the count
// exceeds size(), which happens if the keyword is the very end of the view --
// confirm the input guarantees a following byte or clamp the count.
379 aStreamView
.remove_prefix(nPos
+ sBMC
.size() + 1);
381 aStreamView
.remove_prefix(nPos
+ sBDC
.size() + 1);
383 std::size_t nPosEMC
= aStreamView
.find(sEMC
);
384 if (nPosEMC
!= std::string_view::npos
)
386 aTmpStream
.WriteOString(aStreamView
.substr(0, nPosEMC
));
// NOTE(review): same "+ 1" concern as above when "EMC" ends the view.
387 aStreamView
.remove_prefix(nPosEMC
+ sEMC
.size() + 1);
// Look for the next occurrence; when there is none, the rest of the view is
// kept unchanged.
391 nPosArtifact
= aStreamView
.find(sArtifact
);
392 if (nPosArtifact
== std::string_view::npos
)
394 aTmpStream
.WriteOString(aStreamView
);
// Rewritten content.
398 rStream
.WriteBytes(aTmpStream
.GetData(), aTmpStream
.GetSize());
// Decompressed content, unmodified (presumably when no artifact was
// rewritten; the guard is not visible in this listing).
403 rStream
.WriteBytes(aMemoryStream
.GetData(), aMemoryStream
.GetSize());
// Source stream bytes as stored (presumably the branch for content without a
// filter; the guard is not visible in this listing).
407 rStream
.WriteBytes(rPageStream
.GetData(), rPageStream
.GetSize());
411 rCompressed
= PDFWriterImpl::compressStream(&rStream
);
413 return rStream
.Tell();
417 /* vim:set shiftwidth=4 softtabstop=4 expandtab: */