//===- CodeGenDataReader.cpp ----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file contains support for reading codegen data.
//
//===----------------------------------------------------------------------===//

#include "llvm/CGData/CodeGenDataReader.h"
#include "llvm/CGData/OutlinedHashTreeRecord.h"
#include "llvm/Object/ObjectFile.h"
#include "llvm/Support/MemoryBuffer.h"

#define DEBUG_TYPE "cg-data-reader"

using namespace llvm;

namespace llvm {

24 static Expected
<std::unique_ptr
<MemoryBuffer
>>
25 setupMemoryBuffer(const Twine
&Filename
, vfs::FileSystem
&FS
) {
26 auto BufferOrErr
= Filename
.str() == "-" ? MemoryBuffer::getSTDIN()
27 : FS
.getBufferForFile(Filename
);
28 if (std::error_code EC
= BufferOrErr
.getError())
29 return errorCodeToError(EC
);
30 return std::move(BufferOrErr
.get());
33 Error
CodeGenDataReader::mergeFromObjectFile(
34 const object::ObjectFile
*Obj
, OutlinedHashTreeRecord
&GlobalOutlineRecord
,
35 StableFunctionMapRecord
&GlobalFunctionMapRecord
,
36 stable_hash
*CombinedHash
) {
37 Triple TT
= Obj
->makeTriple();
39 getCodeGenDataSectionName(CG_outline
, TT
.getObjectFormat(), false);
41 getCodeGenDataSectionName(CG_merge
, TT
.getObjectFormat(), false);
43 auto processSectionContents
= [&](const StringRef
&Name
,
44 const StringRef
&Contents
) {
45 if (Name
!= CGOutlineName
&& Name
!= CGMergeName
)
48 *CombinedHash
= stable_hash_combine(*CombinedHash
, xxh3_64bits(Contents
));
49 auto *Data
= reinterpret_cast<const unsigned char *>(Contents
.data());
50 auto *EndData
= Data
+ Contents
.size();
51 // In case dealing with an executable that has concatenated cgdata,
52 // we want to merge them into a single cgdata.
53 // Although it's not a typical workflow, we support this scenario
54 // by looping over all data in the sections.
55 if (Name
== CGOutlineName
) {
56 while (Data
!= EndData
) {
57 OutlinedHashTreeRecord LocalOutlineRecord
;
58 LocalOutlineRecord
.deserialize(Data
);
59 GlobalOutlineRecord
.merge(LocalOutlineRecord
);
61 } else if (Name
== CGMergeName
) {
62 while (Data
!= EndData
) {
63 StableFunctionMapRecord LocalFunctionMapRecord
;
64 LocalFunctionMapRecord
.deserialize(Data
);
65 GlobalFunctionMapRecord
.merge(LocalFunctionMapRecord
);
70 for (auto &Section
: Obj
->sections()) {
71 Expected
<StringRef
> NameOrErr
= Section
.getName();
73 return NameOrErr
.takeError();
74 Expected
<StringRef
> ContentsOrErr
= Section
.getContents();
76 return ContentsOrErr
.takeError();
77 processSectionContents(*NameOrErr
, *ContentsOrErr
);
80 return Error::success();
83 Error
IndexedCodeGenDataReader::read() {
84 using namespace support
;
86 // The smallest header with the version 1 is 24 bytes.
87 // Do not update this value even with the new version of the header.
88 const unsigned MinHeaderSize
= 24;
89 if (DataBuffer
->getBufferSize() < MinHeaderSize
)
90 return error(cgdata_error::bad_header
);
93 reinterpret_cast<const unsigned char *>(DataBuffer
->getBufferStart());
95 reinterpret_cast<const unsigned char *>(DataBuffer
->getBufferEnd());
96 if (auto E
= IndexedCGData::Header::readFromBuffer(Start
).moveInto(Header
))
99 if (hasOutlinedHashTree()) {
100 const unsigned char *Ptr
= Start
+ Header
.OutlinedHashTreeOffset
;
102 return error(cgdata_error::eof
);
103 HashTreeRecord
.deserialize(Ptr
);
105 if (hasStableFunctionMap()) {
106 const unsigned char *Ptr
= Start
+ Header
.StableFunctionMapOffset
;
108 return error(cgdata_error::eof
);
109 FunctionMapRecord
.deserialize(Ptr
);
115 Expected
<std::unique_ptr
<CodeGenDataReader
>>
116 CodeGenDataReader::create(const Twine
&Path
, vfs::FileSystem
&FS
) {
117 // Set up the buffer to read.
118 auto BufferOrError
= setupMemoryBuffer(Path
, FS
);
119 if (Error E
= BufferOrError
.takeError())
121 return CodeGenDataReader::create(std::move(BufferOrError
.get()));
124 Expected
<std::unique_ptr
<CodeGenDataReader
>>
125 CodeGenDataReader::create(std::unique_ptr
<MemoryBuffer
> Buffer
) {
126 if (Buffer
->getBufferSize() == 0)
127 return make_error
<CGDataError
>(cgdata_error::empty_cgdata
);
129 std::unique_ptr
<CodeGenDataReader
> Reader
;
130 // Create the reader.
131 if (IndexedCodeGenDataReader::hasFormat(*Buffer
))
132 Reader
= std::make_unique
<IndexedCodeGenDataReader
>(std::move(Buffer
));
133 else if (TextCodeGenDataReader::hasFormat(*Buffer
))
134 Reader
= std::make_unique
<TextCodeGenDataReader
>(std::move(Buffer
));
136 return make_error
<CGDataError
>(cgdata_error::malformed
);
138 // Initialize the reader and return the result.
139 if (Error E
= Reader
->read())
142 return std::move(Reader
);
145 bool IndexedCodeGenDataReader::hasFormat(const MemoryBuffer
&DataBuffer
) {
146 using namespace support
;
147 if (DataBuffer
.getBufferSize() < sizeof(IndexedCGData::Magic
))
150 uint64_t Magic
= endian::read
<uint64_t, llvm::endianness::little
, aligned
>(
151 DataBuffer
.getBufferStart());
152 // Verify that it's magical.
153 return Magic
== IndexedCGData::Magic
;
156 bool TextCodeGenDataReader::hasFormat(const MemoryBuffer
&Buffer
) {
157 // Verify that this really looks like plain ASCII text by checking a
158 // 'reasonable' number of characters (up to the magic size).
159 StringRef Prefix
= Buffer
.getBuffer().take_front(sizeof(uint64_t));
160 return llvm::all_of(Prefix
, [](char c
) { return isPrint(c
) || isSpace(c
); });
162 Error
TextCodeGenDataReader::read() {
163 using namespace support
;
165 // Parse the custom header line by line.
166 for (; !Line
.is_at_eof(); ++Line
) {
167 // Skip empty or whitespace-only lines
168 if (Line
->trim().empty())
171 if (!Line
->starts_with(":"))
173 StringRef Str
= Line
->drop_front().rtrim();
174 if (Str
.equals_insensitive("outlined_hash_tree"))
175 DataKind
|= CGDataKind::FunctionOutlinedHashTree
;
176 else if (Str
.equals_insensitive("stable_function_map"))
177 DataKind
|= CGDataKind::StableFunctionMergingMap
;
179 return error(cgdata_error::bad_header
);
182 // We treat an empty header (that is a comment # only) as a valid header.
183 if (Line
.is_at_eof()) {
184 if (DataKind
== CGDataKind::Unknown
)
185 return Error::success();
186 return error(cgdata_error::bad_header
);
189 // The YAML docs follow after the header.
190 const char *Pos
= Line
->data();
191 size_t Size
= reinterpret_cast<size_t>(DataBuffer
->getBufferEnd()) -
192 reinterpret_cast<size_t>(Pos
);
193 yaml::Input
YOS(StringRef(Pos
, Size
));
194 if (hasOutlinedHashTree())
195 HashTreeRecord
.deserializeYAML(YOS
);
196 if (hasStableFunctionMap())
197 FunctionMapRecord
.deserializeYAML(YOS
);
199 return Error::success();
201 } // end namespace llvm