1 //===- ReaderWrappers.cpp - Parse bytecode from file or buffer -----------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This file implements loading and parsing a bytecode file and parsing a
11 // bytecode module from a given buffer.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Bytecode/Analyzer.h"
16 #include "llvm/Bytecode/Reader.h"
18 #include "llvm/Module.h"
19 #include "llvm/Instructions.h"
20 #include "llvm/ADT/StringExtras.h"
21 #include "llvm/System/MappedFile.h"
28 //===----------------------------------------------------------------------===//
29 // BytecodeFileReader - Read from an mmap'able file descriptor.
33 /// BytecodeFileReader - parses a bytecode file from a file
// BytecodeFileReader - a BytecodeReader subclass that keeps a sys::MappedFile
// member (mapFile) next to the inherited reader state. The copy constructor
// and the assignment operator are declared but deliberately never implemented,
// so instances cannot be copied. The usable constructor takes the file name
// and an optional BytecodeHandler pointer that defaults to 0.
// NOTE(review): the embedded listing numbers skip in this excerpt, so lines
// such as access specifiers and the closing brace are not visible here -
// confirm against the complete file.
35 class BytecodeFileReader
: public BytecodeReader
{
37 sys::MappedFile mapFile
;
39 BytecodeFileReader(const BytecodeFileReader
&); // Do not implement
40 void operator=(const BytecodeFileReader
&BFR
); // Do not implement
43 BytecodeFileReader(const std::string
&Filename
, llvm::BytecodeHandler
* H
=0);
// Constructor: initializes mapFile from sys::Path(Filename), then hands the
// mapped region to ParseBytecode as (base pointer, size, Filename).
// NOTE(review): the base-class initializer and any explicit call that maps
// the file before base() is read are not visible in this excerpt - confirm.
47 BytecodeFileReader::BytecodeFileReader(const std::string
&Filename
,
48 llvm::BytecodeHandler
* H
)
50 , mapFile( sys::Path(Filename
))
// mapFile.base() is reinterpreted as a byte pointer for the parser.
53 unsigned char* buffer
= reinterpret_cast<unsigned char*>(mapFile
.base());
54 ParseBytecode(buffer
, mapFile
.size(), Filename
);
57 //===----------------------------------------------------------------------===//
58 // BytecodeBufferReader - Read from a memory buffer
62 /// BytecodeBufferReader - parses a bytecode file from a buffer
// BytecodeBufferReader - a BytecodeReader subclass that parses from an
// in-memory byte buffer. It stores a `const unsigned char *Buffer` member,
// declares (but never implements) copy construction and assignment so that
// instances cannot be copied, and has a user-declared destructor.
// The constructor takes the buffer pointer, its length, a module identifier
// and an optional BytecodeHandler pointer that defaults to 0.
// NOTE(review): the constructor and destructor test a `MustDelete` flag whose
// declaration is not visible in this excerpt (listing numbers skip) - confirm.
64 class BytecodeBufferReader
: public BytecodeReader
{
66 const unsigned char *Buffer
;
69 BytecodeBufferReader(const BytecodeBufferReader
&); // Do not implement
70 void operator=(const BytecodeBufferReader
&BFR
); // Do not implement
73 BytecodeBufferReader(const unsigned char *Buf
, unsigned Length
,
74 const std::string
&ModuleID
,
75 llvm::BytecodeHandler
* Handler
= 0);
76 ~BytecodeBufferReader();
// Constructor: chooses the memory to parse from and then calls
// ParseBytecode(ParseBegin, Length, ModuleID).
//  * If the address of Buf has either of its two low bits set (i.e. it is not
//    a multiple of 4), a private buffer of Length+4 bytes is allocated and the
//    input is copied to a 4-byte-aligned position inside it.
//  * Otherwise the caller's buffer is used directly, without copying.
// NOTE(review): the `Length` parameter line, the base-class initializer, the
// assignments to MustDelete and the else/try/catch structure are not visible
// in this excerpt (listing numbers skip) - confirm against the complete file.
81 BytecodeBufferReader::BytecodeBufferReader(const unsigned char *Buf
,
83 const std::string
&ModuleID
,
84 llvm::BytecodeHandler
* H
)
87 // If not aligned, allocate a new buffer to hold the bytecode...
88 const unsigned char *ParseBegin
= 0;
89 if (reinterpret_cast<uint64_t>(Buf
) & 3) {
// Four spare bytes leave room for the alignment offset computed below.
90 Buffer
= new unsigned char[Length
+4];
// Offset is 4 - (address & 3), a value from 1 to 4, so Buffer + Offset is a
// multiple of 4 and Offset + Length never exceeds the Length+4 allocation.
91 unsigned Offset
= 4 - ((intptr_t)Buffer
& 3); // Make sure it's aligned
92 ParseBegin
= Buffer
+ Offset
;
93 memcpy((unsigned char*)ParseBegin
, Buf
, Length
); // Copy it over
96 // If we don't need to copy it over, just use the caller's copy
97 ParseBegin
= Buffer
= Buf
;
101 ParseBytecode(ParseBegin
, Length
, ModuleID
);
// NOTE(review): presumably the cleanup path taken when ParseBytecode throws;
// the enclosing handler is not visible here - confirm.
103 if (MustDelete
) delete [] Buffer
;
// Destructor: releases Buffer with delete[] only when MustDelete is set;
// otherwise Buffer is left untouched.
108 BytecodeBufferReader::~BytecodeBufferReader() {
109 if (MustDelete
) delete [] Buffer
;
112 //===----------------------------------------------------------------------===//
113 // BytecodeStdinReader - Read bytecode from Standard Input
117 /// BytecodeStdinReader - parses a bytecode file from stdin
// BytecodeStdinReader - a BytecodeReader subclass for input read from
// std::cin. It owns a std::vector<unsigned char> (FileData) that accumulates
// the bytes read, plus a raw pointer (FileBuf) that the constructor points at
// the first element of that vector. Copy construction and assignment are
// declared but never implemented, so instances cannot be copied. The
// constructor takes an optional BytecodeHandler pointer that defaults to 0.
// NOTE(review): access specifiers and the closing brace are not visible in
// this excerpt (listing numbers skip) - confirm against the complete file.
119 class BytecodeStdinReader
: public BytecodeReader
{
121 std::vector
<unsigned char> FileData
;
122 unsigned char *FileBuf
;
124 BytecodeStdinReader(const BytecodeStdinReader
&); // Do not implement
125 void operator=(const BytecodeStdinReader
&BFR
); // Do not implement
128 BytecodeStdinReader( llvm::BytecodeHandler
* H
= 0 );
// Constructor: reads std::cin in chunks of up to 4096*4 bytes while the
// stream is good(), appending the bytes actually read (gcount()) to FileData.
// Throws std::string("Standard Input empty!") when nothing was read;
// otherwise parses the accumulated bytes with the module name "<stdin>".
// NOTE(review): the base-class initializer, the declaration of the local
// `Buffer` array and any statements between gcount() and the insert are not
// visible in this excerpt (listing numbers skip) - confirm.
132 BytecodeStdinReader::BytecodeStdinReader( BytecodeHandler
* H
)
137 // Read in all of the data from stdin, we cannot mmap stdin...
138 while (std::cin
.good()) {
139 std::cin
.read(Buffer
, 4096*4);
// gcount() reports how many bytes the read above actually produced.
140 int BlockSize
= std::cin
.gcount();
143 FileData
.insert(FileData
.end(), Buffer
, Buffer
+BlockSize
);
146 if (FileData
.empty())
147 throw std::string("Standard Input empty!");
// Safe to take the address of element 0: the vector is non-empty here.
149 FileBuf
= &FileData
[0];
150 ParseBytecode(FileBuf
, FileData
.size(), "<stdin>");
153 //===----------------------------------------------------------------------===//
154 // Varargs transmogrification code...
157 // CheckVarargs - This is used to automatically translate old-style varargs to
158 // new style varargs for backwards compatibility.
// CheckVarargs - inspects the module behind MP and, when it uses the old
// varargs intrinsic signatures, rewrites every call to the new ones.
//
// MP is returned unchanged when the module has no "llvm.va_start" function,
// or when that function already takes exactly one parameter. Otherwise the
// whole module is materialized and calls to llvm.va_start, llvm.va_end and
// llvm.va_copy are each replaced by an alloca/store/call/load sequence that
// targets a function of the same name with pointer parameters and a void
// return type.
//
// All three loops advance the use iterator (*I++) before the current call is
// erased, so erasing a call does not disturb the iteration.
// NOTE(review): closing braces and the final return statement are not visible
// in this excerpt (listing numbers skip) - confirm against the complete file.
159 static ModuleProvider
* CheckVarargs(ModuleProvider
* MP
) {
160 Module
* M
= MP
->getModule();
162 // check to see if va_start takes arguments...
163 Function
* F
= M
->getNamedFunction("llvm.va_start");
164 if(F
== 0) return MP
; //No varargs use, just return.
166 if (F
->getFunctionType()->getNumParams() == 1)
167 return MP
; // Modern varargs processing, just return.
169 // If we get to this point, we know that we have an old-style module.
170 // Materialize the whole thing to perform the rewriting.
171 MP
->materializeModule();
// --- llvm.va_start: the old form takes no arguments and returns the list ---
173 if(Function
* F
= M
->getNamedFunction("llvm.va_start")) {
174 assert(F
->arg_size() == 0 && "Obsolete va_start takes 0 argument!");
178 //bar = alloca typeof(foo)
// New signature: void llvm.va_start(<old return type>*).
182 const Type
* RetTy
= Type::getPrimitiveType(Type::VoidTyID
);
183 const Type
* ArgTy
= F
->getFunctionType()->getReturnType();
184 const Type
* ArgTyPtr
= PointerType::get(ArgTy
);
185 Function
* NF
= M
->getOrInsertFunction("llvm.va_start",
186 RetTy
, ArgTyPtr
, (Type
*)0);
188 for(Value::use_iterator I
= F
->use_begin(), E
= F
->use_end(); I
!= E
;)
189 if (CallInst
* CI
= dyn_cast
<CallInst
>(*I
++)) {
// Allocate a slot, let the new va_start fill it, then load the value back
// so existing users of the old call result keep working.
190 AllocaInst
* bar
= new AllocaInst(ArgTy
, 0, "vastart.fix.1", CI
);
191 new CallInst(NF
, bar
, "", CI
);
192 Value
* foo
= new LoadInst(bar
, "vastart.fix.2", CI
);
193 CI
->replaceAllUsesWith(foo
);
194 CI
->getParent()->getInstList().erase(CI
);
// --- llvm.va_end: the old form takes the list by value ---
199 if(Function
* F
= M
->getNamedFunction("llvm.va_end")) {
200 assert(F
->arg_size() == 1 && "Obsolete va_end takes 1 argument!");
203 //bar = alloca 1 of typeof(foo)
// New signature: void llvm.va_end(<old parameter type>*).
205 const Type
* RetTy
= Type::getPrimitiveType(Type::VoidTyID
);
206 const Type
* ArgTy
= F
->getFunctionType()->getParamType(0);
207 const Type
* ArgTyPtr
= PointerType::get(ArgTy
);
208 Function
* NF
= M
->getOrInsertFunction("llvm.va_end",
209 RetTy
, ArgTyPtr
, (Type
*)0);
211 for(Value::use_iterator I
= F
->use_begin(), E
= F
->use_end(); I
!= E
;)
212 if (CallInst
* CI
= dyn_cast
<CallInst
>(*I
++)) {
// Spill the by-value argument (operand 1 of the old call) into a fresh slot
// and pass the slot's address to the new va_end.
213 AllocaInst
* bar
= new AllocaInst(ArgTy
, 0, "vaend.fix.1", CI
);
214 new StoreInst(CI
->getOperand(1), bar
, CI
);
215 new CallInst(NF
, bar
, "", CI
);
216 CI
->getParent()->getInstList().erase(CI
);
// --- llvm.va_copy: the old form takes the source by value, returns the copy ---
221 if(Function
* F
= M
->getNamedFunction("llvm.va_copy")) {
222 assert(F
->arg_size() == 1 && "Obsolete va_copy takes 1 argument!");
225 //a = alloca 1 of typeof(foo)
226 //b = alloca 1 of typeof(foo)
// New signature: void llvm.va_copy(<old return type>*, <old return type>*).
231 const Type
* RetTy
= Type::getPrimitiveType(Type::VoidTyID
);
232 const Type
* ArgTy
= F
->getFunctionType()->getReturnType();
233 const Type
* ArgTyPtr
= PointerType::get(ArgTy
);
234 Function
* NF
= M
->getOrInsertFunction("llvm.va_copy",
235 RetTy
, ArgTyPtr
, ArgTyPtr
, (Type
*)0);
237 for(Value::use_iterator I
= F
->use_begin(), E
= F
->use_end(); I
!= E
;)
238 if (CallInst
* CI
= dyn_cast
<CallInst
>(*I
++)) {
// `a` receives the copy, `b` holds the source value (operand 1 of the old
// call); the result is loaded from `a` to replace the old call's value.
239 AllocaInst
* a
= new AllocaInst(ArgTy
, 0, "vacopy.fix.1", CI
);
240 AllocaInst
* b
= new AllocaInst(ArgTy
, 0, "vacopy.fix.2", CI
);
241 new StoreInst(CI
->getOperand(1), b
, CI
);
242 new CallInst(NF
, a
, b
, "", CI
);
243 Value
* foo
= new LoadInst(a
, "vacopy.fix.3", CI
);
244 CI
->replaceAllUsesWith(foo
);
245 CI
->getParent()->getInstList().erase(CI
);
252 //===----------------------------------------------------------------------===//
254 //===----------------------------------------------------------------------===//
256 /// getBytecodeBufferModuleProvider - lazy function-at-a-time loading from a buffer
// Builds a heap-allocated BytecodeBufferReader from
// (Buffer, Length, ModuleID, H).
// NOTE(review): the return type, the `Length` parameter line and the
// statement that wraps/returns the new reader are not visible in this excerpt
// (listing numbers skip) - confirm how the reader is returned to the caller.
259 llvm::getBytecodeBufferModuleProvider(const unsigned char *Buffer
,
261 const std::string
&ModuleID
,
262 BytecodeHandler
* H
) {
264 new BytecodeBufferReader(Buffer
, Length
, ModuleID
, H
));
267 /// ParseBytecodeBuffer - Parse a given bytecode buffer
// ParseBytecodeBuffer - obtains a provider for the buffer via
// getBytecodeBufferModuleProvider, holds it in a std::auto_ptr, and returns
// the Module produced by releaseModule().
// A std::string exception is caught; its text is copied into *ErrorStr when
// ErrorStr is non-null.
// NOTE(review): the `try {` line and the value returned after the catch are
// not visible in this excerpt (listing numbers skip) - confirm.
269 Module
*llvm::ParseBytecodeBuffer(const unsigned char *Buffer
, unsigned Length
,
270 const std::string
&ModuleID
,
271 std::string
*ErrorStr
){
273 std::auto_ptr
<ModuleProvider
>
274 AMP(getBytecodeBufferModuleProvider(Buffer
, Length
, ModuleID
));
275 return AMP
->releaseModule();
276 } catch (std::string
&err
) {
// ErrorStr is optional; only write through it when the caller supplied one.
277 if (ErrorStr
) *ErrorStr
= err
;
282 /// getBytecodeModuleProvider - lazy function-at-a-time loading from a file
// getBytecodeModuleProvider - picks the reader by file name: the name "-"
// selects a BytecodeStdinReader, any other name a BytecodeFileReader for that
// file. In both cases the newly allocated reader is passed through
// CheckVarargs and the result is returned.
284 ModuleProvider
*llvm::getBytecodeModuleProvider(const std::string
&Filename
,
285 BytecodeHandler
* H
) {
286 if (Filename
!= std::string("-")) // Read from a file...
287 return CheckVarargs(new BytecodeFileReader(Filename
,H
));
288 else // Read from stdin
289 return CheckVarargs(new BytecodeStdinReader(H
));
292 /// ParseBytecodeFile - Parse the given bytecode file
// ParseBytecodeFile - obtains a provider for Filename via
// getBytecodeModuleProvider, holds it in a std::auto_ptr, and returns the
// Module produced by releaseModule().
// A std::string exception is caught; its text is copied into *ErrorStr when
// ErrorStr is non-null.
// NOTE(review): the `try {` line and the value returned after the catch are
// not visible in this excerpt (listing numbers skip) - confirm.
294 Module
*llvm::ParseBytecodeFile(const std::string
&Filename
,
295 std::string
*ErrorStr
) {
297 std::auto_ptr
<ModuleProvider
> AMP(getBytecodeModuleProvider(Filename
));
298 return AMP
->releaseModule();
299 } catch (std::string
&err
) {
// ErrorStr is optional; only write through it when the caller supplied one.
300 if (ErrorStr
) *ErrorStr
= err
;
305 // AnalyzeBytecodeFile - analyze one file
// AnalyzeBytecodeFile - creates an analyzer handler from (bca, output) with
// createBytecodeAnalyzerHandler, passes it to getBytecodeModuleProvider
// together with Filename, and returns the Module produced by releaseModule().
// A std::string exception is caught; its text is copied into *ErrorStr when
// ErrorStr is non-null.
// NOTE(review): the end of the parameter list, the `try {` line and the value
// returned after the catch are not visible in this excerpt - confirm.
306 Module
* llvm::AnalyzeBytecodeFile(
307 const std::string
&Filename
, ///< File to analyze
308 BytecodeAnalysis
& bca
, ///< Statistical output
309 std::string
*ErrorStr
, ///< Error output
310 std::ostream
* output
///< Dump output
314 BytecodeHandler
* analyzerHandler
=createBytecodeAnalyzerHandler(bca
,output
);
315 std::auto_ptr
<ModuleProvider
> AMP(
316 getBytecodeModuleProvider(Filename
,analyzerHandler
));
317 return AMP
->releaseModule();
318 } catch (std::string
&err
) {
// ErrorStr is optional; only write through it when the caller supplied one.
319 if (ErrorStr
) *ErrorStr
= err
;
324 // AnalyzeBytecodeBuffer - analyze a buffer
// AnalyzeBytecodeBuffer - creates an analyzer handler from (bca, output) with
// createBytecodeAnalyzerHandler, passes it to getBytecodeBufferModuleProvider
// together with (Buffer, Length, ModuleID), and returns the Module produced
// by releaseModule().
// A std::string exception is caught; its text is copied into *ErrorStr when
// ErrorStr is non-null.
// NOTE(review): the end of the parameter list, the `try {` line and the value
// returned after the catch are not visible in this excerpt - confirm.
325 Module
* llvm::AnalyzeBytecodeBuffer(
326 const unsigned char* Buffer
, ///< Pointer to start of bytecode buffer
327 unsigned Length
, ///< Size of the bytecode buffer
328 const std::string
& ModuleID
, ///< Identifier for the module
329 BytecodeAnalysis
& bca
, ///< The results of the analysis
330 std::string
* ErrorStr
, ///< Errors, if any.
331 std::ostream
* output
///< Dump output, if any
335 BytecodeHandler
* hdlr
= createBytecodeAnalyzerHandler(bca
, output
);
336 std::auto_ptr
<ModuleProvider
>
337 AMP(getBytecodeBufferModuleProvider(Buffer
, Length
, ModuleID
, hdlr
));
338 return AMP
->releaseModule();
339 } catch (std::string
&err
) {
// ErrorStr is optional; only write through it when the caller supplied one.
340 if (ErrorStr
) *ErrorStr
= err
;
// GetBytecodeDependentLibraries - obtains a provider for the file `fname`,
// takes the Module from it with releaseModule(), and assigns the module's
// library list (getLibraries()) to `deplibs`.
// NOTE(review): the error handling, the disposal of the released Module and
// the boolean return values are not visible in this excerpt (listing numbers
// skip) - confirm against the complete file.
345 bool llvm::GetBytecodeDependentLibraries(const std::string
&fname
,
346 Module::LibraryListType
& deplibs
) {
348 std::auto_ptr
<ModuleProvider
> AMP( getBytecodeModuleProvider(fname
));
349 Module
* M
= AMP
->releaseModule();
351 deplibs
= M
->getLibraries();
// getSymbols - appends to `symbols` the name of every global variable and
// every function in M that satisfies all of:
//   * it is not external (isExternal() is false),
//   * it does not have internal linkage,
//   * its name is not empty.
// Global variables are visited first, then functions; existing contents of
// `symbols` are kept.
360 static void getSymbols(Module
*M
, std::vector
<std::string
>& symbols
) {
361 // Loop over global variables
362 for (Module::global_iterator GI
= M
->global_begin(), GE
=M
->global_end(); GI
!= GE
; ++GI
)
363 if (!GI
->isExternal() && !GI
->hasInternalLinkage())
364 if (!GI
->getName().empty())
365 symbols
.push_back(GI
->getName());
367 // Loop over functions.
368 for (Module::iterator FI
= M
->begin(), FE
= M
->end(); FI
!= FE
; ++FI
)
369 if (!FI
->isExternal() && !FI
->hasInternalLinkage())
370 if (!FI
->getName().empty())
371 symbols
.push_back(FI
->getName());
374 // Get just the externally visible defined symbols from the bytecode
// GetBytecodeSymbols (file overload) - obtains a provider for the path
// fName (converted with toString()), holds it in a std::auto_ptr,
// materializes the module through it, and collects the module's symbol names
// into `symbols` with getSymbols().
// NOTE(review): the error handling and the boolean return values are not
// visible in this excerpt (listing numbers skip) - confirm.
375 bool llvm::GetBytecodeSymbols(const sys::Path
& fName
,
376 std::vector
<std::string
>& symbols
) {
378 std::auto_ptr
<ModuleProvider
> AMP(
379 getBytecodeModuleProvider(fName
.toString()));
381 // Get the module from the provider
382 Module
* M
= AMP
->materializeModule();
385 getSymbols(M
, symbols
);
387 // Done with the module
// GetBytecodeSymbols (buffer overload) - obtains a provider for the buffer
// with getBytecodeBufferModuleProvider, materializes the module through it,
// and collects the module's symbol names into `symbols` with getSymbols().
// The provider is kept in a raw pointer (MP) rather than an owning wrapper.
// NOTE(review): the return type, the try/catch structure and the return
// statements are not visible in this excerpt (listing numbers skip); the
// remaining comments suggest MP is handed to the caller on success and
// deleted on failure - confirm against the complete file.
396 llvm::GetBytecodeSymbols(const unsigned char*Buffer
, unsigned Length
,
397 const std::string
& ModuleID
,
398 std::vector
<std::string
>& symbols
) {
400 ModuleProvider
* MP
= 0;
402 // Get the module provider
403 MP
= getBytecodeBufferModuleProvider(Buffer
, Length
, ModuleID
);
405 // Get the module from the provider
406 Module
* M
= MP
->materializeModule();
409 getSymbols(M
, symbols
);
411 // Done with the module. Note that ModuleProvider will delete the
412 // Module when it is deleted. Also note that it's the caller's responsibility
413 // to delete the ModuleProvider.
417 // We delete only the ModuleProvider here because its destructor will
418 // also delete the Module (we used materializeModule not releaseModule).