1 //===- Reader.cpp - Code to read bytecode files ---------------------------===//
3 // The LLVM Compiler Infrastructure
5 // This file was developed by the LLVM research group and is distributed under
6 // the University of Illinois Open Source License. See LICENSE.TXT for details.
8 //===----------------------------------------------------------------------===//
10 // This library implements the functionality defined in llvm/Bytecode/Reader.h
12 // Note that this library should be as fast as possible, reentrant, and
15 // TODO: Allow passing in an option to ignore the symbol table
17 //===----------------------------------------------------------------------===//
20 #include "llvm/Assembly/AutoUpgrade.h"
21 #include "llvm/Bytecode/BytecodeHandler.h"
22 #include "llvm/BasicBlock.h"
23 #include "llvm/CallingConv.h"
24 #include "llvm/Constants.h"
25 #include "llvm/InlineAsm.h"
26 #include "llvm/Instructions.h"
27 #include "llvm/SymbolTable.h"
28 #include "llvm/Bytecode/Format.h"
29 #include "llvm/Config/alloca.h"
30 #include "llvm/Support/GetElementPtrTypeIterator.h"
31 #include "llvm/Support/Compressor.h"
32 #include "llvm/Support/MathExtras.h"
33 #include "llvm/ADT/StringExtras.h"
39 /// @brief A class for maintaining the slot number definition
40 /// as a placeholder for the actual definition for forward constants defs.
41 class ConstantPlaceHolder
: public ConstantExpr
{
42 ConstantPlaceHolder(); // DO NOT IMPLEMENT
43 void operator=(const ConstantPlaceHolder
&); // DO NOT IMPLEMENT
46 ConstantPlaceHolder(const Type
*Ty
)
47 : ConstantExpr(Ty
, Instruction::UserOp1
, &Op
, 1),
48 Op(UndefValue::get(Type::IntTy
), this) {
53 // Provide some details on error
54 inline void BytecodeReader::error(std::string err
) {
56 err
+= itostr(RevisionNum
) ;
58 err
+= itostr(At
-MemStart
);
63 //===----------------------------------------------------------------------===//
64 // Bytecode Reading Methods
65 //===----------------------------------------------------------------------===//
67 /// Determine if the current block being read contains any more data.
68 inline bool BytecodeReader::moreInBlock() {
72 /// Throw an error if we've read past the end of the current block
73 inline void BytecodeReader::checkPastBlockEnd(const char * block_name
) {
75 error(std::string("Attempt to read past the end of ") + block_name
+
79 /// Align the buffer position to a 32 bit boundary
80 inline void BytecodeReader::align32() {
83 At
= (const unsigned char *)((intptr_t)(At
+3) & (~3UL));
85 if (Handler
) Handler
->handleAlignment(At
- Save
);
87 error("Ran out of data while aligning!");
91 /// Read a whole unsigned integer
92 inline unsigned BytecodeReader::read_uint() {
94 error("Ran out of data reading uint!");
96 return At
[-4] | (At
[-3] << 8) | (At
[-2] << 16) | (At
[-1] << 24);
99 /// Read a variable-bit-rate encoded unsigned integer
100 inline unsigned BytecodeReader::read_vbr_uint() {
107 error("Ran out of data reading vbr_uint!");
108 Result
|= (unsigned)((*At
++) & 0x7F) << Shift
;
110 } while (At
[-1] & 0x80);
111 if (Handler
) Handler
->handleVBR32(At
-Save
);
115 /// Read a variable-bit-rate encoded unsigned 64-bit integer.
116 inline uint64_t BytecodeReader::read_vbr_uint64() {
123 error("Ran out of data reading vbr_uint64!");
124 Result
|= (uint64_t)((*At
++) & 0x7F) << Shift
;
126 } while (At
[-1] & 0x80);
127 if (Handler
) Handler
->handleVBR64(At
-Save
);
131 /// Read a variable-bit-rate encoded signed 64-bit integer.
132 inline int64_t BytecodeReader::read_vbr_int64() {
133 uint64_t R
= read_vbr_uint64();
136 return -(int64_t)(R
>> 1);
137 else // There is no such thing as -0 with integers. "-0" really means
138 // 0x8000000000000000.
141 return (int64_t)(R
>> 1);
144 /// Read a pascal-style string (length followed by text)
145 inline std::string
BytecodeReader::read_str() {
146 unsigned Size
= read_vbr_uint();
147 const unsigned char *OldAt
= At
;
149 if (At
> BlockEnd
) // Size invalid?
150 error("Ran out of data reading a string!");
151 return std::string((char*)OldAt
, Size
);
154 /// Read an arbitrary block of data
155 inline void BytecodeReader::read_data(void *Ptr
, void *End
) {
156 unsigned char *Start
= (unsigned char *)Ptr
;
157 unsigned Amount
= (unsigned char *)End
- Start
;
158 if (At
+Amount
> BlockEnd
)
159 error("Ran out of data!");
160 std::copy(At
, At
+Amount
, Start
);
164 /// Read a float value in little-endian order
165 inline void BytecodeReader::read_float(float& FloatVal
) {
166 /// FIXME: This isn't optimal, it has size problems on some platforms
167 /// where FP is not IEEE.
168 FloatVal
= BitsToFloat(At
[0] | (At
[1] << 8) | (At
[2] << 16) | (At
[3] << 24));
169 At
+=sizeof(uint32_t);
172 /// Read a double value in little-endian order
173 inline void BytecodeReader::read_double(double& DoubleVal
) {
174 /// FIXME: This isn't optimal, it has size problems on some platforms
175 /// where FP is not IEEE.
176 DoubleVal
= BitsToDouble((uint64_t(At
[0]) << 0) | (uint64_t(At
[1]) << 8) |
177 (uint64_t(At
[2]) << 16) | (uint64_t(At
[3]) << 24) |
178 (uint64_t(At
[4]) << 32) | (uint64_t(At
[5]) << 40) |
179 (uint64_t(At
[6]) << 48) | (uint64_t(At
[7]) << 56));
180 At
+=sizeof(uint64_t);
183 /// Read a block header and obtain its type and size
184 inline void BytecodeReader::read_block(unsigned &Type
, unsigned &Size
) {
185 if ( hasLongBlockHeaders
) {
189 case BytecodeFormat::Reserved_DoNotUse
:
190 error("Reserved_DoNotUse used as Module Type?");
191 Type
= BytecodeFormat::ModuleBlockID
; break;
192 case BytecodeFormat::Module
:
193 Type
= BytecodeFormat::ModuleBlockID
; break;
194 case BytecodeFormat::Function
:
195 Type
= BytecodeFormat::FunctionBlockID
; break;
196 case BytecodeFormat::ConstantPool
:
197 Type
= BytecodeFormat::ConstantPoolBlockID
; break;
198 case BytecodeFormat::SymbolTable
:
199 Type
= BytecodeFormat::SymbolTableBlockID
; break;
200 case BytecodeFormat::ModuleGlobalInfo
:
201 Type
= BytecodeFormat::ModuleGlobalInfoBlockID
; break;
202 case BytecodeFormat::GlobalTypePlane
:
203 Type
= BytecodeFormat::GlobalTypePlaneBlockID
; break;
204 case BytecodeFormat::InstructionList
:
205 Type
= BytecodeFormat::InstructionListBlockID
; break;
206 case BytecodeFormat::CompactionTable
:
207 Type
= BytecodeFormat::CompactionTableBlockID
; break;
208 case BytecodeFormat::BasicBlock
:
209 /// This block type isn't used after version 1.1. However, we have to
210 /// still allow the value in case this is an old bc format file.
211 /// We just let its value creep thru.
214 error("Invalid block id found: " + utostr(Type
));
219 Type
= Size
& 0x1F; // mask low order five bits
220 Size
>>= 5; // get rid of five low order bits, leaving high 27
223 if (At
+ Size
> BlockEnd
)
224 error("Attempt to size a block past end of memory");
225 BlockEnd
= At
+ Size
;
226 if (Handler
) Handler
->handleBlock(Type
, BlockStart
, Size
);
230 /// In LLVM 1.2 and before, Types were derived from Value and so they were
231 /// written as part of the type planes along with any other Value. In LLVM
232 /// 1.3 this changed so that Type does not derive from Value. Consequently,
233 /// the BytecodeReader's containers for Values can't contain Types because
234 /// there's no inheritance relationship. This means that the "Type Type"
235 /// plane is defunct along with the Type::TypeTyID TypeID. In LLVM 1.3
236 /// whenever a bytecode construct must have both types and values together,
237 /// the types are always read/written first and then the Values. Furthermore
238 /// since Type::TypeTyID no longer exists, its value (12) now corresponds to
239 /// Type::LabelTyID. In order to overcome this we must "sanitize" all the
240 /// type TypeIDs we encounter. For LLVM 1.3 bytecode files, there's no change.
241 /// For LLVM 1.2 and before, this function will decrement the type id by
242 /// one to account for the missing Type::TypeTyID enumerator if the value is
243 /// larger than 12 (Type::LabelTyID). If the value is exactly 12, then this
244 /// function returns true, otherwise false. This helps detect situations
245 /// where the pre 1.3 bytecode is indicating that what follows is a type.
246 /// @returns true iff type id corresponds to pre 1.3 "type type"
247 inline bool BytecodeReader::sanitizeTypeId(unsigned &TypeId
) {
248 if (hasTypeDerivedFromValue
) { /// do nothing if 1.3 or later
249 if (TypeId
== Type::LabelTyID
) {
250 TypeId
= Type::VoidTyID
; // sanitize it
251 return true; // indicate we got TypeTyID in pre 1.3 bytecode
252 } else if (TypeId
> Type::LabelTyID
)
253 --TypeId
; // shift all planes down because type type plane is missing
258 /// Reads a vbr uint to read in a type id and does the necessary
259 /// conversion on it by calling sanitizeTypeId.
260 /// @returns true iff \p TypeId read corresponds to a pre 1.3 "type type"
261 /// @see sanitizeTypeId
262 inline bool BytecodeReader::read_typeid(unsigned &TypeId
) {
263 TypeId
= read_vbr_uint();
264 if ( !has32BitTypes
)
265 if ( TypeId
== 0x00FFFFFF )
266 TypeId
= read_vbr_uint();
267 return sanitizeTypeId(TypeId
);
270 //===----------------------------------------------------------------------===//
272 //===----------------------------------------------------------------------===//
274 /// Determine if a type id has an implicit null value
275 inline bool BytecodeReader::hasImplicitNull(unsigned TyID
) {
276 if (!hasExplicitPrimitiveZeros
)
277 return TyID
!= Type::LabelTyID
&& TyID
!= Type::VoidTyID
;
278 return TyID
>= Type::FirstDerivedTyID
;
281 /// Obtain a type given a typeid and account for things like compaction tables,
282 /// function level vs module level, and the offsetting for the primitive types.
283 const Type
*BytecodeReader::getType(unsigned ID
) {
284 if (ID
< Type::FirstDerivedTyID
)
285 if (const Type
*T
= Type::getPrimitiveType((Type::TypeID
)ID
))
286 return T
; // Asked for a primitive type...
288 // Otherwise, derived types need offset...
289 ID
-= Type::FirstDerivedTyID
;
291 if (!CompactionTypes
.empty()) {
292 if (ID
>= CompactionTypes
.size())
293 error("Type ID out of range for compaction table!");
294 return CompactionTypes
[ID
].first
;
297 // Is it a module-level type?
298 if (ID
< ModuleTypes
.size())
299 return ModuleTypes
[ID
].get();
301 // Nope, is it a function-level type?
302 ID
-= ModuleTypes
.size();
303 if (ID
< FunctionTypes
.size())
304 return FunctionTypes
[ID
].get();
306 error("Illegal type reference!");
310 /// Get a sanitized type id. This just makes sure that the \p ID
311 /// is both sanitized and not the "type type" of pre-1.3 bytecode.
312 /// @see sanitizeTypeId
313 inline const Type
* BytecodeReader::getSanitizedType(unsigned& ID
) {
314 if (sanitizeTypeId(ID
))
315 error("Invalid type id encountered");
319 /// This method just saves some coding. It uses read_typeid to read
320 /// in a sanitized type id, reports an error if it is the "type type", and
321 /// then calls getType to return the type value.
322 inline const Type
* BytecodeReader::readSanitizedType() {
325 error("Invalid type id encountered");
329 /// Get the slot number associated with a type accounting for primitive
330 /// types, compaction tables, and function level vs module level.
331 unsigned BytecodeReader::getTypeSlot(const Type
*Ty
) {
332 if (Ty
->isPrimitiveType())
333 return Ty
->getTypeID();
335 // Scan the compaction table for the type if needed.
336 if (!CompactionTypes
.empty()) {
337 for (unsigned i
= 0, e
= CompactionTypes
.size(); i
!= e
; ++i
)
338 if (CompactionTypes
[i
].first
== Ty
)
339 return Type::FirstDerivedTyID
+ i
;
341 error("Couldn't find type specified in compaction table!");
344 // Check the function level types first...
345 TypeListTy::iterator I
= std::find(FunctionTypes
.begin(),
346 FunctionTypes
.end(), Ty
);
348 if (I
!= FunctionTypes
.end())
349 return Type::FirstDerivedTyID
+ ModuleTypes
.size() +
350 (&*I
- &FunctionTypes
[0]);
352 // If we don't have our cache yet, build it now.
353 if (ModuleTypeIDCache
.empty()) {
355 ModuleTypeIDCache
.reserve(ModuleTypes
.size());
356 for (TypeListTy::iterator I
= ModuleTypes
.begin(), E
= ModuleTypes
.end();
358 ModuleTypeIDCache
.push_back(std::make_pair(*I
, N
));
360 std::sort(ModuleTypeIDCache
.begin(), ModuleTypeIDCache
.end());
363 // Binary search the cache for the entry.
364 std::vector
<std::pair
<const Type
*, unsigned> >::iterator IT
=
365 std::lower_bound(ModuleTypeIDCache
.begin(), ModuleTypeIDCache
.end(),
366 std::make_pair(Ty
, 0U));
367 if (IT
== ModuleTypeIDCache
.end() || IT
->first
!= Ty
)
368 error("Didn't find type in ModuleTypes.");
370 return Type::FirstDerivedTyID
+ IT
->second
;
373 /// This is just like getType, but when a compaction table is in use, it is
374 /// ignored. It also ignores function level types.
376 const Type
*BytecodeReader::getGlobalTableType(unsigned Slot
) {
377 if (Slot
< Type::FirstDerivedTyID
) {
378 const Type
*Ty
= Type::getPrimitiveType((Type::TypeID
)Slot
);
380 error("Not a primitive type ID?");
383 Slot
-= Type::FirstDerivedTyID
;
384 if (Slot
>= ModuleTypes
.size())
385 error("Illegal compaction table type reference!");
386 return ModuleTypes
[Slot
];
389 /// This is just like getTypeSlot, but when a compaction table is in use, it
390 /// is ignored. It also ignores function level types.
391 unsigned BytecodeReader::getGlobalTableTypeSlot(const Type
*Ty
) {
392 if (Ty
->isPrimitiveType())
393 return Ty
->getTypeID();
395 // If we don't have our cache yet, build it now.
396 if (ModuleTypeIDCache
.empty()) {
398 ModuleTypeIDCache
.reserve(ModuleTypes
.size());
399 for (TypeListTy::iterator I
= ModuleTypes
.begin(), E
= ModuleTypes
.end();
401 ModuleTypeIDCache
.push_back(std::make_pair(*I
, N
));
403 std::sort(ModuleTypeIDCache
.begin(), ModuleTypeIDCache
.end());
406 // Binary search the cache for the entry.
407 std::vector
<std::pair
<const Type
*, unsigned> >::iterator IT
=
408 std::lower_bound(ModuleTypeIDCache
.begin(), ModuleTypeIDCache
.end(),
409 std::make_pair(Ty
, 0U));
410 if (IT
== ModuleTypeIDCache
.end() || IT
->first
!= Ty
)
411 error("Didn't find type in ModuleTypes.");
413 return Type::FirstDerivedTyID
+ IT
->second
;
416 /// Retrieve a value of a given type and slot number, possibly creating
417 /// it if it doesn't already exist.
418 Value
* BytecodeReader::getValue(unsigned type
, unsigned oNum
, bool Create
) {
419 assert(type
!= Type::LabelTyID
&& "getValue() cannot get blocks!");
422 // If there is a compaction table active, it defines the low-level numbers.
423 // If not, the module values define the low-level numbers.
424 if (CompactionValues
.size() > type
&& !CompactionValues
[type
].empty()) {
425 if (Num
< CompactionValues
[type
].size())
426 return CompactionValues
[type
][Num
];
427 Num
-= CompactionValues
[type
].size();
429 // By default, the global type id is the type id passed in
430 unsigned GlobalTyID
= type
;
432 // If the type plane was compactified, figure out the global type ID by
433 // adding the derived type ids and the distance.
434 if (!CompactionTypes
.empty() && type
>= Type::FirstDerivedTyID
)
435 GlobalTyID
= CompactionTypes
[type
-Type::FirstDerivedTyID
].second
;
437 if (hasImplicitNull(GlobalTyID
)) {
438 const Type
*Ty
= getType(type
);
439 if (!isa
<OpaqueType
>(Ty
)) {
441 return Constant::getNullValue(Ty
);
446 if (GlobalTyID
< ModuleValues
.size() && ModuleValues
[GlobalTyID
]) {
447 if (Num
< ModuleValues
[GlobalTyID
]->size())
448 return ModuleValues
[GlobalTyID
]->getOperand(Num
);
449 Num
-= ModuleValues
[GlobalTyID
]->size();
453 if (FunctionValues
.size() > type
&&
454 FunctionValues
[type
] &&
455 Num
< FunctionValues
[type
]->size())
456 return FunctionValues
[type
]->getOperand(Num
);
458 if (!Create
) return 0; // Do not create a placeholder?
460 // Did we already create a place holder?
461 std::pair
<unsigned,unsigned> KeyValue(type
, oNum
);
462 ForwardReferenceMap::iterator I
= ForwardReferences
.lower_bound(KeyValue
);
463 if (I
!= ForwardReferences
.end() && I
->first
== KeyValue
)
464 return I
->second
; // We have already created this placeholder
466 // If the type exists (it should)
467 if (const Type
* Ty
= getType(type
)) {
468 // Create the place holder
469 Value
*Val
= new Argument(Ty
);
470 ForwardReferences
.insert(I
, std::make_pair(KeyValue
, Val
));
473 throw "Can't create placeholder for value of type slot #" + utostr(type
);
476 /// This is just like getValue, but when a compaction table is in use, it
477 /// is ignored. Also, no forward references or other fancy features are supported.
479 Value
* BytecodeReader::getGlobalTableValue(unsigned TyID
, unsigned SlotNo
) {
481 return Constant::getNullValue(getType(TyID
));
483 if (!CompactionTypes
.empty() && TyID
>= Type::FirstDerivedTyID
) {
484 TyID
-= Type::FirstDerivedTyID
;
485 if (TyID
>= CompactionTypes
.size())
486 error("Type ID out of range for compaction table!");
487 TyID
= CompactionTypes
[TyID
].second
;
492 if (TyID
>= ModuleValues
.size() || ModuleValues
[TyID
] == 0 ||
493 SlotNo
>= ModuleValues
[TyID
]->size()) {
494 if (TyID
>= ModuleValues
.size() || ModuleValues
[TyID
] == 0)
495 error("Corrupt compaction table entry!"
496 + utostr(TyID
) + ", " + utostr(SlotNo
) + ": "
497 + utostr(ModuleValues
.size()));
499 error("Corrupt compaction table entry!"
500 + utostr(TyID
) + ", " + utostr(SlotNo
) + ": "
501 + utostr(ModuleValues
.size()) + ", "
502 + utohexstr(reinterpret_cast<uint64_t>(((void*)ModuleValues
[TyID
])))
504 + utostr(ModuleValues
[TyID
]->size()));
506 return ModuleValues
[TyID
]->getOperand(SlotNo
);
509 /// Just like getValue, except that it returns a null pointer
510 /// only on error. It always returns a constant (meaning that if the value is
511 /// defined, but is not a constant, that is an error). If the specified
512 /// constant hasn't been parsed yet, a placeholder is defined and used.
513 /// Later, after the real value is parsed, the placeholder is eliminated.
514 Constant
* BytecodeReader::getConstantValue(unsigned TypeSlot
, unsigned Slot
) {
515 if (Value
*V
= getValue(TypeSlot
, Slot
, false))
516 if (Constant
*C
= dyn_cast
<Constant
>(V
))
517 return C
; // If we already have the value parsed, just return it
519 error("Value for slot " + utostr(Slot
) +
520 " is expected to be a constant!");
522 std::pair
<unsigned, unsigned> Key(TypeSlot
, Slot
);
523 ConstantRefsType::iterator I
= ConstantFwdRefs
.lower_bound(Key
);
525 if (I
!= ConstantFwdRefs
.end() && I
->first
== Key
) {
528 // Create a placeholder for the constant reference and
529 // keep track of the fact that we have a forward ref to recycle it
530 Constant
*C
= new ConstantPlaceHolder(getType(TypeSlot
));
532 // Keep track of the fact that we have a forward ref to recycle it
533 ConstantFwdRefs
.insert(I
, std::make_pair(Key
, C
));
538 //===----------------------------------------------------------------------===//
539 // IR Construction Methods
540 //===----------------------------------------------------------------------===//
542 /// As values are created, they are inserted into the appropriate place
543 /// with this method. The ValueTable argument must be one of ModuleValues
544 /// or FunctionValues data members of this class.
545 unsigned BytecodeReader::insertValue(Value
*Val
, unsigned type
,
546 ValueTable
&ValueTab
) {
547 if (ValueTab
.size() <= type
)
548 ValueTab
.resize(type
+1);
550 if (!ValueTab
[type
]) ValueTab
[type
] = new ValueList();
552 ValueTab
[type
]->push_back(Val
);
554 bool HasOffset
= hasImplicitNull(type
) && !isa
<OpaqueType
>(Val
->getType());
555 return ValueTab
[type
]->size()-1 + HasOffset
;
558 /// Insert the arguments of a function as new values in the reader.
559 void BytecodeReader::insertArguments(Function
* F
) {
560 const FunctionType
*FT
= F
->getFunctionType();
561 Function::arg_iterator AI
= F
->arg_begin();
562 for (FunctionType::param_iterator It
= FT
->param_begin();
563 It
!= FT
->param_end(); ++It
, ++AI
)
564 insertValue(AI
, getTypeSlot(AI
->getType()), FunctionValues
);
567 //===----------------------------------------------------------------------===//
568 // Bytecode Parsing Methods
569 //===----------------------------------------------------------------------===//
571 /// This method parses a single instruction. The instruction is
572 /// inserted at the end of the \p BB provided. The arguments of
573 /// the instruction are provided in the \p Oprnds vector.
574 void BytecodeReader::ParseInstruction(std::vector
<unsigned> &Oprnds
,
578 // Clear instruction data
582 unsigned Op
= read_uint();
584 // bits Instruction format: Common to all formats
585 // --------------------------
586 // 01-00: Opcode type, fixed to 1.
588 Opcode
= (Op
>> 2) & 63;
589 Oprnds
.resize((Op
>> 0) & 03);
591 // Extract the operands
592 switch (Oprnds
.size()) {
594 // bits Instruction format:
595 // --------------------------
596 // 19-08: Resulting type plane
597 // 31-20: Operand #1 (if set to (2^12-1), then zero operands)
599 iType
= (Op
>> 8) & 4095;
600 Oprnds
[0] = (Op
>> 20) & 4095;
601 if (Oprnds
[0] == 4095) // Handle special encoding for 0 operands...
605 // bits Instruction format:
606 // --------------------------
607 // 15-08: Resulting type plane
611 iType
= (Op
>> 8) & 255;
612 Oprnds
[0] = (Op
>> 16) & 255;
613 Oprnds
[1] = (Op
>> 24) & 255;
616 // bits Instruction format:
617 // --------------------------
618 // 13-08: Resulting type plane
623 iType
= (Op
>> 8) & 63;
624 Oprnds
[0] = (Op
>> 14) & 63;
625 Oprnds
[1] = (Op
>> 20) & 63;
626 Oprnds
[2] = (Op
>> 26) & 63;
629 At
-= 4; // Hrm, try this again...
630 Opcode
= read_vbr_uint();
632 iType
= read_vbr_uint();
634 unsigned NumOprnds
= read_vbr_uint();
635 Oprnds
.resize(NumOprnds
);
638 error("Zero-argument instruction found; this is invalid.");
640 for (unsigned i
= 0; i
!= NumOprnds
; ++i
)
641 Oprnds
[i
] = read_vbr_uint();
646 const Type
*InstTy
= getSanitizedType(iType
);
648 // We have enough info to inform the handler now.
649 if (Handler
) Handler
->handleInstruction(Opcode
, InstTy
, Oprnds
, At
-SaveAt
);
651 // Declare the resulting instruction we'll build.
652 Instruction
*Result
= 0;
654 // If this is a bytecode format that did not include the unreachable
655 // instruction, bump up all opcodes numbers to make space.
656 if (hasNoUnreachableInst
) {
657 if (Opcode
>= Instruction::Unreachable
&&
663 // Handle binary operators
664 if (Opcode
>= Instruction::BinaryOpsBegin
&&
665 Opcode
< Instruction::BinaryOpsEnd
&& Oprnds
.size() == 2)
666 Result
= BinaryOperator::create((Instruction::BinaryOps
)Opcode
,
667 getValue(iType
, Oprnds
[0]),
668 getValue(iType
, Oprnds
[1]));
674 error("Illegal instruction read!");
676 case Instruction::VAArg
:
677 Result
= new VAArgInst(getValue(iType
, Oprnds
[0]),
678 getSanitizedType(Oprnds
[1]));
680 case 32: { //VANext_old
681 const Type
* ArgTy
= getValue(iType
, Oprnds
[0])->getType();
682 Function
* NF
= TheModule
->getOrInsertFunction("llvm.va_copy", ArgTy
, ArgTy
,
686 //foo = alloca 1 of t
691 AllocaInst
* foo
= new AllocaInst(ArgTy
, 0, "vanext.fix");
692 BB
->getInstList().push_back(foo
);
693 CallInst
* bar
= new CallInst(NF
, getValue(iType
, Oprnds
[0]));
694 BB
->getInstList().push_back(bar
);
695 BB
->getInstList().push_back(new StoreInst(bar
, foo
));
696 Instruction
* tmp
= new VAArgInst(foo
, getSanitizedType(Oprnds
[1]));
697 BB
->getInstList().push_back(tmp
);
698 Result
= new LoadInst(foo
);
701 case 33: { //VAArg_old
702 const Type
* ArgTy
= getValue(iType
, Oprnds
[0])->getType();
703 Function
* NF
= TheModule
->getOrInsertFunction("llvm.va_copy", ArgTy
, ArgTy
,
707 //foo = alloca 1 of t
711 AllocaInst
* foo
= new AllocaInst(ArgTy
, 0, "vaarg.fix");
712 BB
->getInstList().push_back(foo
);
713 CallInst
* bar
= new CallInst(NF
, getValue(iType
, Oprnds
[0]));
714 BB
->getInstList().push_back(bar
);
715 BB
->getInstList().push_back(new StoreInst(bar
, foo
));
716 Result
= new VAArgInst(foo
, getSanitizedType(Oprnds
[1]));
719 case Instruction::ExtractElement
: {
720 if (Oprnds
.size() != 2)
721 throw std::string("Invalid extractelement instruction!");
722 Value
*V1
= getValue(iType
, Oprnds
[0]);
723 Value
*V2
= getValue(Type::UIntTyID
, Oprnds
[1]);
725 if (!ExtractElementInst::isValidOperands(V1
, V2
))
726 throw std::string("Invalid extractelement instruction!");
728 Result
= new ExtractElementInst(V1
, V2
);
731 case Instruction::InsertElement
: {
732 const PackedType
*PackedTy
= dyn_cast
<PackedType
>(InstTy
);
733 if (!PackedTy
|| Oprnds
.size() != 3)
734 throw std::string("Invalid insertelement instruction!");
736 Value
*V1
= getValue(iType
, Oprnds
[0]);
737 Value
*V2
= getValue(getTypeSlot(PackedTy
->getElementType()), Oprnds
[1]);
738 Value
*V3
= getValue(Type::UIntTyID
, Oprnds
[2]);
740 if (!InsertElementInst::isValidOperands(V1
, V2
, V3
))
741 throw std::string("Invalid insertelement instruction!");
742 Result
= new InsertElementInst(V1
, V2
, V3
);
745 case Instruction::ShuffleVector
: {
746 const PackedType
*PackedTy
= dyn_cast
<PackedType
>(InstTy
);
747 if (!PackedTy
|| Oprnds
.size() != 3)
748 throw std::string("Invalid shufflevector instruction!");
749 Value
*V1
= getValue(iType
, Oprnds
[0]);
750 Value
*V2
= getValue(iType
, Oprnds
[1]);
751 const PackedType
*EltTy
=
752 PackedType::get(Type::UIntTy
, PackedTy
->getNumElements());
753 Value
*V3
= getValue(getTypeSlot(EltTy
), Oprnds
[2]);
754 if (!ShuffleVectorInst::isValidOperands(V1
, V2
, V3
))
755 throw std::string("Invalid shufflevector instruction!");
756 Result
= new ShuffleVectorInst(V1
, V2
, V3
);
759 case Instruction::Cast
:
760 Result
= new CastInst(getValue(iType
, Oprnds
[0]),
761 getSanitizedType(Oprnds
[1]));
763 case Instruction::Select
:
764 Result
= new SelectInst(getValue(Type::BoolTyID
, Oprnds
[0]),
765 getValue(iType
, Oprnds
[1]),
766 getValue(iType
, Oprnds
[2]));
768 case Instruction::PHI
: {
769 if (Oprnds
.size() == 0 || (Oprnds
.size() & 1))
770 error("Invalid phi node encountered!");
772 PHINode
*PN
= new PHINode(InstTy
);
773 PN
->reserveOperandSpace(Oprnds
.size());
774 for (unsigned i
= 0, e
= Oprnds
.size(); i
!= e
; i
+= 2)
775 PN
->addIncoming(getValue(iType
, Oprnds
[i
]), getBasicBlock(Oprnds
[i
+1]));
780 case Instruction::Shl
:
781 case Instruction::Shr
:
782 Result
= new ShiftInst((Instruction::OtherOps
)Opcode
,
783 getValue(iType
, Oprnds
[0]),
784 getValue(Type::UByteTyID
, Oprnds
[1]));
786 case Instruction::Ret
:
787 if (Oprnds
.size() == 0)
788 Result
= new ReturnInst();
789 else if (Oprnds
.size() == 1)
790 Result
= new ReturnInst(getValue(iType
, Oprnds
[0]));
792 error("Unrecognized instruction!");
795 case Instruction::Br
:
796 if (Oprnds
.size() == 1)
797 Result
= new BranchInst(getBasicBlock(Oprnds
[0]));
798 else if (Oprnds
.size() == 3)
799 Result
= new BranchInst(getBasicBlock(Oprnds
[0]),
800 getBasicBlock(Oprnds
[1]), getValue(Type::BoolTyID
, Oprnds
[2]));
802 error("Invalid number of operands for a 'br' instruction!");
804 case Instruction::Switch
: {
805 if (Oprnds
.size() & 1)
806 error("Switch statement with odd number of arguments!");
808 SwitchInst
*I
= new SwitchInst(getValue(iType
, Oprnds
[0]),
809 getBasicBlock(Oprnds
[1]),
811 for (unsigned i
= 2, e
= Oprnds
.size(); i
!= e
; i
+= 2)
812 I
->addCase(cast
<ConstantInt
>(getValue(iType
, Oprnds
[i
])),
813 getBasicBlock(Oprnds
[i
+1]));
818 case 58: // Call with extra operand for calling conv
819 case 59: // tail call, Fast CC
820 case 60: // normal call, Fast CC
821 case 61: // tail call, C Calling Conv
822 case Instruction::Call
: { // Normal Call, C Calling Convention
823 if (Oprnds
.size() == 0)
824 error("Invalid call instruction encountered!");
826 Value
*F
= getValue(iType
, Oprnds
[0]);
828 unsigned CallingConv
= CallingConv::C
;
829 bool isTailCall
= false;
831 if (Opcode
== 61 || Opcode
== 59)
834 // Check to make sure we have a pointer to function type
835 const PointerType
*PTy
= dyn_cast
<PointerType
>(F
->getType());
836 if (PTy
== 0) error("Call to non function pointer value!");
837 const FunctionType
*FTy
= dyn_cast
<FunctionType
>(PTy
->getElementType());
838 if (FTy
== 0) error("Call to non function pointer value!");
840 std::vector
<Value
*> Params
;
841 if (!FTy
->isVarArg()) {
842 FunctionType::param_iterator It
= FTy
->param_begin();
845 isTailCall
= Oprnds
.back() & 1;
846 CallingConv
= Oprnds
.back() >> 1;
848 } else if (Opcode
== 59 || Opcode
== 60)
849 CallingConv
= CallingConv::Fast
;
851 for (unsigned i
= 1, e
= Oprnds
.size(); i
!= e
; ++i
) {
852 if (It
== FTy
->param_end())
853 error("Invalid call instruction!");
854 Params
.push_back(getValue(getTypeSlot(*It
++), Oprnds
[i
]));
856 if (It
!= FTy
->param_end())
857 error("Invalid call instruction!");
859 Oprnds
.erase(Oprnds
.begin(), Oprnds
.begin()+1);
861 unsigned FirstVariableOperand
;
862 if (Oprnds
.size() < FTy
->getNumParams())
863 error("Call instruction missing operands!");
865 // Read all of the fixed arguments
866 for (unsigned i
= 0, e
= FTy
->getNumParams(); i
!= e
; ++i
)
867 Params
.push_back(getValue(getTypeSlot(FTy
->getParamType(i
)),Oprnds
[i
]));
869 FirstVariableOperand
= FTy
->getNumParams();
871 if ((Oprnds
.size()-FirstVariableOperand
) & 1)
872 error("Invalid call instruction!"); // Must be pairs of type/value
874 for (unsigned i
= FirstVariableOperand
, e
= Oprnds
.size();
876 Params
.push_back(getValue(Oprnds
[i
], Oprnds
[i
+1]));
879 Result
= new CallInst(F
, Params
);
880 if (isTailCall
) cast
<CallInst
>(Result
)->setTailCall();
881 if (CallingConv
) cast
<CallInst
>(Result
)->setCallingConv(CallingConv
);
884 case 56: // Invoke with encoded CC
885 case 57: // Invoke Fast CC
886 case Instruction::Invoke
: { // Invoke C CC
887 if (Oprnds
.size() < 3)
888 error("Invalid invoke instruction!");
889 Value
*F
= getValue(iType
, Oprnds
[0]);
891 // Check to make sure we have a pointer to function type
892 const PointerType
*PTy
= dyn_cast
<PointerType
>(F
->getType());
894 error("Invoke to non function pointer value!");
895 const FunctionType
*FTy
= dyn_cast
<FunctionType
>(PTy
->getElementType());
897 error("Invoke to non function pointer value!");
899 std::vector
<Value
*> Params
;
900 BasicBlock
*Normal
, *Except
;
901 unsigned CallingConv
= CallingConv::C
;
904 CallingConv
= CallingConv::Fast
;
905 else if (Opcode
== 56) {
906 CallingConv
= Oprnds
.back();
910 if (!FTy
->isVarArg()) {
911 Normal
= getBasicBlock(Oprnds
[1]);
912 Except
= getBasicBlock(Oprnds
[2]);
914 FunctionType::param_iterator It
= FTy
->param_begin();
915 for (unsigned i
= 3, e
= Oprnds
.size(); i
!= e
; ++i
) {
916 if (It
== FTy
->param_end())
917 error("Invalid invoke instruction!");
918 Params
.push_back(getValue(getTypeSlot(*It
++), Oprnds
[i
]));
920 if (It
!= FTy
->param_end())
921 error("Invalid invoke instruction!");
923 Oprnds
.erase(Oprnds
.begin(), Oprnds
.begin()+1);
925 Normal
= getBasicBlock(Oprnds
[0]);
926 Except
= getBasicBlock(Oprnds
[1]);
928 unsigned FirstVariableArgument
= FTy
->getNumParams()+2;
929 for (unsigned i
= 2; i
!= FirstVariableArgument
; ++i
)
930 Params
.push_back(getValue(getTypeSlot(FTy
->getParamType(i
-2)),
933 if (Oprnds
.size()-FirstVariableArgument
& 1) // Must be type/value pairs
934 error("Invalid invoke instruction!");
936 for (unsigned i
= FirstVariableArgument
; i
< Oprnds
.size(); i
+= 2)
937 Params
.push_back(getValue(Oprnds
[i
], Oprnds
[i
+1]));
940 Result
= new InvokeInst(F
, Normal
, Except
, Params
);
941 if (CallingConv
) cast
<InvokeInst
>(Result
)->setCallingConv(CallingConv
);
944 case Instruction::Malloc
: {
946 if (Oprnds
.size() == 2)
947 Align
= (1 << Oprnds
[1]) >> 1;
948 else if (Oprnds
.size() > 2)
949 error("Invalid malloc instruction!");
950 if (!isa
<PointerType
>(InstTy
))
951 error("Invalid malloc instruction!");
953 Result
= new MallocInst(cast
<PointerType
>(InstTy
)->getElementType(),
954 getValue(Type::UIntTyID
, Oprnds
[0]), Align
);
958 case Instruction::Alloca
: {
960 if (Oprnds
.size() == 2)
961 Align
= (1 << Oprnds
[1]) >> 1;
962 else if (Oprnds
.size() > 2)
963 error("Invalid alloca instruction!");
964 if (!isa
<PointerType
>(InstTy
))
965 error("Invalid alloca instruction!");
967 Result
= new AllocaInst(cast
<PointerType
>(InstTy
)->getElementType(),
968 getValue(Type::UIntTyID
, Oprnds
[0]), Align
);
971 case Instruction::Free
:
972 if (!isa
<PointerType
>(InstTy
))
973 error("Invalid free instruction!");
974 Result
= new FreeInst(getValue(iType
, Oprnds
[0]));
976 case Instruction::GetElementPtr
: {
977 if (Oprnds
.size() == 0 || !isa
<PointerType
>(InstTy
))
978 error("Invalid getelementptr instruction!");
980 std::vector
<Value
*> Idx
;
982 const Type
*NextTy
= InstTy
;
983 for (unsigned i
= 1, e
= Oprnds
.size(); i
!= e
; ++i
) {
984 const CompositeType
*TopTy
= dyn_cast_or_null
<CompositeType
>(NextTy
);
986 error("Invalid getelementptr instruction!");
988 unsigned ValIdx
= Oprnds
[i
];
990 if (!hasRestrictedGEPTypes
) {
991 // Struct indices are always uints, sequential type indices can be any
992 // of the 32 or 64-bit integer types. The actual choice of type is
993 // encoded in the low two bits of the slot number.
994 if (isa
<StructType
>(TopTy
))
995 IdxTy
= Type::UIntTyID
;
997 switch (ValIdx
& 3) {
999 case 0: IdxTy
= Type::UIntTyID
; break;
1000 case 1: IdxTy
= Type::IntTyID
; break;
1001 case 2: IdxTy
= Type::ULongTyID
; break;
1002 case 3: IdxTy
= Type::LongTyID
; break;
1007 IdxTy
= isa
<StructType
>(TopTy
) ? Type::UByteTyID
: Type::LongTyID
;
1010 Idx
.push_back(getValue(IdxTy
, ValIdx
));
1012 // Convert ubyte struct indices into uint struct indices.
1013 if (isa
<StructType
>(TopTy
) && hasRestrictedGEPTypes
)
1014 if (ConstantUInt
*C
= dyn_cast
<ConstantUInt
>(Idx
.back()))
1015 Idx
[Idx
.size()-1] = ConstantExpr::getCast(C
, Type::UIntTy
);
1017 NextTy
= GetElementPtrInst::getIndexedType(InstTy
, Idx
, true);
1020 Result
= new GetElementPtrInst(getValue(iType
, Oprnds
[0]), Idx
);
1024 case 62: // volatile load
1025 case Instruction::Load
:
1026 if (Oprnds
.size() != 1 || !isa
<PointerType
>(InstTy
))
1027 error("Invalid load instruction!");
1028 Result
= new LoadInst(getValue(iType
, Oprnds
[0]), "", Opcode
== 62);
1031 case 63: // volatile store
1032 case Instruction::Store
: {
1033 if (!isa
<PointerType
>(InstTy
) || Oprnds
.size() != 2)
1034 error("Invalid store instruction!");
1036 Value
*Ptr
= getValue(iType
, Oprnds
[1]);
1037 const Type
*ValTy
= cast
<PointerType
>(Ptr
->getType())->getElementType();
1038 Result
= new StoreInst(getValue(getTypeSlot(ValTy
), Oprnds
[0]), Ptr
,
1042 case Instruction::Unwind
:
1043 if (Oprnds
.size() != 0) error("Invalid unwind instruction!");
1044 Result
= new UnwindInst();
1046 case Instruction::Unreachable
:
1047 if (Oprnds
.size() != 0) error("Invalid unreachable instruction!");
1048 Result
= new UnreachableInst();
1050 } // end switch(Opcode)
1052 BB
->getInstList().push_back(Result
);
1055 if (Result
->getType() == InstTy
)
1058 TypeSlot
= getTypeSlot(Result
->getType());
1060 insertValue(Result
, TypeSlot
, FunctionValues
);
1063 /// Get a particular numbered basic block, which might be a forward reference.
1064 /// This works together with ParseBasicBlock to handle these forward references
1065 /// in a clean manner. This function is used when constructing phi, br, switch,
1066 /// and other instructions that reference basic blocks. Blocks are numbered
1067 /// sequentially as they appear in the function.
1068 BasicBlock
*BytecodeReader::getBasicBlock(unsigned ID
) {
1069 // Make sure there is room in the table...
1070 if (ParsedBasicBlocks
.size() <= ID
) ParsedBasicBlocks
.resize(ID
+1);
1072 // First check to see if this is a backwards reference, i.e., ParseBasicBlock
1073 // has already created this block, or if the forward reference has already
1075 if (ParsedBasicBlocks
[ID
])
1076 return ParsedBasicBlocks
[ID
];
1078 // Otherwise, the basic block has not yet been created. Do so and add it to
1079 // the ParsedBasicBlocks list.
1080 return ParsedBasicBlocks
[ID
] = new BasicBlock();
1083 /// In LLVM 1.0 bytecode files, we used to output one basicblock at a time.
1084 /// This method reads in one of the basicblock packets. This method is not used
1085 /// for bytecode files after LLVM 1.0
1086 /// @returns The basic block constructed.
1087 BasicBlock
*BytecodeReader::ParseBasicBlock(unsigned BlockNo
) {
1088 if (Handler
) Handler
->handleBasicBlockBegin(BlockNo
);
1092 if (ParsedBasicBlocks
.size() == BlockNo
)
1093 ParsedBasicBlocks
.push_back(BB
= new BasicBlock());
1094 else if (ParsedBasicBlocks
[BlockNo
] == 0)
1095 BB
= ParsedBasicBlocks
[BlockNo
] = new BasicBlock();
1097 BB
= ParsedBasicBlocks
[BlockNo
];
1099 std::vector
<unsigned> Operands
;
1100 while (moreInBlock())
1101 ParseInstruction(Operands
, BB
);
1103 if (Handler
) Handler
->handleBasicBlockEnd(BlockNo
);
1107 /// Parse all of the BasicBlock's & Instruction's in the body of a function.
1108 /// In post 1.0 bytecode files, we no longer emit basic block individually,
1109 /// in order to avoid per-basic-block overhead.
1110 /// @returns Rhe number of basic blocks encountered.
1111 unsigned BytecodeReader::ParseInstructionList(Function
* F
) {
1112 unsigned BlockNo
= 0;
1113 std::vector
<unsigned> Args
;
1115 while (moreInBlock()) {
1116 if (Handler
) Handler
->handleBasicBlockBegin(BlockNo
);
1118 if (ParsedBasicBlocks
.size() == BlockNo
)
1119 ParsedBasicBlocks
.push_back(BB
= new BasicBlock());
1120 else if (ParsedBasicBlocks
[BlockNo
] == 0)
1121 BB
= ParsedBasicBlocks
[BlockNo
] = new BasicBlock();
1123 BB
= ParsedBasicBlocks
[BlockNo
];
1125 F
->getBasicBlockList().push_back(BB
);
1127 // Read instructions into this basic block until we get to a terminator
1128 while (moreInBlock() && !BB
->getTerminator())
1129 ParseInstruction(Args
, BB
);
1131 if (!BB
->getTerminator())
1132 error("Non-terminated basic block found!");
1134 if (Handler
) Handler
->handleBasicBlockEnd(BlockNo
-1);
1140 /// Parse a symbol table. This works for both module level and function
1141 /// level symbol tables. For function level symbol tables, the CurrentFunction
1142 /// parameter must be non-zero and the ST parameter must correspond to
1143 /// CurrentFunction's symbol table. For Module level symbol tables, the
1144 /// CurrentFunction argument must be zero.
1145 void BytecodeReader::ParseSymbolTable(Function
*CurrentFunction
,
1147 if (Handler
) Handler
->handleSymbolTableBegin(CurrentFunction
,ST
);
1149 // Allow efficient basic block lookup by number.
1150 std::vector
<BasicBlock
*> BBMap
;
1151 if (CurrentFunction
)
1152 for (Function::iterator I
= CurrentFunction
->begin(),
1153 E
= CurrentFunction
->end(); I
!= E
; ++I
)
1156 /// In LLVM 1.3 we write types separately from values so
1157 /// The types are always first in the symbol table. This is
1158 /// because Type no longer derives from Value.
1159 if (!hasTypeDerivedFromValue
) {
1160 // Symtab block header: [num entries]
1161 unsigned NumEntries
= read_vbr_uint();
1162 for (unsigned i
= 0; i
< NumEntries
; ++i
) {
1163 // Symtab entry: [def slot #][name]
1164 unsigned slot
= read_vbr_uint();
1165 std::string Name
= read_str();
1166 const Type
* T
= getType(slot
);
1167 ST
->insert(Name
, T
);
1171 while (moreInBlock()) {
1172 // Symtab block header: [num entries][type id number]
1173 unsigned NumEntries
= read_vbr_uint();
1175 bool isTypeType
= read_typeid(Typ
);
1176 const Type
*Ty
= getType(Typ
);
1178 for (unsigned i
= 0; i
!= NumEntries
; ++i
) {
1179 // Symtab entry: [def slot #][name]
1180 unsigned slot
= read_vbr_uint();
1181 std::string Name
= read_str();
1183 // if we're reading a pre 1.3 bytecode file and the type plane
1184 // is the "type type", handle it here
1186 const Type
* T
= getType(slot
);
1188 error("Failed type look-up for name '" + Name
+ "'");
1189 ST
->insert(Name
, T
);
1190 continue; // code below must be short circuited
1193 if (Typ
== Type::LabelTyID
) {
1194 if (slot
< BBMap
.size())
1197 V
= getValue(Typ
, slot
, false); // Find mapping...
1200 error("Failed value look-up for name '" + Name
+ "'");
1205 checkPastBlockEnd("Symbol Table");
1206 if (Handler
) Handler
->handleSymbolTableEnd();
1209 /// Read in the types portion of a compaction table.
1210 void BytecodeReader::ParseCompactionTypes(unsigned NumEntries
) {
1211 for (unsigned i
= 0; i
!= NumEntries
; ++i
) {
1212 unsigned TypeSlot
= 0;
1213 if (read_typeid(TypeSlot
))
1214 error("Invalid type in compaction table: type type");
1215 const Type
*Typ
= getGlobalTableType(TypeSlot
);
1216 CompactionTypes
.push_back(std::make_pair(Typ
, TypeSlot
));
1217 if (Handler
) Handler
->handleCompactionTableType(i
, TypeSlot
, Typ
);
1221 /// Parse a compaction table.
1222 void BytecodeReader::ParseCompactionTable() {
1224 // Notify handler that we're beginning a compaction table.
1225 if (Handler
) Handler
->handleCompactionTableBegin();
1227 // In LLVM 1.3 Type no longer derives from Value. So,
1228 // we always write them first in the compaction table
1229 // because they can't occupy a "type plane" where the
1231 if (! hasTypeDerivedFromValue
) {
1232 unsigned NumEntries
= read_vbr_uint();
1233 ParseCompactionTypes(NumEntries
);
1236 // Compaction tables live in separate blocks so we have to loop
1237 // until we've read the whole thing.
1238 while (moreInBlock()) {
1239 // Read the number of Value* entries in the compaction table
1240 unsigned NumEntries
= read_vbr_uint();
1242 unsigned isTypeType
= false;
1244 // Decode the type from value read in. Most compaction table
1245 // planes will have one or two entries in them. If that's the
1246 // case then the length is encoded in the bottom two bits and
1247 // the higher bits encode the type. This saves another VBR value.
1248 if ((NumEntries
& 3) == 3) {
1249 // In this case, both low-order bits are set (value 3). This
1250 // is a signal that the typeid follows.
1252 isTypeType
= read_typeid(Ty
);
1254 // In this case, the low-order bits specify the number of entries
1255 // and the high order bits specify the type.
1256 Ty
= NumEntries
>> 2;
1257 isTypeType
= sanitizeTypeId(Ty
);
1261 // if we're reading a pre 1.3 bytecode file and the type plane
1262 // is the "type type", handle it here
1264 ParseCompactionTypes(NumEntries
);
1266 // Make sure we have enough room for the plane.
1267 if (Ty
>= CompactionValues
.size())
1268 CompactionValues
.resize(Ty
+1);
1270 // Make sure the plane is empty or we have some kind of error.
1271 if (!CompactionValues
[Ty
].empty())
1272 error("Compaction table plane contains multiple entries!");
1274 // Notify handler about the plane.
1275 if (Handler
) Handler
->handleCompactionTablePlane(Ty
, NumEntries
);
1277 // Push the implicit zero.
1278 CompactionValues
[Ty
].push_back(Constant::getNullValue(getType(Ty
)));
1280 // Read in each of the entries, put them in the compaction table
1281 // and notify the handler that we have a new compaction table value.
1282 for (unsigned i
= 0; i
!= NumEntries
; ++i
) {
1283 unsigned ValSlot
= read_vbr_uint();
1284 Value
*V
= getGlobalTableValue(Ty
, ValSlot
);
1285 CompactionValues
[Ty
].push_back(V
);
1286 if (Handler
) Handler
->handleCompactionTableValue(i
, Ty
, ValSlot
);
1290 // Notify handler that the compaction table is done.
1291 if (Handler
) Handler
->handleCompactionTableEnd();
1294 // Parse a single type. The typeid is read in first. If its a primitive type
1295 // then nothing else needs to be read, we know how to instantiate it. If its
1296 // a derived type, then additional data is read to fill out the type
1298 const Type
*BytecodeReader::ParseType() {
1299 unsigned PrimType
= 0;
1300 if (read_typeid(PrimType
))
1301 error("Invalid type (type type) in type constants!");
1303 const Type
*Result
= 0;
1304 if ((Result
= Type::getPrimitiveType((Type::TypeID
)PrimType
)))
1308 case Type::FunctionTyID
: {
1309 const Type
*RetType
= readSanitizedType();
1311 unsigned NumParams
= read_vbr_uint();
1313 std::vector
<const Type
*> Params
;
1315 Params
.push_back(readSanitizedType());
1317 bool isVarArg
= Params
.size() && Params
.back() == Type::VoidTy
;
1318 if (isVarArg
) Params
.pop_back();
1320 Result
= FunctionType::get(RetType
, Params
, isVarArg
);
1323 case Type::ArrayTyID
: {
1324 const Type
*ElementType
= readSanitizedType();
1325 unsigned NumElements
= read_vbr_uint();
1326 Result
= ArrayType::get(ElementType
, NumElements
);
1329 case Type::PackedTyID
: {
1330 const Type
*ElementType
= readSanitizedType();
1331 unsigned NumElements
= read_vbr_uint();
1332 Result
= PackedType::get(ElementType
, NumElements
);
1335 case Type::StructTyID
: {
1336 std::vector
<const Type
*> Elements
;
1338 if (read_typeid(Typ
))
1339 error("Invalid element type (type type) for structure!");
1341 while (Typ
) { // List is terminated by void/0 typeid
1342 Elements
.push_back(getType(Typ
));
1343 if (read_typeid(Typ
))
1344 error("Invalid element type (type type) for structure!");
1347 Result
= StructType::get(Elements
);
1350 case Type::PointerTyID
: {
1351 Result
= PointerType::get(readSanitizedType());
1355 case Type::OpaqueTyID
: {
1356 Result
= OpaqueType::get();
1361 error("Don't know how to deserialize primitive type " + utostr(PrimType
));
1364 if (Handler
) Handler
->handleType(Result
);
1368 // ParseTypes - We have to use this weird code to handle recursive
1369 // types. We know that recursive types will only reference the current slab of
1370 // values in the type plane, but they can forward reference types before they
1371 // have been read. For example, Type #0 might be '{ Ty#1 }' and Type #1 might
1372 // be 'Ty#0*'. When reading Type #0, type number one doesn't exist. To fix
1373 // this ugly problem, we pessimistically insert an opaque type for each type we
1374 // are about to read. This means that forward references will resolve to
1375 // something and when we reread the type later, we can replace the opaque type
1376 // with a new resolved concrete type.
1378 void BytecodeReader::ParseTypes(TypeListTy
&Tab
, unsigned NumEntries
){
1379 assert(Tab
.size() == 0 && "should not have read type constants in before!");
1381 // Insert a bunch of opaque types to be resolved later...
1382 Tab
.reserve(NumEntries
);
1383 for (unsigned i
= 0; i
!= NumEntries
; ++i
)
1384 Tab
.push_back(OpaqueType::get());
1387 Handler
->handleTypeList(NumEntries
);
1389 // If we are about to resolve types, make sure the type cache is clear.
1391 ModuleTypeIDCache
.clear();
1393 // Loop through reading all of the types. Forward types will make use of the
1394 // opaque types just inserted.
1396 for (unsigned i
= 0; i
!= NumEntries
; ++i
) {
1397 const Type
* NewTy
= ParseType();
1398 const Type
* OldTy
= Tab
[i
].get();
1400 error("Couldn't parse type!");
1402 // Don't directly push the new type on the Tab. Instead we want to replace
1403 // the opaque type we previously inserted with the new concrete value. This
1404 // approach helps with forward references to types. The refinement from the
1405 // abstract (opaque) type to the new type causes all uses of the abstract
1406 // type to use the concrete type (NewTy). This will also cause the opaque
1407 // type to be deleted.
1408 cast
<DerivedType
>(const_cast<Type
*>(OldTy
))->refineAbstractTypeTo(NewTy
);
1410 // This should have replaced the old opaque type with the new type in the
1411 // value table... or with a preexisting type that was already in the system.
1412 // Let's just make sure it did.
1413 assert(Tab
[i
] != OldTy
&& "refineAbstractType didn't work!");
1417 /// Parse a single constant value
1418 Value
*BytecodeReader::ParseConstantPoolValue(unsigned TypeID
) {
1419 // We must check for a ConstantExpr before switching by type because
1420 // a ConstantExpr can be of any type, and has no explicit value.
1422 // 0 if not expr; numArgs if is expr
1423 unsigned isExprNumArgs
= read_vbr_uint();
1425 if (isExprNumArgs
) {
1426 if (!hasNoUndefValue
) {
1427 // 'undef' is encoded with 'exprnumargs' == 1.
1428 if (isExprNumArgs
== 1)
1429 return UndefValue::get(getType(TypeID
));
1431 // Inline asm is encoded with exprnumargs == ~0U.
1432 if (isExprNumArgs
== ~0U) {
1433 std::string AsmStr
= read_str();
1434 std::string ConstraintStr
= read_str();
1435 unsigned Flags
= read_vbr_uint();
1437 const PointerType
*PTy
= dyn_cast
<PointerType
>(getType(TypeID
));
1438 const FunctionType
*FTy
=
1439 PTy
? dyn_cast
<FunctionType
>(PTy
->getElementType()) : 0;
1441 if (!FTy
|| !InlineAsm::Verify(FTy
, ConstraintStr
))
1442 error("Invalid constraints for inline asm");
1444 error("Invalid flags for inline asm");
1445 bool HasSideEffects
= Flags
& 1;
1446 return InlineAsm::get(FTy
, AsmStr
, ConstraintStr
, HasSideEffects
);
1452 // FIXME: Encoding of constant exprs could be much more compact!
1453 std::vector
<Constant
*> ArgVec
;
1454 ArgVec
.reserve(isExprNumArgs
);
1455 unsigned Opcode
= read_vbr_uint();
1457 // Bytecode files before LLVM 1.4 need have a missing terminator inst.
1458 if (hasNoUnreachableInst
) Opcode
++;
1460 // Read the slot number and types of each of the arguments
1461 for (unsigned i
= 0; i
!= isExprNumArgs
; ++i
) {
1462 unsigned ArgValSlot
= read_vbr_uint();
1463 unsigned ArgTypeSlot
= 0;
1464 if (read_typeid(ArgTypeSlot
))
1465 error("Invalid argument type (type type) for constant value");
1467 // Get the arg value from its slot if it exists, otherwise a placeholder
1468 ArgVec
.push_back(getConstantValue(ArgTypeSlot
, ArgValSlot
));
1471 // Construct a ConstantExpr of the appropriate kind
1472 if (isExprNumArgs
== 1) { // All one-operand expressions
1473 if (Opcode
!= Instruction::Cast
)
1474 error("Only cast instruction has one argument for ConstantExpr");
1476 Constant
* Result
= ConstantExpr::getCast(ArgVec
[0], getType(TypeID
));
1477 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1479 } else if (Opcode
== Instruction::GetElementPtr
) { // GetElementPtr
1480 std::vector
<Constant
*> IdxList(ArgVec
.begin()+1, ArgVec
.end());
1482 if (hasRestrictedGEPTypes
) {
1483 const Type
*BaseTy
= ArgVec
[0]->getType();
1484 generic_gep_type_iterator
<std::vector
<Constant
*>::iterator
>
1485 GTI
= gep_type_begin(BaseTy
, IdxList
.begin(), IdxList
.end()),
1486 E
= gep_type_end(BaseTy
, IdxList
.begin(), IdxList
.end());
1487 for (unsigned i
= 0; GTI
!= E
; ++GTI
, ++i
)
1488 if (isa
<StructType
>(*GTI
)) {
1489 if (IdxList
[i
]->getType() != Type::UByteTy
)
1490 error("Invalid index for getelementptr!");
1491 IdxList
[i
] = ConstantExpr::getCast(IdxList
[i
], Type::UIntTy
);
1495 Constant
* Result
= ConstantExpr::getGetElementPtr(ArgVec
[0], IdxList
);
1496 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1498 } else if (Opcode
== Instruction::Select
) {
1499 if (ArgVec
.size() != 3)
1500 error("Select instruction must have three arguments.");
1501 Constant
* Result
= ConstantExpr::getSelect(ArgVec
[0], ArgVec
[1],
1503 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1505 } else if (Opcode
== Instruction::ExtractElement
) {
1506 if (ArgVec
.size() != 2 ||
1507 !ExtractElementInst::isValidOperands(ArgVec
[0], ArgVec
[1]))
1508 error("Invalid extractelement constand expr arguments");
1509 Constant
* Result
= ConstantExpr::getExtractElement(ArgVec
[0], ArgVec
[1]);
1510 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1512 } else if (Opcode
== Instruction::InsertElement
) {
1513 if (ArgVec
.size() != 3 ||
1514 !InsertElementInst::isValidOperands(ArgVec
[0], ArgVec
[1], ArgVec
[2]))
1515 error("Invalid insertelement constand expr arguments");
1518 ConstantExpr::getInsertElement(ArgVec
[0], ArgVec
[1], ArgVec
[2]);
1519 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1521 } else if (Opcode
== Instruction::ShuffleVector
) {
1522 if (ArgVec
.size() != 3 ||
1523 !ShuffleVectorInst::isValidOperands(ArgVec
[0], ArgVec
[1], ArgVec
[2]))
1524 error("Invalid shufflevector constant expr arguments.");
1526 ConstantExpr::getShuffleVector(ArgVec
[0], ArgVec
[1], ArgVec
[2]);
1527 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1529 } else { // All other 2-operand expressions
1530 Constant
* Result
= ConstantExpr::get(Opcode
, ArgVec
[0], ArgVec
[1]);
1531 if (Handler
) Handler
->handleConstantExpression(Opcode
, ArgVec
, Result
);
1536 // Ok, not an ConstantExpr. We now know how to read the given type...
1537 const Type
*Ty
= getType(TypeID
);
1538 Constant
*Result
= 0;
1539 switch (Ty
->getTypeID()) {
1540 case Type::BoolTyID
: {
1541 unsigned Val
= read_vbr_uint();
1542 if (Val
!= 0 && Val
!= 1)
1543 error("Invalid boolean value read.");
1544 Result
= ConstantBool::get(Val
== 1);
1545 if (Handler
) Handler
->handleConstantValue(Result
);
1549 case Type::UByteTyID
: // Unsigned integer types...
1550 case Type::UShortTyID
:
1551 case Type::UIntTyID
: {
1552 unsigned Val
= read_vbr_uint();
1553 if (!ConstantUInt::isValueValidForType(Ty
, Val
))
1554 error("Invalid unsigned byte/short/int read.");
1555 Result
= ConstantUInt::get(Ty
, Val
);
1556 if (Handler
) Handler
->handleConstantValue(Result
);
1560 case Type::ULongTyID
:
1561 Result
= ConstantUInt::get(Ty
, read_vbr_uint64());
1562 if (Handler
) Handler
->handleConstantValue(Result
);
1565 case Type::SByteTyID
: // Signed integer types...
1566 case Type::ShortTyID
:
1568 case Type::LongTyID
: {
1569 int64_t Val
= read_vbr_int64();
1570 if (!ConstantSInt::isValueValidForType(Ty
, Val
))
1571 error("Invalid signed byte/short/int/long read.");
1572 Result
= ConstantSInt::get(Ty
, Val
);
1573 if (Handler
) Handler
->handleConstantValue(Result
);
1577 case Type::FloatTyID
: {
1580 Result
= ConstantFP::get(Ty
, Val
);
1581 if (Handler
) Handler
->handleConstantValue(Result
);
1585 case Type::DoubleTyID
: {
1588 Result
= ConstantFP::get(Ty
, Val
);
1589 if (Handler
) Handler
->handleConstantValue(Result
);
1593 case Type::ArrayTyID
: {
1594 const ArrayType
*AT
= cast
<ArrayType
>(Ty
);
1595 unsigned NumElements
= AT
->getNumElements();
1596 unsigned TypeSlot
= getTypeSlot(AT
->getElementType());
1597 std::vector
<Constant
*> Elements
;
1598 Elements
.reserve(NumElements
);
1599 while (NumElements
--) // Read all of the elements of the constant.
1600 Elements
.push_back(getConstantValue(TypeSlot
,
1602 Result
= ConstantArray::get(AT
, Elements
);
1603 if (Handler
) Handler
->handleConstantArray(AT
, Elements
, TypeSlot
, Result
);
1607 case Type::StructTyID
: {
1608 const StructType
*ST
= cast
<StructType
>(Ty
);
1610 std::vector
<Constant
*> Elements
;
1611 Elements
.reserve(ST
->getNumElements());
1612 for (unsigned i
= 0; i
!= ST
->getNumElements(); ++i
)
1613 Elements
.push_back(getConstantValue(ST
->getElementType(i
),
1616 Result
= ConstantStruct::get(ST
, Elements
);
1617 if (Handler
) Handler
->handleConstantStruct(ST
, Elements
, Result
);
1621 case Type::PackedTyID
: {
1622 const PackedType
*PT
= cast
<PackedType
>(Ty
);
1623 unsigned NumElements
= PT
->getNumElements();
1624 unsigned TypeSlot
= getTypeSlot(PT
->getElementType());
1625 std::vector
<Constant
*> Elements
;
1626 Elements
.reserve(NumElements
);
1627 while (NumElements
--) // Read all of the elements of the constant.
1628 Elements
.push_back(getConstantValue(TypeSlot
,
1630 Result
= ConstantPacked::get(PT
, Elements
);
1631 if (Handler
) Handler
->handleConstantPacked(PT
, Elements
, TypeSlot
, Result
);
1635 case Type::PointerTyID
: { // ConstantPointerRef value (backwards compat).
1636 const PointerType
*PT
= cast
<PointerType
>(Ty
);
1637 unsigned Slot
= read_vbr_uint();
1639 // Check to see if we have already read this global variable...
1640 Value
*Val
= getValue(TypeID
, Slot
, false);
1642 GlobalValue
*GV
= dyn_cast
<GlobalValue
>(Val
);
1643 if (!GV
) error("GlobalValue not in ValueTable!");
1644 if (Handler
) Handler
->handleConstantPointer(PT
, Slot
, GV
);
1647 error("Forward references are not allowed here.");
1652 error("Don't know how to deserialize constant value of type '" +
1653 Ty
->getDescription());
1657 // Check that we didn't read a null constant if they are implicit for this
1658 // type plane. Do not do this check for constantexprs, as they may be folded
1659 // to a null value in a way that isn't predicted when a .bc file is initially
1661 assert((!isa
<Constant
>(Result
) || !cast
<Constant
>(Result
)->isNullValue()) ||
1662 !hasImplicitNull(TypeID
) &&
1663 "Cannot read null values from bytecode!");
1667 /// Resolve references for constants. This function resolves the forward
1668 /// referenced constants in the ConstantFwdRefs map. It uses the
1669 /// replaceAllUsesWith method of Value class to substitute the placeholder
1670 /// instance with the actual instance.
1671 void BytecodeReader::ResolveReferencesToConstant(Constant
*NewV
, unsigned Typ
,
1673 ConstantRefsType::iterator I
=
1674 ConstantFwdRefs
.find(std::make_pair(Typ
, Slot
));
1675 if (I
== ConstantFwdRefs
.end()) return; // Never forward referenced?
1677 Value
*PH
= I
->second
; // Get the placeholder...
1678 PH
->replaceAllUsesWith(NewV
);
1679 delete PH
; // Delete the old placeholder
1680 ConstantFwdRefs
.erase(I
); // Remove the map entry for it
1683 /// Parse the constant strings section.
1684 void BytecodeReader::ParseStringConstants(unsigned NumEntries
, ValueTable
&Tab
){
1685 for (; NumEntries
; --NumEntries
) {
1687 if (read_typeid(Typ
))
1688 error("Invalid type (type type) for string constant");
1689 const Type
*Ty
= getType(Typ
);
1690 if (!isa
<ArrayType
>(Ty
))
1691 error("String constant data invalid!");
1693 const ArrayType
*ATy
= cast
<ArrayType
>(Ty
);
1694 if (ATy
->getElementType() != Type::SByteTy
&&
1695 ATy
->getElementType() != Type::UByteTy
)
1696 error("String constant data invalid!");
1698 // Read character data. The type tells us how long the string is.
1699 char *Data
= reinterpret_cast<char *>(alloca(ATy
->getNumElements()));
1700 read_data(Data
, Data
+ATy
->getNumElements());
1702 std::vector
<Constant
*> Elements(ATy
->getNumElements());
1703 if (ATy
->getElementType() == Type::SByteTy
)
1704 for (unsigned i
= 0, e
= ATy
->getNumElements(); i
!= e
; ++i
)
1705 Elements
[i
] = ConstantSInt::get(Type::SByteTy
, (signed char)Data
[i
]);
1707 for (unsigned i
= 0, e
= ATy
->getNumElements(); i
!= e
; ++i
)
1708 Elements
[i
] = ConstantUInt::get(Type::UByteTy
, (unsigned char)Data
[i
]);
1710 // Create the constant, inserting it as needed.
1711 Constant
*C
= ConstantArray::get(ATy
, Elements
);
1712 unsigned Slot
= insertValue(C
, Typ
, Tab
);
1713 ResolveReferencesToConstant(C
, Typ
, Slot
);
1714 if (Handler
) Handler
->handleConstantString(cast
<ConstantArray
>(C
));
1718 /// Parse the constant pool.
1719 void BytecodeReader::ParseConstantPool(ValueTable
&Tab
,
1720 TypeListTy
&TypeTab
,
1722 if (Handler
) Handler
->handleGlobalConstantsBegin();
1724 /// In LLVM 1.3 Type does not derive from Value so the types
1725 /// do not occupy a plane. Consequently, we read the types
1726 /// first in the constant pool.
1727 if (isFunction
&& !hasTypeDerivedFromValue
) {
1728 unsigned NumEntries
= read_vbr_uint();
1729 ParseTypes(TypeTab
, NumEntries
);
1732 while (moreInBlock()) {
1733 unsigned NumEntries
= read_vbr_uint();
1735 bool isTypeType
= read_typeid(Typ
);
1737 /// In LLVM 1.2 and before, Types were written to the
1738 /// bytecode file in the "Type Type" plane (#12).
1739 /// In 1.3 plane 12 is now the label plane. Handle this here.
1741 ParseTypes(TypeTab
, NumEntries
);
1742 } else if (Typ
== Type::VoidTyID
) {
1743 /// Use of Type::VoidTyID is a misnomer. It actually means
1744 /// that the following plane is constant strings
1745 assert(&Tab
== &ModuleValues
&& "Cannot read strings in functions!");
1746 ParseStringConstants(NumEntries
, Tab
);
1748 for (unsigned i
= 0; i
< NumEntries
; ++i
) {
1749 Value
*V
= ParseConstantPoolValue(Typ
);
1750 assert(V
&& "ParseConstantPoolValue returned NULL!");
1751 unsigned Slot
= insertValue(V
, Typ
, Tab
);
1753 // If we are reading a function constant table, make sure that we adjust
1754 // the slot number to be the real global constant number.
1756 if (&Tab
!= &ModuleValues
&& Typ
< ModuleValues
.size() &&
1758 Slot
+= ModuleValues
[Typ
]->size();
1759 if (Constant
*C
= dyn_cast
<Constant
>(V
))
1760 ResolveReferencesToConstant(C
, Typ
, Slot
);
1765 // After we have finished parsing the constant pool, we had better not have
1766 // any dangling references left.
1767 if (!ConstantFwdRefs
.empty()) {
1768 ConstantRefsType::const_iterator I
= ConstantFwdRefs
.begin();
1769 Constant
* missingConst
= I
->second
;
1770 error(utostr(ConstantFwdRefs
.size()) +
1771 " unresolved constant reference exist. First one is '" +
1772 missingConst
->getName() + "' of type '" +
1773 missingConst
->getType()->getDescription() + "'.");
1776 checkPastBlockEnd("Constant Pool");
1777 if (Handler
) Handler
->handleGlobalConstantsEnd();
1780 /// Parse the contents of a function. Note that this function can be
1781 /// called lazily by materializeFunction
1782 /// @see materializeFunction
1783 void BytecodeReader::ParseFunctionBody(Function
* F
) {
1785 unsigned FuncSize
= BlockEnd
- At
;
1786 GlobalValue::LinkageTypes Linkage
= GlobalValue::ExternalLinkage
;
1788 unsigned LinkageType
= read_vbr_uint();
1789 switch (LinkageType
) {
1790 case 0: Linkage
= GlobalValue::ExternalLinkage
; break;
1791 case 1: Linkage
= GlobalValue::WeakLinkage
; break;
1792 case 2: Linkage
= GlobalValue::AppendingLinkage
; break;
1793 case 3: Linkage
= GlobalValue::InternalLinkage
; break;
1794 case 4: Linkage
= GlobalValue::LinkOnceLinkage
; break;
1796 error("Invalid linkage type for Function.");
1797 Linkage
= GlobalValue::InternalLinkage
;
1801 F
->setLinkage(Linkage
);
1802 if (Handler
) Handler
->handleFunctionBegin(F
,FuncSize
);
1804 // Keep track of how many basic blocks we have read in...
1805 unsigned BlockNum
= 0;
1806 bool InsertedArguments
= false;
1808 BufPtr MyEnd
= BlockEnd
;
1809 while (At
< MyEnd
) {
1810 unsigned Type
, Size
;
1812 read_block(Type
, Size
);
1815 case BytecodeFormat::ConstantPoolBlockID
:
1816 if (!InsertedArguments
) {
1817 // Insert arguments into the value table before we parse the first basic
1818 // block in the function, but after we potentially read in the
1819 // compaction table.
1821 InsertedArguments
= true;
1824 ParseConstantPool(FunctionValues
, FunctionTypes
, true);
1827 case BytecodeFormat::CompactionTableBlockID
:
1828 ParseCompactionTable();
1831 case BytecodeFormat::BasicBlock
: {
1832 if (!InsertedArguments
) {
1833 // Insert arguments into the value table before we parse the first basic
1834 // block in the function, but after we potentially read in the
1835 // compaction table.
1837 InsertedArguments
= true;
1840 BasicBlock
*BB
= ParseBasicBlock(BlockNum
++);
1841 F
->getBasicBlockList().push_back(BB
);
1845 case BytecodeFormat::InstructionListBlockID
: {
1846 // Insert arguments into the value table before we parse the instruction
1847 // list for the function, but after we potentially read in the compaction
1849 if (!InsertedArguments
) {
1851 InsertedArguments
= true;
1855 error("Already parsed basic blocks!");
1856 BlockNum
= ParseInstructionList(F
);
1860 case BytecodeFormat::SymbolTableBlockID
:
1861 ParseSymbolTable(F
, &F
->getSymbolTable());
1867 error("Wrapped around reading bytecode.");
1872 // Malformed bc file if read past end of block.
1876 // Make sure there were no references to non-existant basic blocks.
1877 if (BlockNum
!= ParsedBasicBlocks
.size())
1878 error("Illegal basic block operand reference");
1880 ParsedBasicBlocks
.clear();
1882 // Resolve forward references. Replace any uses of a forward reference value
1883 // with the real value.
1884 while (!ForwardReferences
.empty()) {
1885 std::map
<std::pair
<unsigned,unsigned>, Value
*>::iterator
1886 I
= ForwardReferences
.begin();
1887 Value
*V
= getValue(I
->first
.first
, I
->first
.second
, false);
1888 Value
*PlaceHolder
= I
->second
;
1889 PlaceHolder
->replaceAllUsesWith(V
);
1890 ForwardReferences
.erase(I
);
1894 // If upgraded intrinsic functions were detected during reading of the
1895 // module information, then we need to look for instructions that need to
1896 // be upgraded. This can't be done while the instructions are read in because
1897 // additional instructions inserted mess up the slot numbering.
1898 if (!upgradedFunctions
.empty()) {
1899 for (Function::iterator BI
= F
->begin(), BE
= F
->end(); BI
!= BE
; ++BI
)
1900 for (BasicBlock::iterator II
= BI
->begin(), IE
= BI
->end();
1902 if (CallInst
* CI
= dyn_cast
<CallInst
>(II
++)) {
1903 std::map
<Function
*,Function
*>::iterator FI
=
1904 upgradedFunctions
.find(CI
->getCalledFunction());
1905 if (FI
!= upgradedFunctions
.end())
1906 UpgradeIntrinsicCall(CI
, FI
->second
);
1910 // Clear out function-level types...
1911 FunctionTypes
.clear();
1912 CompactionTypes
.clear();
1913 CompactionValues
.clear();
1914 freeTable(FunctionValues
);
1916 if (Handler
) Handler
->handleFunctionEnd(F
);
1919 /// Defer parsing of the function body held in the current block. Takes the
1920 /// next pending Function off the back of FunctionSignatureList and records
1921 /// the current BlockStart/BlockEnd pair for it in LazyFunctionLoadMap, so
1922 /// that ParseFunction or ParseAllFunctionBodies can read the body later.
1923 void BytecodeReader::ParseFunctionLazily() {
// A function block needs a signature that was queued earlier; report the
// mismatch through error() when none is left.
1924 if (FunctionSignatureList
.empty())
1925 error("FunctionSignatureList empty!");
// Signatures are consumed from the back of the list.
1927 Function
*Func
= FunctionSignatureList
.back();
1928 FunctionSignatureList
.pop_back();
1930 // Save the information for future reading of the function
1931 LazyFunctionLoadMap
[Func
] = LazyFunctionInfo(BlockStart
, BlockEnd
);
1933 // This function has a body but it's not loaded so it appears `External'.
1934 // Mark it as a `Ghost' instead to notify the users that it has a body.
1935 Func
->setLinkage(GlobalValue::GhostLinkage
);
1937 // Pretend we've `parsed' this function
// NOTE(review): the statement this comment introduces is not present in this
// copy of the file (the embedded numbering jumps from 1937 to 1941).
// Presumably it moves the read position to the end of the block - confirm
// against the upstream source.
1941 /// The ParseFunction method parses the body of one lazily recorded function.
1942 /// Use this method to cause the parser to parse a specific function in the
1943 /// module. Note that this will remove the function from what is to be
1944 /// included by ParseAllFunctionBodies.
/// @param Func the function whose saved location is looked up in
///        LazyFunctionLoadMap and whose body is then parsed.
1945 /// @see ParseAllFunctionBodies
1946 /// @see ParseBytecode
1947 void BytecodeReader::ParseFunction(Function
* Func
) {
1948 // Find {start, end} pointers and slot in the map. If not there, we're done.
1949 LazyFunctionMap::iterator Fi
= LazyFunctionLoadMap
.find(Func
);
1951 // Make sure we found it
1952 if (Fi
== LazyFunctionLoadMap
.end()) {
1953 error("Unrecognized function of type " + Func
->getType()->getDescription());
// NOTE(review): the rest of this branch, including its closing brace, is
// missing from this copy of the file (embedded numbering skips 1954-1956).
// Re-position the reader on the byte range that was saved for this function.
1957 BlockStart
= At
= Fi
->second
.Buf
;
1958 BlockEnd
= Fi
->second
.EndBuf
;
1959 assert(Fi
->first
== Func
&& "Found wrong function?");
// Drop the record before parsing, so the function is no longer pending.
1961 LazyFunctionLoadMap
.erase(Fi
);
1963 this->ParseFunctionBody(Func
);
1966 /// The ParseAllFunctionBodies method parses through all the previously
1967 /// unparsed functions in the bytecode file. If you want to completely parse
1968 /// a bytecode file, this method should be called after ParseBytecode because
1969 /// ParseBytecode only records the locations in the bytecode file of where
1970 /// the function definitions are located. This function uses that information
1971 /// (the entries of LazyFunctionLoadMap) to materialize the functions.
1972 /// @see ParseBytecode
1973 void BytecodeReader::ParseAllFunctionBodies() {
1974 LazyFunctionMap::iterator Fi
= LazyFunctionLoadMap
.begin();
1975 LazyFunctionMap::iterator Fe
= LazyFunctionLoadMap
.end();
// NOTE(review): the loop header and the iterator increment are missing from
// this copy of the file (embedded numbering skips 1976-1977 and 1982-1983).
// Point the reader at the byte range saved for this entry, then parse it.
1978 Function
* Func
= Fi
->first
;
1979 BlockStart
= At
= Fi
->second
.Buf
;
1980 BlockEnd
= Fi
->second
.EndBuf
;
1981 ParseFunctionBody(Func
);
// Forget the saved locations.
1984 LazyFunctionLoadMap
.clear();
1988 /// Parse the global type list: read the number of entries and pass it,
/// together with ModuleTypes, to ParseTypes.
1989 void BytecodeReader::ParseGlobalTypes() {
1990 // Read the number of types
1991 unsigned NumEntries
= read_vbr_uint();
1993 // Ignore the type plane identifier for types if the bc file is pre 1.3
1994 if (hasTypeDerivedFromValue
)
// NOTE(review): the statement controlled by the `if` above is missing from
// this copy of the file (embedded numbering skips 1995). Going by the comment
// it presumably reads and discards one value; the ParseTypes call below is
// not meant to be the body of that `if` - confirm against upstream.
1997 ParseTypes(ModuleTypes
, NumEntries
);
2000 /// Parse the module global info block.
/// In order, this reads: the list of global variables (terminated by a
/// VoidTyID entry), the list of function signatures (terminated the same
/// way), and then - depending on the format flags - the dependent libraries,
/// the target triple, the section names and the module-level inline asm.
/// Globals and functions are registered in ModuleValues; pending initializer
/// slots go to GlobalInits and functions that have bodies go to
/// FunctionSignatureList. Handler callbacks are issued when Handler is set.
2001 void BytecodeReader::ParseModuleGlobalInfo() {
2003 if (Handler
) Handler
->handleModuleGlobalsBegin();
2005 // SectionID - If a global has an explicit section specified, this map
2006 // remembers the ID until we can translate it into a string.
2007 std::map
<GlobalValue
*, unsigned> SectionID
;
2009 // Read global variables...
2010 unsigned VarType
= read_vbr_uint();
2011 while (VarType
!= Type::VoidTyID
) { // List is terminated by Void
2012 // VarType Fields: bit0 = isConstant, bit1 = hasInitializer, bit2,3,4 =
2013 // Linkage, bit5+ = slot#
2014 unsigned SlotNo
= VarType
>> 5;
2015 if (sanitizeTypeId(SlotNo
))
2016 error("Invalid type (type type) for global var!");
2017 unsigned LinkageID
= (VarType
>> 2) & 7;
2018 bool isConstant
= VarType
& 1;
2019 bool hasInitializer
= (VarType
& 2) != 0;
2020 unsigned Alignment
= 0;
2021 unsigned GlobalSectionID
= 0;
2023 // An extension word is present when linkage = 3 (internal) and hasinit = 0.
2024 if (LinkageID
== 3 && !hasInitializer
) {
2025 unsigned ExtWord
= read_vbr_uint();
2026 // The extension word has this format: bit 0 = has initializer, bit 1-3 =
2027 // linkage, bit 4-8 = alignment (log2), bit 9 = has section ID,
// bits 10+ = future use.
2028 hasInitializer
= ExtWord
& 1;
2029 LinkageID
= (ExtWord
>> 1) & 7;
// The alignment field is log2+1 (0 means none). Shift an unsigned 1 so that
// a field value of 31 does not shift into the sign bit of an int.
2030 Alignment
= (1U << ((ExtWord
>> 4) & 31)) >> 1;
2032 if (ExtWord
& (1 << 9)) // Has a section ID.
2033 GlobalSectionID
= read_vbr_uint();
2036 GlobalValue::LinkageTypes Linkage
;
2037 switch (LinkageID
) {
2038 case 0: Linkage
= GlobalValue::ExternalLinkage
; break;
2039 case 1: Linkage
= GlobalValue::WeakLinkage
; break;
2040 case 2: Linkage
= GlobalValue::AppendingLinkage
; break;
2041 case 3: Linkage
= GlobalValue::InternalLinkage
; break;
2042 case 4: Linkage
= GlobalValue::LinkOnceLinkage
; break;
2044 error("Unknown linkage type: " + utostr(LinkageID
));
2045 Linkage
= GlobalValue::InternalLinkage
;
2049 const Type
*Ty
= getType(SlotNo
);
2051 error("Global has no type! SlotNo=" + utostr(SlotNo
));
2053 if (!isa
<PointerType
>(Ty
))
2054 error("Global not a pointer type! Ty= " + Ty
->getDescription());
// The slot names the pointer type of the global; the variable itself is
// created with the pointee type.
2056 const Type
*ElTy
= cast
<PointerType
>(Ty
)->getElementType();
2058 // Create the global variable...
2059 GlobalVariable
*GV
= new GlobalVariable(ElTy
, isConstant
, Linkage
,
2061 GV
->setAlignment(Alignment
);
2062 insertValue(GV
, SlotNo
, ModuleValues
);
// A section ID of 0 means no explicit section, so only non-zero IDs are kept.
2064 if (GlobalSectionID
!= 0)
2065 SectionID
[GV
] = GlobalSectionID
;
// Initializers are only recorded by slot here; GlobalInits is drained later.
2067 unsigned initSlot
= 0;
2068 if (hasInitializer
) {
2069 initSlot
= read_vbr_uint();
2070 GlobalInits
.push_back(std::make_pair(GV
, initSlot
));
2073 // Notify handler about the global value.
2075 Handler
->handleGlobalVariable(ElTy
, isConstant
, Linkage
, SlotNo
,initSlot
);
2078 VarType
= read_vbr_uint();
2081 // Read the function objects for all of the functions that are coming
2082 unsigned FnSignature
= read_vbr_uint();
// Formats without per-function flags carry only the type slot: move it into
// the slot position and supply a flags value of 1.
2084 if (hasNoFlagsForFunctions
)
2085 FnSignature
= (FnSignature
<< 5) + 1;
2087 // List is terminated by VoidTy.
// FnSignature layout: bits 0-4 = flags, bits 5-30 = type slot, bit 31 = an
// extension word follows.
2088 while (((FnSignature
& (~0U >> 1)) >> 5) != Type::VoidTyID
) {
2089 const Type
*Ty
= getType((FnSignature
& (~0U >> 1)) >> 5);
2090 if (!isa
<PointerType
>(Ty
) ||
2091 !isa
<FunctionType
>(cast
<PointerType
>(Ty
)->getElementType())) {
2092 error("Function not a pointer to function type! Ty = " +
2093 Ty
->getDescription());
2096 // We create functions by passing the underlying FunctionType to create...
2097 const FunctionType
* FTy
=
2098 cast
<FunctionType
>(cast
<PointerType
>(Ty
)->getElementType());
2100 // Insert the place holder.
2101 Function
*Func
= new Function(FTy
, GlobalValue::ExternalLinkage
,
2104 insertValue(Func
, (FnSignature
& (~0U >> 1)) >> 5, ModuleValues
);
2106 // Low five bits: bit 4 = no body follows, bits 0-3 = calling convention.
2107 unsigned Flags
= FnSignature
& 31;
2109 // Save this for later so we know type of lazily instantiated functions.
2110 // Note that known-external functions do not have FunctionInfo blocks, so we
2111 // do not add them to the FunctionSignatureList.
2112 if ((Flags
& (1 << 4)) == 0)
2113 FunctionSignatureList
.push_back(Func
);
2115 // Get the calling convention from the low bits.
2116 unsigned CC
= Flags
& 15;
2117 unsigned Alignment
= 0;
// Test bit 31 with an unsigned constant: 1 << 31 would shift into the sign
// bit of an int.
2118 if (FnSignature
& (1U << 31)) { // Has extension word?
2119 unsigned ExtWord
= read_vbr_uint();
// Extension word: bits 0-4 = alignment (log2+1), bits 5-8 = high bits of the
// calling convention, bit 10 = has section ID.
2120 Alignment
= (1U << (ExtWord
& 31)) >> 1;
2121 CC
|= ((ExtWord
>> 5) & 15) << 4;
2123 if (ExtWord
& (1 << 10)) // Has a section ID.
2124 SectionID
[Func
] = read_vbr_uint();
// The calling convention is stored biased by one.
2127 Func
->setCallingConv(CC
-1);
2128 Func
->setAlignment(Alignment
);
2130 if (Handler
) Handler
->handleFunctionDeclaration(Func
);
2132 // Get the next function signature.
2133 FnSignature
= read_vbr_uint();
2134 if (hasNoFlagsForFunctions
)
2135 FnSignature
= (FnSignature
<< 5) + 1;
2138 // Now that the function signature list is set up, reverse it so that we can
2139 // remove elements efficiently from the back of the vector.
2140 std::reverse(FunctionSignatureList
.begin(), FunctionSignatureList
.end());
2142 /// SectionNames - This contains the list of section names encoded in the
2143 /// moduleinfoblock. Functions and globals with an explicit section index
2144 /// into this to get their section name.
2145 std::vector
<std::string
> SectionNames
;
2147 if (hasInconsistentModuleGlobalInfo
) {
2149 } else if (!hasNoDependentLibraries
) {
2150 // If this bytecode format has dependent library information in it, read in
2151 // the number of dependent library items that follow.
2152 unsigned num_dep_libs
= read_vbr_uint();
2153 std::string dep_lib
;
2154 while (num_dep_libs
--) {
2155 dep_lib
= read_str();
2156 TheModule
->addLibrary(dep_lib
);
2158 Handler
->handleDependentLibrary(dep_lib
);
2161 // Read target triple and place into the module.
2162 std::string triple
= read_str();
2163 TheModule
->setTargetTriple(triple
);
2165 Handler
->handleTargetTriple(triple
);
2167 if (!hasAlignment
&& At
!= BlockEnd
) {
2168 // If the file has section info in it, read the section names now.
2169 unsigned NumSections
= read_vbr_uint();
2170 while (NumSections
--)
2171 SectionNames
.push_back(read_str());
2174 // If the file has module-level inline asm, read it now.
2175 if (!hasAlignment
&& At
!= BlockEnd
)
2176 TheModule
->setModuleInlineAsm(read_str());
2179 // If any globals are in specified sections, assign them now.
2180 for (std::map
<GlobalValue
*, unsigned>::iterator I
= SectionID
.begin(), E
=
2181 SectionID
.end(); I
!= E
; ++I
)
// The ID is a 1-based index into SectionNames, so it must be validated
// against the number of section names read above, not against the number of
// globals that carry a section ID.
2183 if (I
->second
> SectionNames
.size())
2184 error("SectionID out of range for global!");
2185 I
->first
->setSection(SectionNames
[I
->second
-1]);
2188 // This is for future proofing... in the future extra fields may be added that
2189 // we don't understand, so we transparently ignore them.
2193 if (Handler
) Handler
->handleModuleGlobalsEnd();
2196 /// Parse the version information and decode it by setting flags on the
2197 /// Reader that enable backward compatibility of the reader.
/// One vbr uint is read: bit 0 selects big/little endian, bit 1 selects
/// 64/32-bit pointers, bits 2 and 3 mark endianness and pointer size as
/// unspecified, and the remaining high bits become RevisionNum. The resulting
/// endianness and pointer size are stored on TheModule and reported to the
/// handler, if one is set.
2198 void BytecodeReader::ParseVersionInfo() {
2199 unsigned Version
= read_vbr_uint();
2201 // Unpack version number: low four bits are for flags, top bits = version
2202 Module::Endianness Endianness
;
2203 Module::PointerSize PointerSize
;
2204 Endianness
= (Version
& 1) ? Module::BigEndian
: Module::LittleEndian
;
2205 PointerSize
= (Version
& 2) ? Module::Pointer64
: Module::Pointer32
;
2207 bool hasNoEndianness
= Version
& 4;
2208 bool hasNoPointerSize
= Version
& 8;
2210 RevisionNum
= Version
>> 4;
2212 // Default values for the current bytecode version
2213 hasInconsistentModuleGlobalInfo
= false;
2214 hasExplicitPrimitiveZeros
= false;
2215 hasRestrictedGEPTypes
= false;
2216 hasTypeDerivedFromValue
= false;
2217 hasLongBlockHeaders
= false;
2218 has32BitTypes
= false;
2219 hasNoDependentLibraries
= false;
2220 hasAlignment
= false;
2221 hasNoUndefValue
= false;
2222 hasNoFlagsForFunctions
= false;
2223 hasNoUnreachableInst
= false;
// Each case below switches on the compatibility flags for one old revision.
// NOTE(review): the lines between the cases (breaks or fall-through markers)
// are missing from this copy of the file; the wording of the case comments
// suggests the cases fall through to the newer ones - confirm upstream.
2225 switch (RevisionNum
) {
2226 case 0: // LLVM 1.0, 1.1 (Released)
2227 // Base LLVM 1.0 bytecode format.
2228 hasInconsistentModuleGlobalInfo
= true;
2229 hasExplicitPrimitiveZeros
= true;
2233 case 1: // LLVM 1.2 (Released)
2234 // LLVM 1.2 added explicit support for emitting strings efficiently.
2236 // Also, it fixed the problem where the size of the ModuleGlobalInfo block
2237 // included the size for the alignment at the end, where the rest of the
2240 // LLVM 1.2 and before required that GEP indices be ubyte constants for
2241 // structures and longs for sequential types.
2242 hasRestrictedGEPTypes
= true;
2244 // LLVM 1.2 and before had the Type class derive from Value class. This
2245 // changed in release 1.3 and consequently LLVM 1.3 bytecode files are
2246 // written differently because Types can no longer be part of the
2247 // type planes for Values.
2248 hasTypeDerivedFromValue
= true;
2252 case 2: // 1.2.5 (Not Released)
2254 // LLVM 1.2 and earlier had two-word block headers. This is a bit wasteful,
2255 // especially for small files where the 8 bytes per block is a large
2256 // fraction of the total block size. In LLVM 1.3, the block type and length
2257 // are compressed into a single 32-bit unsigned integer. 27 bits for length,
2258 // 5 bits for block type.
2259 hasLongBlockHeaders
= true;
2261 // LLVM 1.2 and earlier wrote type slot numbers as vbr_uint32. In LLVM 1.3
2262 // this has been reduced to vbr_uint24. It shouldn't make much difference
2263 // since we haven't run into a module with > 24 million types, but for
2264 // safety the 24-bit restriction has been enforced in 1.3 to free some bits
2265 // in various places and to ensure consistency.
2266 has32BitTypes
= true;
2268 // LLVM 1.2 and earlier did not provide a target triple nor a list of
2269 // libraries on which the bytecode is dependent. LLVM 1.3 provides these
2270 // features, for use in future versions of LLVM.
2271 hasNoDependentLibraries
= true;
2275 case 3: // LLVM 1.3 (Released)
2276 // LLVM 1.3 and earlier caused alignment bytes to be written on some block
2277 // boundaries and at the end of some strings. In extreme cases (e.g. lots
2278 // of GEP references to a constant array), this can increase the file size
2279 // by 30% or more. In version 1.4 alignment is done away with completely.
2280 hasAlignment
= true;
2284 case 4: // 1.3.1 (Not Released)
2285 // In version 4, we did not support the 'undef' constant.
2286 hasNoUndefValue
= true;
// NOTE(review): the next two comments used to say version 4 and above. The
// flags default to false for the current version and are only switched on
// here, so version 4 and earlier is presumably what was meant - confirm.
2288 // In version 4 and earlier, we did not include space for flags for functions
2289 // in the module info block.
2290 hasNoFlagsForFunctions
= true;
2292 // In version 4 and earlier, we did not include the 'unreachable' instruction
2293 // in the opcode numbering in the bytecode file.
2294 hasNoUnreachableInst
= true;
2299 case 5: // 1.4 (Released)
2303 error("Unknown bytecode version number: " + itostr(RevisionNum
));
// Bits 2 and 3 of the version word override the values decoded from bits 0/1.
2306 if (hasNoEndianness
) Endianness
= Module::AnyEndianness
;
2307 if (hasNoPointerSize
) PointerSize
= Module::AnyPointerSize
;
2309 TheModule
->setEndianness(Endianness
);
2310 TheModule
->setPointerSize(PointerSize
);
2312 if (Handler
) Handler
->handleVersionInfo(RevisionNum
, Endianness
, PointerSize
);
2315 /// Parse a whole module.
/// Reads blocks until the end of the enclosing block and dispatches on the
/// block type: global type plane, module global info, constant pool, function
/// (recorded lazily via ParseFunctionLazily) and symbol table. Afterwards the
/// initializers queued in GlobalInits are resolved and attached to their
/// global variables, and leftover forward references or function signatures
/// are reported through error().
2316 void BytecodeReader::ParseModule() {
2317 unsigned Type
, Size
;
2319 FunctionSignatureList
.clear(); // Just in case...
2321 // Read into instance variables...
// NOTE(review): the statements this comment introduces are missing from this
// copy of the file (embedded numbering skips 2322-2324).
// Each of these two blocks may appear at most once per module.
2325 bool SeenModuleGlobalInfo
= false;
2326 bool SeenGlobalTypePlane
= false;
// Remember where the module block ends before reading nested blocks.
2327 BufPtr MyEnd
= BlockEnd
;
2328 while (At
< MyEnd
) {
2330 read_block(Type
, Size
);
// NOTE(review): the switch header and the per-case break lines are missing
// from this copy of the file; only the case labels and their calls remain.
2334 case BytecodeFormat::GlobalTypePlaneBlockID
:
2335 if (SeenGlobalTypePlane
)
2336 error("Two GlobalTypePlane Blocks Encountered!");
2340 SeenGlobalTypePlane
= true;
2343 case BytecodeFormat::ModuleGlobalInfoBlockID
:
2344 if (SeenModuleGlobalInfo
)
2345 error("Two ModuleGlobalInfo Blocks Encountered!");
2346 ParseModuleGlobalInfo();
2347 SeenModuleGlobalInfo
= true;
2350 case BytecodeFormat::ConstantPoolBlockID
:
2351 ParseConstantPool(ModuleValues
, ModuleTypes
,false);
// Function bodies are not parsed here; only their location is recorded.
2354 case BytecodeFormat::FunctionBlockID
:
2355 ParseFunctionLazily();
2358 case BytecodeFormat::SymbolTableBlockID
:
2359 ParseSymbolTable(0, &TheModule
->getSymbolTable());
2365 error("Unexpected Block of Type #" + utostr(Type
) + " encountered!");
2373 // After the module constant pool has been read, we can safely initialize
2374 // global variables...
2375 while (!GlobalInits
.empty()) {
2376 GlobalVariable
*GV
= GlobalInits
.back().first
;
2377 unsigned Slot
= GlobalInits
.back().second
;
2378 GlobalInits
.pop_back();
2380 // Look up the initializer value...
2381 // FIXME: Preserve this type ID!
// The initializer is looked up in the plane of the global's element type.
2383 const llvm::PointerType
* GVType
= GV
->getType();
2384 unsigned TypeSlot
= getTypeSlot(GVType
->getElementType());
2385 if (Constant
*CV
= getConstantValue(TypeSlot
, Slot
)) {
2386 if (GV
->hasInitializer())
2387 error("Global *already* has an initializer?!");
2388 if (Handler
) Handler
->handleGlobalInitializer(GV
,CV
);
2389 GV
->setInitializer(CV
);
2391 error("Cannot find initializer value.");
2394 if (!ConstantFwdRefs
.empty())
2395 error("Use of undefined constants in a module");
2397 /// Make sure we pulled them all out. If we didn't then there's a declaration
2398 /// but a missing body. That's not allowed.
2399 if (!FunctionSignatureList
.empty())
2400 error("Function declared, but bytecode stream ended before definition");
2403 /// This function parses a bytecode buffer given by the \p Buf
2404 /// and \p Length parameters.
/// It creates TheModule, checks the four-byte signature (decompressing the
/// buffer first when the signature says it is compressed), validates the
/// top-level module block, and then runs ParseModule. ParseModule handles
/// function blocks with ParseFunctionLazily, so function bodies are only
/// located here, not parsed. Intrinsic functions that UpgradeIntrinsicFunction
/// replaces are recorded in upgradedFunctions.
/// @param Buf start of the bytecode buffer.
/// @param Length number of bytes in the buffer.
/// @param ModuleID identifier passed to the new Module and to the handler.
2405 void BytecodeReader::ParseBytecode(BufPtr Buf
, unsigned Length
,
2406 const std::string
&ModuleID
) {
// NOTE(review): the lines that open the try block matching the catch handlers
// further down are missing from this copy (embedded numbering skips
// 2407-2409).
2410 At
= MemStart
= BlockStart
= Buf
;
2411 MemEnd
= BlockEnd
= Buf
+ Length
;
2413 // Create the module
2414 TheModule
= new Module(ModuleID
);
2416 if (Handler
) Handler
->handleStart(TheModule
, Length
);
2418 // Read the four bytes of the signature.
2419 unsigned Sig
= read_uint();
2421 // If this is a compressed file
// (signature bytes 'l','l','v','c', lowest byte first)
2422 if (Sig
== ('l' | ('l' << 8) | ('v' << 16) | ('c' << 24))) {
2424 // Invoke the decompression of the bytecode. Note that we have to skip the
2425 // file's magic number which is not part of the compressed block. Hence,
2426 // the Buf+4 and Length-4. The result goes into decompressedBlock, a data
2427 // member for retention until BytecodeReader is destructed.
2428 unsigned decompressedLength
= Compressor::decompressToNewBuffer(
2429 (char*)Buf
+4,Length
-4,decompressedBlock
);
2431 // We must adjust the buffer pointers used by the bytecode reader to point
2432 // into the new decompressed block. After decompression, the
2433 // decompressedBlock will point to a contiguous memory area that has
2434 // the decompressed data.
2435 At
= MemStart
= BlockStart
= Buf
= (BufPtr
) decompressedBlock
;
2436 MemEnd
= BlockEnd
= Buf
+ decompressedLength
;
2438 // else if this isn't a regular (uncompressed) bytecode file, then it's
2439 // an error, generate that now.
// (the uncompressed signature is 'l','l','v','m', lowest byte first)
2440 } else if (Sig
!= ('l' | ('l' << 8) | ('v' << 16) | ('m' << 24))) {
2441 error("Invalid bytecode signature: " + utohexstr(Sig
));
2444 // Tell the handler we're starting a module
2445 if (Handler
) Handler
->handleModuleBegin(ModuleID
);
2447 // Get the module block and size and verify. This is handled specially
2448 // because the module block/size is always written in long format. Other
2449 // blocks are written in short format so the read_block method is used.
2450 unsigned Type
, Size
;
// NOTE(review): the statements that fill Type and Size are missing from this
// copy of the file (embedded numbering skips 2451-2452).
2453 if (Type
!= BytecodeFormat::ModuleBlockID
) {
2454 error("Expected Module Block! Type:" + utostr(Type
) + ", Size:"
2458 // It looks like the darwin ranlib program is broken, and adds trailing
2459 // garbage to the end of some bytecode files. This hack allows the bc
2460 // reader to ignore trailing garbage on bytecode files.
2461 if (At
+ Size
< MemEnd
)
2462 MemEnd
= BlockEnd
= At
+Size
;
// After the trim above, the module block must end exactly at the buffer end.
2464 if (At
+ Size
!= MemEnd
)
2465 error("Invalid Top Level Block Length! Type:" + utostr(Type
)
2466 + ", Size:" + utostr(Size
));
2468 // Parse the module contents
2469 this->ParseModule();
2471 // Check for missing functions
2473 error("Function expected, but bytecode stream ended!");
2475 // Look for intrinsic functions to upgrade, upgrade them, and save the
2476 // mapping from old function to new for use later when instructions are
2478 for (Module::iterator FI
= TheModule
->begin(), FE
= TheModule
->end();
2480 if (Function
* newF
= UpgradeIntrinsicFunction(FI
)) {
2481 upgradedFunctions
.insert(std::make_pair(FI
, newF
));
2485 // Tell the handler we're done with the module
2487 Handler
->handleModuleEnd(ModuleID
);
2489 // Tell the handler we've finished the parse
2490 if (Handler
) Handler
->handleFinish();
// Errors caught as std::string: notify the handler and release the
// decompression buffer if one is held.
2492 } catch (std::string
& errstr
) {
2493 if (Handler
) Handler
->handleError(errstr
);
2497 if (decompressedBlock
!= 0 ) {
2498 ::free(decompressedBlock
);
2499 decompressedBlock
= 0;
// A second handler reports a generic message and does the same buffer
// cleanup. NOTE(review): its catch line is missing from this copy of the file.
2503 std::string
msg("Unknown Exception Occurred");
2504 if (Handler
) Handler
->handleError(msg
);
2508 if (decompressedBlock
!= 0) {
2509 ::free(decompressedBlock
);
2510 decompressedBlock
= 0;
2516 //===----------------------------------------------------------------------===//
2517 //=== Default Implementations of Handler Methods
2518 //===----------------------------------------------------------------------===//
/// Out-of-line definition of the BytecodeHandler destructor; the body is
/// empty.
2520 BytecodeHandler::~BytecodeHandler() {}