1 //===------ BPFAbstractMemberAccess.cpp - Abstracting Member Accesses -----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
9 // This pass abstracts struct/union member accesses in order to support
10 // compile-once run-everywhere (CO-RE). The CO-RE intends to compile the program
11 // which can run on different kernels. In particular, if a bpf program tries to
12 // access a particular kernel data structure member, the details of the
13 // intermediate member access will be remembered so bpf loader can do
14 // necessary adjustment right before program loading.
28 // For the member access e.c.b, the compiler will generate code
31 // The compile-once run-everywhere instead generates the following code
34 // The "4" in "r = 4" can be changed based on a particular kernel version.
35 // For example, on a particular kernel version, if struct s is changed to
43 // By repeating the member access on the host, the bpf loader can
44 // adjust "r = 4" as "r = 8".
46 // This feature relies on the following three intrinsic calls:
47 // addr = preserve_array_access_index(base, dimension, index)
48 // addr = preserve_union_access_index(base, di_index)
49 // !llvm.preserve.access.index <union_ditype>
50 // addr = preserve_struct_access_index(base, gep_index, di_index)
51 // !llvm.preserve.access.index <struct_ditype>
53 // Bitfield member access needs special attention. User cannot take the
54 // address of a bitfield access. To facilitate kernel verifier
55 // for easy bitfield code optimization, a new clang intrinsic is introduced:
56 // uint32_t __builtin_preserve_field_info(member_access, info_kind)
57 // In IR, a chain with two (or more) intrinsic calls will be generated:
59 // addr = preserve_struct_access_index(base, 1, 1) !struct s
60 // uint32_t result = bpf_preserve_field_info(addr, info_kind)
62 // Suppose the info_kind is FIELD_SIGNEDNESS,
63 // The above two IR intrinsics will be replaced with
64 // a relocatable insn:
65 // signedness = /* signedness of member_access */
66 // and signedness can be changed by bpf loader based on the
69 // User can also test whether a field exists or not with
70 // uint32_t result = bpf_preserve_field_info(member_access, FIELD_EXISTENCE)
71 // The field will be always available (result = 1) during initial
72 // compilation, but bpf loader can patch with the correct value
73 // on the target host where the member_access may or may not be available
75 //===----------------------------------------------------------------------===//
79 #include "BPFTargetMachine.h"
80 #include "llvm/BinaryFormat/Dwarf.h"
81 #include "llvm/DebugInfo/BTF/BTF.h"
82 #include "llvm/IR/DebugInfoMetadata.h"
83 #include "llvm/IR/GlobalVariable.h"
84 #include "llvm/IR/Instruction.h"
85 #include "llvm/IR/Instructions.h"
86 #include "llvm/IR/IntrinsicsBPF.h"
87 #include "llvm/IR/Module.h"
88 #include "llvm/IR/PassManager.h"
89 #include "llvm/IR/Type.h"
90 #include "llvm/IR/User.h"
91 #include "llvm/IR/Value.h"
92 #include "llvm/Pass.h"
93 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
96 #define DEBUG_TYPE "bpf-abstract-member-access"
// Out-of-class definitions for the static data members of BPFCoreSharedInfo.
// SeqNum is the counter that insertPassThrough() reads and post-increments
// so that every passthrough call it creates carries its own sequence number.
99 constexpr StringRef
BPFCoreSharedInfo::AmaAttr
;
100 uint32_t BPFCoreSharedInfo::SeqNum
;
/// Create a call to the bpf_passthrough intrinsic that wraps \p Input and
/// insert it immediately before \p Before.
///
/// \param M      module in which the intrinsic declaration is looked up.
/// \param BB     block whose LLVMContext supplies the i32 type of the
///               sequence-number argument.
/// \param Before instruction in front of which the new call is inserted.
///
/// NOTE(review): the declaration of the Input parameter and the return
/// statement are not visible in this excerpt; presumably the new call
/// instruction is returned -- confirm.
102 Instruction
*BPFCoreSharedInfo::insertPassThrough(Module
*M
, BasicBlock
*BB
,
104 Instruction
*Before
) {
// The intrinsic is overloaded; Input's type is used for both type slots.
105 Function
*Fn
= Intrinsic::getDeclaration(
106 M
, Intrinsic::bpf_passthrough
, {Input
->getType(), Input
->getType()});
// Post-increment the shared counter so each call gets a distinct number.
107 Constant
*SeqNumVal
= ConstantInt::get(Type::getInt32Ty(BB
->getContext()),
108 BPFCoreSharedInfo::SeqNum
++);
110 auto *NewInst
= CallInst::Create(Fn
, {SeqNumVal
, Input
});
111 NewInst
->insertBefore(Before
);
116 using namespace llvm
;
/// Driver for the abstract-member-access transformation. It is constructed
/// with the BPF target machine and processes one function per run() call.
///
/// NOTE(review): several original lines (access specifiers, the CallInfo
/// struct header and some of its fields, the enum header, the TM member) are
/// not visible in this excerpt.
119 class BPFAbstractMemberAccess final
{
121 BPFAbstractMemberAccess(BPFTargetMachine
*TM
) : TM(TM
) {}
// Process a single function; the result is whatever doTransformation()
// returns once the debug-info prerequisites in run() are satisfied.
123 bool run(Function
&F
);
// Fields of CallInfo that are visible here: the constant access index taken
// from the intrinsic call, and the ABI alignment of the base element type
// (filled in for array and struct accesses).
127 uint32_t AccessIndex
;
128 MaybeAlign RecordAlignment
;
// Stack of (call, info) pairs used when walking an access chain.
132 typedef std::stack
<std::pair
<CallInst
*, CallInfo
>> CallInfoStack
;
// Values stored in CallInfo::Kind to classify the recognized intrinsic call.
136 BPFPreserveArrayAI
= 1,
137 BPFPreserveUnionAI
= 2,
138 BPFPreserveStructAI
= 3,
139 BPFPreserveFieldInfoAI
= 4,
// Data layout of the module being processed; assigned in run().
143 const DataLayout
*DL
= nullptr;
// Static, hence shared by every instance of this class.
// NOTE(review): presumably keyed by the access-key string -- confirm.
146 static std::map
<std::string
, GlobalVariable
*> GEPGlobals
;
147 // A map to link preserve_*_access_index intrinsic calls.
148 std::map
<CallInst
*, std::pair
<CallInst
*, CallInfo
>> AIChain
;
149 // A map to hold all the base preserve_*_access_index intrinsic calls.
150 // The base call is not an input of any other preserve_*
152 std::map
<CallInst
*, CallInfo
> BaseAICalls
;
153 // A map to hold <AnonRecord, TypeDef> relationships
154 std::map
<DICompositeType
*, DIDerivedType
*> AnonRecords
;
156 void CheckAnonRecordType(DIDerivedType
*ParentTy
, DIType
*Ty
);
157 void CheckCompositeType(DIDerivedType
*ParentTy
, DICompositeType
*CTy
);
158 void CheckDerivedType(DIDerivedType
*ParentTy
, DIDerivedType
*DTy
);
159 void ResetMetadata(struct CallInfo
&CInfo
);
161 bool doTransformation(Function
&F
);
163 void traceAICall(CallInst
*Call
, CallInfo
&ParentInfo
);
164 void traceBitCast(BitCastInst
*BitCast
, CallInst
*Parent
,
165 CallInfo
&ParentInfo
);
166 void traceGEP(GetElementPtrInst
*GEP
, CallInst
*Parent
,
167 CallInfo
&ParentInfo
);
168 void collectAICallChains(Function
&F
);
170 bool IsPreserveDIAccessIndexCall(const CallInst
*Call
, CallInfo
&Cinfo
);
171 bool IsValidAIChain(const MDNode
*ParentMeta
, uint32_t ParentAI
,
172 const MDNode
*ChildMeta
);
173 bool removePreserveAccessIndexIntrinsic(Function
&F
);
// NOTE(review): the out-of-class definition names its second parameter
// DimensionIndex rather than NumOfZerosIndex.
174 void replaceWithGEP(std::vector
<CallInst
*> &CallList
,
175 uint32_t NumOfZerosIndex
, uint32_t DIIndex
);
176 bool HasPreserveFieldInfoCall(CallInfoStack
&CallStack
);
177 void GetStorageBitRange(DIDerivedType
*MemberTy
, Align RecordAlignment
,
178 uint32_t &StartBitOffset
, uint32_t &EndBitOffset
);
179 uint32_t GetFieldInfo(uint32_t InfoKind
, DICompositeType
*CTy
,
180 uint32_t AccessIndex
, uint32_t PatchImm
,
181 MaybeAlign RecordAlignment
);
183 Value
*computeBaseAndAccessKey(CallInst
*Call
, CallInfo
&CInfo
,
184 std::string
&AccessKey
, MDNode
*&BaseMeta
);
185 MDNode
*computeAccessKey(CallInst
*Call
, CallInfo
&CInfo
,
186 std::string
&AccessKey
, bool &IsInt32Ret
);
187 uint64_t getConstant(const Value
*IndexValue
);
188 bool transformGEPChain(CallInst
*Call
, CallInfo
&CInfo
);
// Out-of-class definition of the static GEPGlobals member declared above.
191 std::map
<std::string
, GlobalVariable
*> BPFAbstractMemberAccess::GEPGlobals
;
192 } // End anonymous namespace
/// Entry point for one function.
///
/// The function is skipped when its module carries no debug compile units.
/// Otherwise the types in the subprogram's signature and the types of its
/// retained local variables are scanned with CheckAnonRecordType() to record
/// (anonymous record -> typedef) relations, the module's DataLayout is cached
/// in DL, and the result of doTransformation(F) is returned.
///
/// NOTE(review): the declaration of M and the statement executed on the
/// bail-out path are not visible in this excerpt.
194 bool BPFAbstractMemberAccess::run(Function
&F
) {
195 LLVM_DEBUG(dbgs() << "********** Abstract Member Accesses **********\n");
201 // Bail out if no debug info.
202 if (M
->debug_compile_units().empty())
205 // For each argument/return/local_variable type, trace the type
206 // pattern like '[derived_type]* [composite_type]' to check
207 // and remember (anon record -> typedef) relations where the
208 // anon record is defined as
209 // typedef [const/volatile/restrict]* [anon record]
210 DISubprogram
*SP
= F
.getSubprogram();
211 if (SP
&& SP
->isDefinition()) {
// Every scan starts with a null parent: no typedef has been seen yet.
212 for (DIType
*Ty
: SP
->getType()->getTypeArray())
213 CheckAnonRecordType(nullptr, Ty
);
214 for (const DINode
*DN
: SP
->getRetainedNodes()) {
215 if (const auto *DV
= dyn_cast
<DILocalVariable
>(DN
))
216 CheckAnonRecordType(nullptr, DV
->getType());
220 DL
= &M
->getDataLayout();
221 return doTransformation(F
);
/// Swap CInfo.Metadata for the typedef recorded in AnonRecords when the
/// metadata is a composite type that has a non-null entry in that map.
/// A null entry (CheckCompositeType() stores nullptr when several typedefs
/// name the same anonymous record) leaves the metadata untouched.
224 void BPFAbstractMemberAccess::ResetMetadata(struct CallInfo
&CInfo
) {
225 if (auto Ty
= dyn_cast
<DICompositeType
>(CInfo
.Metadata
)) {
226 if (AnonRecords
.find(Ty
) != AnonRecords
.end()) {
227 if (AnonRecords
[Ty
] != nullptr)
228 CInfo
.Metadata
= AnonRecords
[Ty
];
/// Record the relation between an unnamed composite type and the typedef
/// that directly wraps it.
///
/// Only composites with an empty name whose \p ParentTy is a DW_TAG_typedef
/// are of interest. The first typedef seen for a record is stored in
/// AnonRecords; when a different typedef is later seen for the same record
/// the entry is overwritten with nullptr.
///
/// NOTE(review): the statements guarded by the first and last `if`, and the
/// end of the "not yet recorded" branch, are not visible in this excerpt;
/// presumably they are early returns -- confirm.
233 void BPFAbstractMemberAccess::CheckCompositeType(DIDerivedType
*ParentTy
,
234 DICompositeType
*CTy
) {
235 if (!CTy
->getName().empty() || !ParentTy
||
236 ParentTy
->getTag() != dwarf::DW_TAG_typedef
)
239 if (AnonRecords
.find(CTy
) == AnonRecords
.end()) {
240 AnonRecords
[CTy
] = ParentTy
;
244 // Two or more typedef's may point to the same anon record.
245 // If this is the case, set the typedef DIType to be nullptr
246 // to indicate the duplication case.
247 DIDerivedType
*CurrTy
= AnonRecords
[CTy
];
248 if (CurrTy
== ParentTy
)
250 AnonRecords
[CTy
] = nullptr;
/// Continue the anonymous-record scan through a derived type by recursing
/// into its base type via CheckAnonRecordType().
///
/// The parent passed down depends on the tag of \p DTy: a pointer resets it
/// to nullptr, a typedef makes \p DTy itself the new parent, and the last
/// call forwards \p ParentTy unchanged.
///
/// NOTE(review): the lines between the BaseType lookup and the tag dispatch,
/// and the line introducing the final call (presumably `else`), are not
/// visible in this excerpt.
253 void BPFAbstractMemberAccess::CheckDerivedType(DIDerivedType
*ParentTy
,
254 DIDerivedType
*DTy
) {
255 DIType
*BaseType
= DTy
->getBaseType();
259 unsigned Tag
= DTy
->getTag();
260 if (Tag
== dwarf::DW_TAG_pointer_type
)
261 CheckAnonRecordType(nullptr, BaseType
);
262 else if (Tag
== dwarf::DW_TAG_typedef
)
263 CheckAnonRecordType(DTy
, BaseType
);
265 CheckAnonRecordType(ParentTy
, BaseType
);
/// Dispatch the anonymous-record scan on the dynamic kind of the type:
/// composite types go to CheckCompositeType(), derived types go to
/// CheckDerivedType(); both receive \p ParentTy unchanged.
///
/// NOTE(review): the declaration of the Ty parameter and any statements
/// preceding the dispatch are not visible in this excerpt.
268 void BPFAbstractMemberAccess::CheckAnonRecordType(DIDerivedType
*ParentTy
,
273 if (auto *CTy
= dyn_cast
<DICompositeType
>(Ty
))
274 return CheckCompositeType(ParentTy
, CTy
);
275 else if (auto *DTy
= dyn_cast
<DIDerivedType
>(Ty
))
276 return CheckDerivedType(ParentTy
, DTy
);
/// Predicate used by stripQualifiers() to decide whether a derived-type tag
/// should be looked through.
///
/// The first test singles out tags other than typedef, const, volatile,
/// restrict and member; the second singles out typedefs when \p skipTypedef
/// is false.
///
/// NOTE(review): the return statements are not visible in this excerpt;
/// presumably both tests return false and the fall-through returns true --
/// confirm.
279 static bool SkipDIDerivedTag(unsigned Tag
, bool skipTypedef
) {
280 if (Tag
!= dwarf::DW_TAG_typedef
&& Tag
!= dwarf::DW_TAG_const_type
&&
281 Tag
!= dwarf::DW_TAG_volatile_type
&&
282 Tag
!= dwarf::DW_TAG_restrict_type
&&
283 Tag
!= dwarf::DW_TAG_member
)
285 if (Tag
== dwarf::DW_TAG_typedef
&& !skipTypedef
)
/// Walk down a chain of derived types, replacing \p Ty by its base type for
/// as long as SkipDIDerivedTag() accepts the current tag.
///
/// \param skipTypedef forwarded to SkipDIDerivedTag(); defaults to true.
///
/// NOTE(review): the statement taken when SkipDIDerivedTag() rejects the tag
/// and the final return are not visible in this excerpt.
290 static DIType
* stripQualifiers(DIType
*Ty
, bool skipTypedef
= true) {
291 while (auto *DTy
= dyn_cast
<DIDerivedType
>(Ty
)) {
292 if (!SkipDIDerivedTag(DTy
->getTag(), skipTypedef
))
294 Ty
= DTy
->getBaseType();
/// Const overload of stripQualifiers(): same walk over derived types, always
/// passing true as the skipTypedef argument of SkipDIDerivedTag().
///
/// NOTE(review): the statement taken when SkipDIDerivedTag() rejects the tag
/// and the final return are not visible in this excerpt.
299 static const DIType
* stripQualifiers(const DIType
*Ty
) {
300 while (auto *DTy
= dyn_cast
<DIDerivedType
>(Ty
)) {
301 if (!SkipDIDerivedTag(DTy
->getTag(), true))
303 Ty
= DTy
->getBaseType();
/// Multiply together the element counts of the array dimensions of \p CTy,
/// starting at dimension index \p StartDim. The running product starts at 1,
/// so a StartDim at or beyond the number of elements leaves it at 1.
///
/// NOTE(review): CI is obtained with dyn_cast and dereferenced with no null
/// check visible in this excerpt; a subrange whose count is not a
/// ConstantInt would dereference a null pointer -- confirm whether callers
/// rule that out. The return statement is not visible here either.
308 static uint32_t calcArraySize(const DICompositeType
*CTy
, uint32_t StartDim
) {
309 DINodeArray Elements
= CTy
->getElements();
310 uint32_t DimSize
= 1;
311 for (uint32_t I
= StartDim
; I
< Elements
.size(); ++I
) {
312 if (auto *Element
= dyn_cast_or_null
<DINode
>(Elements
[I
]))
// Only subrange elements contribute to the product.
313 if (Element
->getTag() == dwarf::DW_TAG_subrange_type
) {
314 const DISubrange
*SR
= cast
<DISubrange
>(Element
);
315 auto *CI
= SR
->getCount().dyn_cast
<ConstantInt
*>();
316 DimSize
*= CI
->getSExtValue();
/// Return the element type attached to the first parameter (index 0) of
/// \p Call, as reported by CallInst::getParamElementType().
323 static Type
*getBaseElementType(const CallInst
*Call
) {
324 // Element type is stored in an elementtype() attribute on the first param.
325 return Call
->getParamElementType(0);
/// Classify \p Call by the name prefix of its callee and fill in the CallInfo
/// output parameter (Kind, Metadata, AccessIndex and, where applicable, Base
/// and RecordAlignment).
///
/// Recognized prefixes: llvm.preserve.{array,union,struct}.access.index and
/// llvm.bpf.preserve.{field.info,type.info,enum.value}. The three llvm.bpf.*
/// intrinsics are all given Kind BPFPreserveFieldInfoAI.
///
/// NOTE(review): the second parameter line, the null checks that guard the
/// report_fatal_error() calls, and all return statements are not visible in
/// this excerpt.
328 /// Check whether a call is a preserve_*_access_index intrinsic call or not.
329 bool BPFAbstractMemberAccess::IsPreserveDIAccessIndexCall(const CallInst
*Call
,
334 const auto *GV
= dyn_cast
<GlobalValue
>(Call
->getCalledOperand());
// Array access: operand 0 is the base and operand 2 the constant index.
337 if (GV
->getName().startswith("llvm.preserve.array.access.index")) {
338 CInfo
.Kind
= BPFPreserveArrayAI
;
339 CInfo
.Metadata
= Call
->getMetadata(LLVMContext::MD_preserve_access_index
);
341 report_fatal_error("Missing metadata for llvm.preserve.array.access.index intrinsic");
342 CInfo
.AccessIndex
= getConstant(Call
->getArgOperand(2));
343 CInfo
.Base
= Call
->getArgOperand(0);
344 CInfo
.RecordAlignment
= DL
->getABITypeAlign(getBaseElementType(Call
));
// Union access: operand 1 is the index; no record alignment is set.
347 if (GV
->getName().startswith("llvm.preserve.union.access.index")) {
348 CInfo
.Kind
= BPFPreserveUnionAI
;
349 CInfo
.Metadata
= Call
->getMetadata(LLVMContext::MD_preserve_access_index
);
351 report_fatal_error("Missing metadata for llvm.preserve.union.access.index intrinsic");
352 ResetMetadata(CInfo
);
353 CInfo
.AccessIndex
= getConstant(Call
->getArgOperand(1));
354 CInfo
.Base
= Call
->getArgOperand(0);
// Struct access: the access index is taken from operand 2, not operand 1.
357 if (GV
->getName().startswith("llvm.preserve.struct.access.index")) {
358 CInfo
.Kind
= BPFPreserveStructAI
;
359 CInfo
.Metadata
= Call
->getMetadata(LLVMContext::MD_preserve_access_index
);
361 report_fatal_error("Missing metadata for llvm.preserve.struct.access.index intrinsic");
362 ResetMetadata(CInfo
);
363 CInfo
.AccessIndex
= getConstant(Call
->getArgOperand(2));
364 CInfo
.Base
= Call
->getArgOperand(0);
365 CInfo
.RecordAlignment
= DL
->getABITypeAlign(getBaseElementType(Call
));
// Field info: no metadata; AccessIndex carries the validated info_kind.
368 if (GV
->getName().startswith("llvm.bpf.preserve.field.info")) {
369 CInfo
.Kind
= BPFPreserveFieldInfoAI
;
370 CInfo
.Metadata
= nullptr;
371 // Check validity of info_kind as clang did not check this.
372 uint64_t InfoKind
= getConstant(Call
->getArgOperand(1));
373 if (InfoKind
>= BTF::MAX_FIELD_RELOC_KIND
)
374 report_fatal_error("Incorrect info_kind for llvm.bpf.preserve.field.info intrinsic");
375 CInfo
.AccessIndex
= InfoKind
;
// Type info: the flag in operand 1 is mapped onto a BTF::TYPE_* kind.
// NOTE(review): the "Missing metadata" message below omits the "bpf."
// component of the intrinsic name.
378 if (GV
->getName().startswith("llvm.bpf.preserve.type.info")) {
379 CInfo
.Kind
= BPFPreserveFieldInfoAI
;
380 CInfo
.Metadata
= Call
->getMetadata(LLVMContext::MD_preserve_access_index
);
382 report_fatal_error("Missing metadata for llvm.preserve.type.info intrinsic");
383 uint64_t Flag
= getConstant(Call
->getArgOperand(1));
384 if (Flag
>= BPFCoreSharedInfo::MAX_PRESERVE_TYPE_INFO_FLAG
)
385 report_fatal_error("Incorrect flag for llvm.bpf.preserve.type.info intrinsic");
386 if (Flag
== BPFCoreSharedInfo::PRESERVE_TYPE_INFO_EXISTENCE
)
387 CInfo
.AccessIndex
= BTF::TYPE_EXISTENCE
;
388 else if (Flag
== BPFCoreSharedInfo::PRESERVE_TYPE_INFO_MATCH
)
389 CInfo
.AccessIndex
= BTF::TYPE_MATCH
;
391 CInfo
.AccessIndex
= BTF::TYPE_SIZE
;
// Enum value: the flag is read from operand 2.
// NOTE(review): the "Missing metadata" message below omits the "bpf."
// component of the intrinsic name.
394 if (GV
->getName().startswith("llvm.bpf.preserve.enum.value")) {
395 CInfo
.Kind
= BPFPreserveFieldInfoAI
;
396 CInfo
.Metadata
= Call
->getMetadata(LLVMContext::MD_preserve_access_index
);
398 report_fatal_error("Missing metadata for llvm.preserve.enum.value intrinsic");
399 uint64_t Flag
= getConstant(Call
->getArgOperand(2));
400 if (Flag
>= BPFCoreSharedInfo::MAX_PRESERVE_ENUM_VALUE_FLAG
)
401 report_fatal_error("Incorrect flag for llvm.bpf.preserve.enum.value intrinsic");
402 if (Flag
== BPFCoreSharedInfo::PRESERVE_ENUM_VALUE_EXISTENCE
)
403 CInfo
.AccessIndex
= BTF::ENUM_VALUE_EXISTENCE
;
405 CInfo
.AccessIndex
= BTF::ENUM_VALUE
;
/// Replace every intrinsic call in \p CallList by an in-bounds GEP.
///
/// For each call the index list is `Dimension` zeros followed by the call's
/// operand number GEPIndex. Dimension is 1 unless \p DimensionIndex is
/// non-zero, in which case it is the constant stored in that operand. The
/// GEP uses the call's base element type and operand 0 as pointer, is
/// inserted before the call, takes over all of its uses, and the call is
/// erased.
///
/// NOTE(review): the GEPIndex parameter line and the declaration of Zero are
/// not visible in this excerpt. The in-class declaration names the
/// parameters NumOfZerosIndex and DIIndex.
412 void BPFAbstractMemberAccess::replaceWithGEP(std::vector
<CallInst
*> &CallList
,
413 uint32_t DimensionIndex
,
415 for (auto *Call
: CallList
) {
416 uint32_t Dimension
= 1;
417 if (DimensionIndex
> 0)
418 Dimension
= getConstant(Call
->getArgOperand(DimensionIndex
));
421 ConstantInt::get(Type::getInt32Ty(Call
->getParent()->getContext()), 0);
422 SmallVector
<Value
*, 4> IdxList
;
423 for (unsigned I
= 0; I
< Dimension
; ++I
)
424 IdxList
.push_back(Zero
);
425 IdxList
.push_back(Call
->getArgOperand(GEPIndex
));
427 auto *GEP
= GetElementPtrInst::CreateInBounds(
428 getBaseElementType(Call
), Call
->getArgOperand(0), IdxList
, "", Call
);
429 Call
->replaceAllUsesWith(GEP
);
430 Call
->eraseFromParent();
/// Lower preserve_*_access_index intrinsic calls in \p F to plain IR.
///
/// Calls recognized by IsPreserveDIAccessIndexCall() are first sorted into
/// three lists by kind. Array and struct calls are then rewritten through
/// replaceWithGEP(); union calls are replaced by their operand 0 and erased.
///
/// NOTE(review): the instruction iteration, the declaration of CInfo, the
/// line introducing the struct-list push (presumably `else`) and the return
/// value are not visible in this excerpt.
434 bool BPFAbstractMemberAccess::removePreserveAccessIndexIntrinsic(Function
&F
) {
435 std::vector
<CallInst
*> PreserveArrayIndexCalls
;
436 std::vector
<CallInst
*> PreserveUnionIndexCalls
;
437 std::vector
<CallInst
*> PreserveStructIndexCalls
;
442 auto *Call
= dyn_cast
<CallInst
>(&I
);
444 if (!IsPreserveDIAccessIndexCall(Call
, CInfo
))
448 if (CInfo
.Kind
== BPFPreserveArrayAI
)
449 PreserveArrayIndexCalls
.push_back(Call
);
450 else if (CInfo
.Kind
== BPFPreserveUnionAI
)
451 PreserveUnionIndexCalls
.push_back(Call
);
453 PreserveStructIndexCalls
.push_back(Call
);
456 // do the following transformation:
457 // . addr = preserve_array_access_index(base, dimension, index)
459 // addr = GEP(base, dimension's zero's, index)
460 // . addr = preserve_union_access_index(base, di_index)
462 // addr = base, i.e., all usages of "addr" are replaced by "base".
463 // . addr = preserve_struct_access_index(base, gep_index, di_index)
465 // addr = GEP(base, 0, gep_index)
// Array: operand 1 holds the dimension and operand 2 the index.
466 replaceWithGEP(PreserveArrayIndexCalls
, 1, 2);
// Struct: a single leading zero, then operand 1 (gep_index).
467 replaceWithGEP(PreserveStructIndexCalls
, 0, 1);
468 for (auto *Call
: PreserveUnionIndexCalls
) {
469 Call
->replaceAllUsesWith(Call
->getArgOperand(0));
470 Call
->eraseFromParent();
/// Decide whether a child access-index call may be chained onto its parent
/// by comparing their debug-info types after stripping qualifiers.
///
/// Visible rules: when the parent is a pointer type, its stripped pointee
/// must be the child type; two array types must share the same base type;
/// otherwise the parent's element type (array base type, or the member at
/// index ParentAI) must strip down to the child composite type.
///
/// NOTE(review): the ParentAI parameter line, the condition in front of the
/// first `return true`, the results for a derived child type and for a
/// non-pointer derived parent, and the declaration of Ty are not visible in
/// this excerpt.
476 /// Check whether the access index chain is valid. We check
477 /// here because there may be type casts between two
478 /// access indexes. We want to ensure memory access still valid.
479 bool BPFAbstractMemberAccess::IsValidAIChain(const MDNode
*ParentType
,
481 const MDNode
*ChildType
) {
483 return true; // preserve_field_info, no type comparison needed.
485 const DIType
*PType
= stripQualifiers(cast
<DIType
>(ParentType
));
486 const DIType
*CType
= stripQualifiers(cast
<DIType
>(ChildType
));
488 // Child is a derived/pointer type, which is due to type casting.
489 // Pointer type cannot be in the middle of chain.
490 if (isa
<DIDerivedType
>(CType
))
493 // Parent is a pointer type.
494 if (const auto *PtrTy
= dyn_cast
<DIDerivedType
>(PType
)) {
495 if (PtrTy
->getTag() != dwarf::DW_TAG_pointer_type
)
497 return stripQualifiers(PtrTy
->getBaseType()) == CType
;
500 // Otherwise, struct/union/array types
501 const auto *PTy
= dyn_cast
<DICompositeType
>(PType
);
502 const auto *CTy
= dyn_cast
<DICompositeType
>(CType
);
503 assert(PTy
&& CTy
&& "ParentType or ChildType is null or not composite");
505 uint32_t PTyTag
= PTy
->getTag();
506 assert(PTyTag
== dwarf::DW_TAG_array_type
||
507 PTyTag
== dwarf::DW_TAG_structure_type
||
508 PTyTag
== dwarf::DW_TAG_union_type
);
510 uint32_t CTyTag
= CTy
->getTag();
511 assert(CTyTag
== dwarf::DW_TAG_array_type
||
512 CTyTag
== dwarf::DW_TAG_structure_type
||
513 CTyTag
== dwarf::DW_TAG_union_type
);
515 // Multi dimensional arrays, base element should be the same
516 if (PTyTag
== dwarf::DW_TAG_array_type
&& PTyTag
== CTyTag
)
517 return PTy
->getBaseType() == CTy
->getBaseType();
520 if (PTyTag
== dwarf::DW_TAG_array_type
)
521 Ty
= PTy
->getBaseType();
523 Ty
= dyn_cast
<DIType
>(PTy
->getElements()[ParentAI
]);
525 return dyn_cast
<DICompositeType
>(stripQualifiers(Ty
)) == CTy
;
/// Follow the users of an access-index call to build access chains.
///
/// For each user of \p Call: bitcasts are followed with traceBitCast();
/// a recognized intrinsic call that forms a valid chain with the parent is
/// linked in AIChain (child -> (Call, ParentInfo)) and traced recursively;
/// GEPs with all-zero indices are followed with traceGEP(). The remaining
/// visible statements record \p Call in BaseAICalls.
///
/// NOTE(review): the declaration of ChildInfo, any check on Inst after the
/// dyn_cast, and the `else` lines introducing the BaseAICalls assignments are
/// not visible in this excerpt.
528 void BPFAbstractMemberAccess::traceAICall(CallInst
*Call
,
529 CallInfo
&ParentInfo
) {
530 for (User
*U
: Call
->users()) {
531 Instruction
*Inst
= dyn_cast
<Instruction
>(U
);
535 if (auto *BI
= dyn_cast
<BitCastInst
>(Inst
)) {
536 traceBitCast(BI
, Call
, ParentInfo
);
537 } else if (auto *CI
= dyn_cast
<CallInst
>(Inst
)) {
540 if (IsPreserveDIAccessIndexCall(CI
, ChildInfo
) &&
541 IsValidAIChain(ParentInfo
.Metadata
, ParentInfo
.AccessIndex
,
542 ChildInfo
.Metadata
)) {
543 AIChain
[CI
] = std::make_pair(Call
, ParentInfo
);
544 traceAICall(CI
, ChildInfo
);
546 BaseAICalls
[Call
] = ParentInfo
;
548 } else if (auto *GI
= dyn_cast
<GetElementPtrInst
>(Inst
)) {
549 if (GI
->hasAllZeroIndices())
550 traceGEP(GI
, Call
, ParentInfo
);
552 BaseAICalls
[Call
] = ParentInfo
;
554 BaseAICalls
[Call
] = ParentInfo
;
/// Same user walk as traceAICall(), but starting from a bitcast that sits
/// between the parent access-index call and its eventual consumers.
///
/// Chain links and BaseAICalls entries are keyed on \p Parent (the original
/// intrinsic call), not on the bitcast itself.
///
/// NOTE(review): the Parent parameter line, the declaration of ChildInfo and
/// the `else` lines introducing the BaseAICalls assignments are not visible
/// in this excerpt.
559 void BPFAbstractMemberAccess::traceBitCast(BitCastInst
*BitCast
,
561 CallInfo
&ParentInfo
) {
562 for (User
*U
: BitCast
->users()) {
563 Instruction
*Inst
= dyn_cast
<Instruction
>(U
);
567 if (auto *BI
= dyn_cast
<BitCastInst
>(Inst
)) {
568 traceBitCast(BI
, Parent
, ParentInfo
);
569 } else if (auto *CI
= dyn_cast
<CallInst
>(Inst
)) {
571 if (IsPreserveDIAccessIndexCall(CI
, ChildInfo
) &&
572 IsValidAIChain(ParentInfo
.Metadata
, ParentInfo
.AccessIndex
,
573 ChildInfo
.Metadata
)) {
574 AIChain
[CI
] = std::make_pair(Parent
, ParentInfo
);
575 traceAICall(CI
, ChildInfo
);
577 BaseAICalls
[Parent
] = ParentInfo
;
579 } else if (auto *GI
= dyn_cast
<GetElementPtrInst
>(Inst
)) {
580 if (GI
->hasAllZeroIndices())
581 traceGEP(GI
, Parent
, ParentInfo
);
583 BaseAICalls
[Parent
] = ParentInfo
;
585 BaseAICalls
[Parent
] = ParentInfo
;
/// Same user walk as traceAICall(), but starting from a GEP that sits
/// between the parent access-index call and its eventual consumers. The
/// visible callers only enter here for GEPs whose indices are all zero.
///
/// Chain links and BaseAICalls entries are keyed on \p Parent (the original
/// intrinsic call), not on the GEP itself.
///
/// NOTE(review): the declaration of ChildInfo and the `else` lines
/// introducing the BaseAICalls assignments are not visible in this excerpt.
590 void BPFAbstractMemberAccess::traceGEP(GetElementPtrInst
*GEP
, CallInst
*Parent
,
591 CallInfo
&ParentInfo
) {
592 for (User
*U
: GEP
->users()) {
593 Instruction
*Inst
= dyn_cast
<Instruction
>(U
);
597 if (auto *BI
= dyn_cast
<BitCastInst
>(Inst
)) {
598 traceBitCast(BI
, Parent
, ParentInfo
);
599 } else if (auto *CI
= dyn_cast
<CallInst
>(Inst
)) {
601 if (IsPreserveDIAccessIndexCall(CI
, ChildInfo
) &&
602 IsValidAIChain(ParentInfo
.Metadata
, ParentInfo
.AccessIndex
,
603 ChildInfo
.Metadata
)) {
604 AIChain
[CI
] = std::make_pair(Parent
, ParentInfo
);
605 traceAICall(CI
, ChildInfo
);
607 BaseAICalls
[Parent
] = ParentInfo
;
609 } else if (auto *GI
= dyn_cast
<GetElementPtrInst
>(Inst
)) {
610 if (GI
->hasAllZeroIndices())
611 traceGEP(GI
, Parent
, ParentInfo
);
613 BaseAICalls
[Parent
] = ParentInfo
;
615 BaseAICalls
[Parent
] = ParentInfo
;
/// Seed the chain discovery for \p F: every recognized intrinsic call that
/// is not already a key in AIChain (i.e. not yet linked as somebody's child)
/// is handed to traceAICall().
///
/// NOTE(review): the instruction iteration, the declaration of CInfo and the
/// statement taken when the test succeeds (presumably `continue`) are not
/// visible in this excerpt.
620 void BPFAbstractMemberAccess::collectAICallChains(Function
&F
) {
627 auto *Call
= dyn_cast
<CallInst
>(&I
);
628 if (!IsPreserveDIAccessIndexCall(Call
, CInfo
) ||
629 AIChain
.find(Call
) != AIChain
.end())
632 traceAICall(Call
, CInfo
);
/// Return the zero-extended value of \p IndexValue interpreted as a
/// ConstantInt.
///
/// NOTE(review): CV comes from dyn_cast; the line between the cast and the
/// return is not visible in this excerpt, so whether a non-constant operand
/// is asserted against cannot be confirmed from here.
636 uint64_t BPFAbstractMemberAccess::getConstant(const Value
*IndexValue
) {
637 const ConstantInt
*CV
= dyn_cast
<ConstantInt
>(IndexValue
);
639 return CV
->getValue().getZExtValue();
/// Compute the bit range [StartBitOffset, EndBitOffset) of the storage unit
/// that holds \p MemberTy, where the unit is RecordAlignment bytes wide
/// (clamped to 8 bytes) and starts at the member's bit offset with the low
/// (AlignBits - 1) bits masked off.
///
/// Calls report_fatal_error() when the member is wider than the unit or
/// extends past the end of the unit.
///
/// NOTE(review): the > 8 alignment check compares
/// MemberBitOffset / 64 with (MemberBitOffset + MemberBitSize) / 64, i.e. it
/// uses the one-past-the-end bit, so a member ending exactly on a 64-bit
/// boundary is reported as crossing it -- confirm whether that is intended.
642 /// Get the start and the end of storage offset for \p MemberTy.
643 void BPFAbstractMemberAccess::GetStorageBitRange(DIDerivedType
*MemberTy
,
644 Align RecordAlignment
,
645 uint32_t &StartBitOffset
,
646 uint32_t &EndBitOffset
) {
647 uint32_t MemberBitSize
= MemberTy
->getSizeInBits();
648 uint32_t MemberBitOffset
= MemberTy
->getOffsetInBits();
650 if (RecordAlignment
> 8) {
651 // If the Bits are within an aligned 8-byte, set the RecordAlignment
652 // to 8, other report the fatal error.
653 if (MemberBitOffset
/ 64 != (MemberBitOffset
+ MemberBitSize
) / 64)
654 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
655 "requiring too big alignment");
656 RecordAlignment
= Align(8);
659 uint32_t AlignBits
= RecordAlignment
.value() * 8;
660 if (MemberBitSize
> AlignBits
)
661 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
662 "bitfield size greater than record alignment");
// Mask off the low bits to land on the start of the storage unit.
664 StartBitOffset
= MemberBitOffset
& ~(AlignBits
- 1);
665 if ((StartBitOffset
+ AlignBits
) < (MemberBitOffset
+ MemberBitSize
))
666 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info, "
667 "cross alignment boundary");
668 EndBitOffset
= StartBitOffset
+ AlignBits
;
/// Compute the patch immediate for one step of an access chain.
///
/// \param InfoKind        BTF field relocation kind being evaluated.
/// \param CTy             composite type (array or record) being indexed.
/// \param AccessIndex     element/member index within \p CTy.
/// \param RecordAlignment alignment used for bitfield storage-unit queries;
///                        it is dereferenced on every bitfield path.
///
/// Visible behavior per kind:
///  - FIELD_BYTE_OFFSET: adds to PatchImm the byte offset of the array
///    element, of the struct member, or of the bitfield's storage unit.
///  - FIELD_BYTE_SIZE: byte size of the array slice or member; for bitfields
///    the storage-unit size, which must be a power of two.
///  - FIELD_SIGNEDNESS: whether the underlying basic type (or an enum's base
///    type) has encoding DW_ATE_signed or DW_ATE_signed_char.
///  - FIELD_LSHIFT_U64 / FIELD_RSHIFT_U64: shift amounts used to extract the
///    value from a 64-bit load; the bitfield left shift depends on whether
///    the target architecture is bpfel.
///  Any other kind reaches llvm_unreachable().
///
/// NOTE(review): the PatchImm parameter line, the FIELD_EXISTENCE result,
/// several `else` lines, the declaration of SizeInBits and the conditions in
/// front of the non-bitfield shift returns are not visible in this excerpt.
671 uint32_t BPFAbstractMemberAccess::GetFieldInfo(uint32_t InfoKind
,
672 DICompositeType
*CTy
,
673 uint32_t AccessIndex
,
675 MaybeAlign RecordAlignment
) {
676 if (InfoKind
== BTF::FIELD_EXISTENCE
)
679 uint32_t Tag
= CTy
->getTag();
680 if (InfoKind
== BTF::FIELD_BYTE_OFFSET
) {
681 if (Tag
== dwarf::DW_TAG_array_type
) {
682 auto *EltTy
= stripQualifiers(CTy
->getBaseType());
683 PatchImm
+= AccessIndex
* calcArraySize(CTy
, 1) *
684 (EltTy
->getSizeInBits() >> 3);
685 } else if (Tag
== dwarf::DW_TAG_structure_type
) {
686 auto *MemberTy
= cast
<DIDerivedType
>(CTy
->getElements()[AccessIndex
]);
687 if (!MemberTy
->isBitField()) {
688 PatchImm
+= MemberTy
->getOffsetInBits() >> 3;
690 unsigned SBitOffset
, NextSBitOffset
;
691 GetStorageBitRange(MemberTy
, *RecordAlignment
, SBitOffset
,
693 PatchImm
+= SBitOffset
>> 3;
699 if (InfoKind
== BTF::FIELD_BYTE_SIZE
) {
700 if (Tag
== dwarf::DW_TAG_array_type
) {
701 auto *EltTy
= stripQualifiers(CTy
->getBaseType());
702 return calcArraySize(CTy
, 1) * (EltTy
->getSizeInBits() >> 3);
704 auto *MemberTy
= cast
<DIDerivedType
>(CTy
->getElements()[AccessIndex
]);
705 uint32_t SizeInBits
= MemberTy
->getSizeInBits();
706 if (!MemberTy
->isBitField())
707 return SizeInBits
>> 3;
709 unsigned SBitOffset
, NextSBitOffset
;
710 GetStorageBitRange(MemberTy
, *RecordAlignment
, SBitOffset
,
712 SizeInBits
= NextSBitOffset
- SBitOffset
;
// x & (x - 1) is non-zero exactly when x has more than one bit set.
713 if (SizeInBits
& (SizeInBits
- 1))
714 report_fatal_error("Unsupported field expression for llvm.bpf.preserve.field.info");
715 return SizeInBits
>> 3;
719 if (InfoKind
== BTF::FIELD_SIGNEDNESS
) {
720 const DIType
*BaseTy
;
721 if (Tag
== dwarf::DW_TAG_array_type
) {
722 // Signedness only checked when final array elements are accessed.
723 if (CTy
->getElements().size() != 1)
724 report_fatal_error("Invalid array expression for llvm.bpf.preserve.field.info");
725 BaseTy
= stripQualifiers(CTy
->getBaseType());
727 auto *MemberTy
= cast
<DIDerivedType
>(CTy
->getElements()[AccessIndex
]);
728 BaseTy
= stripQualifiers(MemberTy
->getBaseType());
731 // Only basic types and enum types have signedness.
732 const auto *BTy
= dyn_cast
<DIBasicType
>(BaseTy
);
734 const auto *CompTy
= dyn_cast
<DICompositeType
>(BaseTy
);
735 // Report an error if the field expression does not have signedness.
736 if (!CompTy
|| CompTy
->getTag() != dwarf::DW_TAG_enumeration_type
)
737 report_fatal_error("Invalid field expression for llvm.bpf.preserve.field.info");
738 BaseTy
= stripQualifiers(CompTy
->getBaseType());
739 BTy
= dyn_cast
<DIBasicType
>(BaseTy
);
741 uint32_t Encoding
= BTy
->getEncoding();
742 return (Encoding
== dwarf::DW_ATE_signed
|| Encoding
== dwarf::DW_ATE_signed_char
);
745 if (InfoKind
== BTF::FIELD_LSHIFT_U64
) {
746 // The value is loaded into a value with FIELD_BYTE_SIZE size,
747 // and then zero or sign extended to U64.
748 // FIELD_LSHIFT_U64 and FIELD_RSHIFT_U64 are operations
749 // to extract the original value.
750 const Triple
&Triple
= TM
->getTargetTriple();
751 DIDerivedType
*MemberTy
= nullptr;
752 bool IsBitField
= false;
755 if (Tag
== dwarf::DW_TAG_array_type
) {
756 auto *EltTy
= stripQualifiers(CTy
->getBaseType());
757 SizeInBits
= calcArraySize(CTy
, 1) * EltTy
->getSizeInBits();
759 MemberTy
= cast
<DIDerivedType
>(CTy
->getElements()[AccessIndex
]);
760 SizeInBits
= MemberTy
->getSizeInBits();
761 IsBitField
= MemberTy
->isBitField();
766 report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
767 return 64 - SizeInBits
;
770 unsigned SBitOffset
, NextSBitOffset
;
771 GetStorageBitRange(MemberTy
, *RecordAlignment
, SBitOffset
, NextSBitOffset
);
772 if (NextSBitOffset
- SBitOffset
> 64)
773 report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
775 unsigned OffsetInBits
= MemberTy
->getOffsetInBits();
776 if (Triple
.getArch() == Triple::bpfel
)
777 return SBitOffset
+ 64 - OffsetInBits
- SizeInBits
;
779 return OffsetInBits
+ 64 - NextSBitOffset
;
782 if (InfoKind
== BTF::FIELD_RSHIFT_U64
) {
783 DIDerivedType
*MemberTy
= nullptr;
784 bool IsBitField
= false;
786 if (Tag
== dwarf::DW_TAG_array_type
) {
787 auto *EltTy
= stripQualifiers(CTy
->getBaseType());
788 SizeInBits
= calcArraySize(CTy
, 1) * EltTy
->getSizeInBits();
790 MemberTy
= cast
<DIDerivedType
>(CTy
->getElements()[AccessIndex
]);
791 SizeInBits
= MemberTy
->getSizeInBits();
792 IsBitField
= MemberTy
->isBitField();
797 report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
798 return 64 - SizeInBits
;
801 unsigned SBitOffset
, NextSBitOffset
;
802 GetStorageBitRange(MemberTy
, *RecordAlignment
, SBitOffset
, NextSBitOffset
);
803 if (NextSBitOffset
- SBitOffset
> 64)
804 report_fatal_error("too big field size for llvm.bpf.preserve.field.info");
806 return 64 - SizeInBits
;
809 llvm_unreachable("Unknown llvm.bpf.preserve.field.info info kind");
/// Scan \p CallStack for an entry whose Kind is BPFPreserveFieldInfoAI.
/// The stack is taken by reference and, per the comment below, is not
/// expected to be preserved for the caller.
///
/// NOTE(review): the statement that advances the stack and the return
/// statements are not visible in this excerpt.
812 bool BPFAbstractMemberAccess::HasPreserveFieldInfoCall(CallInfoStack
&CallStack
) {
813 // This is called in error return path, no need to maintain CallStack.
814 while (CallStack
.size()) {
815 auto StackElem
= CallStack
.top();
816 if (StackElem
.second
.Kind
== BPFPreserveFieldInfoAI
)
/// Walk an access chain from its last call back to its head and derive the
/// relocation description for it.
///
/// The chain is first pushed onto a stack by following AIChain. A first loop
/// consumes leading array/pointer accesses, accumulating FirstIndex until a
/// struct/union (or typedef of one) supplies TypeName. A second loop appends
/// ":<index>" to \p AccessKey for each remaining access and updates PatchImm
/// through GetFieldInfo(). Finally \p AccessKey is rewritten as
/// "llvm.<TypeName>:<InfoKind>:<PatchImm>$<AccessKey>".
///
/// report_fatal_error() is raised when a preserve_field_info call is found
/// on a chain that does not resolve to a suitable composite type.
///
/// NOTE(review): the CInfo and BaseMeta parameter lines, the loop headers
/// and stack pops, the declaration of TypeMeta, several conditions and the
/// return statement are not visible in this excerpt.
823 /// Compute the base of the whole preserve_* intrinsics chains, i.e., the base
824 /// pointer of the first preserve_*_access_index call, and construct the access
825 /// string, which will be the name of a global variable.
826 Value
*BPFAbstractMemberAccess::computeBaseAndAccessKey(CallInst
*Call
,
828 std::string
&AccessKey
,
830 Value
*Base
= nullptr;
831 std::string TypeName
;
832 CallInfoStack CallStack
;
834 // Put the access chain into a stack with the top as the head of the chain.
836 CallStack
.push(std::make_pair(Call
, CInfo
));
837 CInfo
= AIChain
[Call
].second
;
838 Call
= AIChain
[Call
].first
;
841 // The access offset from the base of the head of chain is also
842 // calculated here as all debuginfo types are available.
844 // Get type name and calculate the first index.
845 // We only want to get type name from typedef, structure or union.
846 // If user wants a relocation like
847 // int *p; ... __builtin_preserve_access_index(&p[4]) ...
849 // int a[10][20]; ... __builtin_preserve_access_index(&a[2][3]) ...
850 // we will skip them.
851 uint32_t FirstIndex
= 0;
852 uint32_t PatchImm
= 0; // AccessOffset or the requested field info
853 uint32_t InfoKind
= BTF::FIELD_BYTE_OFFSET
;
854 while (CallStack
.size()) {
855 auto StackElem
= CallStack
.top();
856 Call
= StackElem
.first
;
857 CInfo
= StackElem
.second
;
862 DIType
*PossibleTypeDef
= stripQualifiers(cast
<DIType
>(CInfo
.Metadata
),
864 DIType
*Ty
= stripQualifiers(PossibleTypeDef
);
865 if (CInfo
.Kind
== BPFPreserveUnionAI
||
866 CInfo
.Kind
== BPFPreserveStructAI
) {
867 // struct or union type. If the typedef is in the metadata, always
869 TypeName
= std::string(PossibleTypeDef
->getName());
870 TypeMeta
= PossibleTypeDef
;
871 PatchImm
+= FirstIndex
* (Ty
->getSizeInBits() >> 3);
875 assert(CInfo
.Kind
== BPFPreserveArrayAI
);
877 // Array entries will always be consumed for accumulative initial index.
880 // BPFPreserveArrayAI
881 uint64_t AccessIndex
= CInfo
.AccessIndex
;
883 DIType
*BaseTy
= nullptr;
884 bool CheckElemType
= false;
885 if (const auto *CTy
= dyn_cast
<DICompositeType
>(Ty
)) {
887 assert(CTy
->getTag() == dwarf::DW_TAG_array_type
);
890 FirstIndex
+= AccessIndex
* calcArraySize(CTy
, 1);
891 BaseTy
= stripQualifiers(CTy
->getBaseType());
892 CheckElemType
= CTy
->getElements().size() == 1;
895 auto *DTy
= cast
<DIDerivedType
>(Ty
);
896 assert(DTy
->getTag() == dwarf::DW_TAG_pointer_type
);
898 BaseTy
= stripQualifiers(DTy
->getBaseType());
899 CTy
= dyn_cast
<DICompositeType
>(BaseTy
);
901 CheckElemType
= true;
902 } else if (CTy
->getTag() != dwarf::DW_TAG_array_type
) {
903 FirstIndex
+= AccessIndex
;
904 CheckElemType
= true;
906 FirstIndex
+= AccessIndex
* calcArraySize(CTy
, 0);
911 auto *CTy
= dyn_cast
<DICompositeType
>(BaseTy
);
913 if (HasPreserveFieldInfoCall(CallStack
))
914 report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
918 unsigned CTag
= CTy
->getTag();
919 if (CTag
== dwarf::DW_TAG_structure_type
|| CTag
== dwarf::DW_TAG_union_type
) {
920 TypeName
= std::string(CTy
->getName());
922 if (HasPreserveFieldInfoCall(CallStack
))
923 report_fatal_error("Invalid field access for llvm.preserve.field.info intrinsic");
927 PatchImm
+= FirstIndex
* (CTy
->getSizeInBits() >> 3);
931 assert(TypeName
.size());
932 AccessKey
+= std::to_string(FirstIndex
);
934 // Traverse the rest of access chain to complete offset calculation
935 // and access key construction.
936 while (CallStack
.size()) {
937 auto StackElem
= CallStack
.top();
938 CInfo
= StackElem
.second
;
941 if (CInfo
.Kind
== BPFPreserveFieldInfoAI
) {
942 InfoKind
= CInfo
.AccessIndex
;
943 if (InfoKind
== BTF::FIELD_EXISTENCE
)
948 // If the next Call (the top of the stack) is a BPFPreserveFieldInfoAI,
949 // the action will be extracting field info.
950 if (CallStack
.size()) {
951 auto StackElem2
= CallStack
.top();
952 CallInfo CInfo2
= StackElem2
.second
;
953 if (CInfo2
.Kind
== BPFPreserveFieldInfoAI
) {
954 InfoKind
= CInfo2
.AccessIndex
;
955 assert(CallStack
.size() == 1);
960 uint64_t AccessIndex
= CInfo
.AccessIndex
;
961 AccessKey
+= ":" + std::to_string(AccessIndex
);
963 MDNode
*MDN
= CInfo
.Metadata
;
964 // At this stage, it cannot be pointer type.
965 auto *CTy
= cast
<DICompositeType
>(stripQualifiers(cast
<DIType
>(MDN
)));
966 PatchImm
= GetFieldInfo(InfoKind
, CTy
, AccessIndex
, PatchImm
,
967 CInfo
.RecordAlignment
);
971 // "llvm." + type name + ":" + reloc type + ":" + patched imm + "$" +
973 // uniquely identifying one relocation.
974 // The prefix "llvm." indicates this is a temporary global, which should
975 // not be emitted to ELF file.
976 AccessKey
= "llvm." + TypeName
+ ":" + std::to_string(InfoKind
) + ":" +
977 std::to_string(PatchImm
) + "$" + AccessKey
;
/// Build the access key for a type-info or enum-value relocation.
///
/// The type is taken from CInfo.Metadata with qualifiers stripped but
/// typedefs kept (so its name is asserted to be non-empty). Depending on
/// CInfo.AccessIndex:
///  - TYPE_SIZE: PatchImm is the byte size of the fully stripped base type.
///  - ENUM_VALUE_EXISTENCE / ENUM_VALUE: operand 1 of \p Call is resolved to
///    a global string of the form "<EnumeratorStr>:<Value>"; the matching
///    enumerator's index becomes AccessStr and, for ENUM_VALUE, the value
///    text is parsed with std::stoll() into PatchImm.
/// The resulting key is
/// "llvm.<TypeName>:<AccessIndex>:<PatchImm>$<AccessStr>".
///
/// NOTE(review): the CInfo and IsInt32Ret parameter lines, the declarations
/// of PatchImm and EnumIndex, the TYPE_EXISTENCE/TYPE_MATCH branch body and
/// the return statement are not visible in this excerpt.
982 MDNode
*BPFAbstractMemberAccess::computeAccessKey(CallInst
*Call
,
984 std::string
&AccessKey
,
986 DIType
*Ty
= stripQualifiers(cast
<DIType
>(CInfo
.Metadata
), false);
987 assert(!Ty
->getName().empty());
990 std::string
AccessStr("0");
991 if (CInfo
.AccessIndex
== BTF::TYPE_EXISTENCE
||
992 CInfo
.AccessIndex
== BTF::TYPE_MATCH
) {
994 } else if (CInfo
.AccessIndex
== BTF::TYPE_SIZE
) {
995 // typedef debuginfo type has size 0, get the eventual base type.
996 DIType
*BaseTy
= stripQualifiers(Ty
, true);
997 PatchImm
= BaseTy
->getSizeInBits() / 8;
999 // ENUM_VALUE_EXISTENCE and ENUM_VALUE
1002 // The argument could be a global variable or a getelementptr with base to
1003 // a global variable depending on whether the clang option `opaque-options`
1005 const GlobalVariable
*GV
=
1006 cast
<GlobalVariable
>(Call
->getArgOperand(1)->stripPointerCasts());
1007 assert(GV
->hasInitializer());
1008 const ConstantDataArray
*DA
= cast
<ConstantDataArray
>(GV
->getInitializer());
1009 assert(DA
->isString());
1010 StringRef ValueStr
= DA
->getAsString();
1012 // ValueStr format: <EnumeratorStr>:<Value>
1013 size_t Separator
= ValueStr
.find_first_of(':');
1014 StringRef EnumeratorStr
= ValueStr
.substr(0, Separator
);
1016 // Find enumerator index in the debuginfo
1017 DIType
*BaseTy
= stripQualifiers(Ty
, true);
1018 const auto *CTy
= cast
<DICompositeType
>(BaseTy
);
1019 assert(CTy
->getTag() == dwarf::DW_TAG_enumeration_type
);
1021 for (const auto Element
: CTy
->getElements()) {
1022 const auto *Enum
= cast
<DIEnumerator
>(Element
);
1023 if (Enum
->getName() == EnumeratorStr
) {
1024 AccessStr
= std::to_string(EnumIndex
);
1030 if (CInfo
.AccessIndex
== BTF::ENUM_VALUE
) {
1031 StringRef EValueStr
= ValueStr
.substr(Separator
+ 1);
1032 PatchImm
= std::stoll(std::string(EValueStr
));
1038 AccessKey
= "llvm." + Ty
->getName().str() + ":" +
1039 std::to_string(CInfo
.AccessIndex
) + std::string(":") +
1040 std::to_string(PatchImm
) + std::string("$") + AccessStr
;
1045 /// Call/Kind is the base preserve_*_access_index() call. Attempts to do
1046 /// transformation to a chain of relocatable GEPs.
///
/// The call is replaced by a load of a global variable named after the
/// computed access key. On the field-info path the loaded value itself
/// replaces the call; otherwise the load feeds an i8 getelementptr off Base
/// that is cast back to the call's type. Both replacements are routed through
/// BPFCoreSharedInfo::insertPassThrough before the call is erased.
///
/// NOTE(review): the embedded listing numbers skip values (1047 -> 1049 ->
/// 1051 -> 1054, 1067 -> 1069 -> 1071, ...), so the second parameter, several
/// declarations, the conditions selecting between the 32/64-bit variants and
/// the return statements are not visible in this copy. Confirm against the
/// upstream file.
1047 bool BPFAbstractMemberAccess::transformGEPChain(CallInst
*Call
,
1049 std::string AccessKey
;
1051 Value
*Base
= nullptr;
// Field-info calls start out flagged as 32-bit results. IsInt32Ret is also
// handed to computeAccessKey below, which presumably may adjust it -- its full
// signature is not visible here.
1054 IsInt32Ret
= CInfo
.Kind
== BPFPreserveFieldInfoAI
;
// Field-info calls that carry metadata get their key from computeAccessKey;
// the other visible path derives both Base and the key from
// computeBaseAndAccessKey.
1055 if (CInfo
.Kind
== BPFPreserveFieldInfoAI
&& CInfo
.Metadata
) {
1056 TypeMeta
= computeAccessKey(Call
, CInfo
, AccessKey
, IsInt32Ret
);
1058 Base
= computeBaseAndAccessKey(Call
, CInfo
, AccessKey
, TypeMeta
);
1063 BasicBlock
*BB
= Call
->getParent();
// One global per distinct access key: create and cache it on first use,
// otherwise reuse the entry already stored in GEPGlobals.
1066 if (GEPGlobals
.find(AccessKey
) == GEPGlobals
.end()) {
1067 IntegerType
*VarType
;
// NOTE(review): the condition choosing between the i32 and i64 assignments
// (listing lines 1068/1070) is not visible here.
1069 VarType
= Type::getInt32Ty(BB
->getContext()); // 32bit return value
1071 VarType
= Type::getInt64Ty(BB
->getContext()); // 64bit ptr or enum value
// The global is named after the access key, has external linkage and no
// initializer; it is tagged with AmaAttr and carries the type metadata.
1073 GV
= new GlobalVariable(*M
, VarType
, false, GlobalVariable::ExternalLinkage
,
1074 nullptr, AccessKey
);
1075 GV
->addAttribute(BPFCoreSharedInfo::AmaAttr
);
1076 GV
->setMetadata(LLVMContext::MD_preserve_access_index
, TypeMeta
);
1077 GEPGlobals
[AccessKey
] = GV
;
1079 GV
= GEPGlobals
[AccessKey
];
1082 if (CInfo
.Kind
== BPFPreserveFieldInfoAI
) {
1083 // Load the global variable which represents the returned field info.
// NOTE(review): the condition choosing between the i32 and i64 loads (listing
// lines 1085/1087) is not visible here. Both loads are inserted before Call.
1086 LDInst
= new LoadInst(Type::getInt32Ty(BB
->getContext()), GV
, "", Call
);
1088 LDInst
= new LoadInst(Type::getInt64Ty(BB
->getContext()), GV
, "", Call
);
// Route the loaded value through a passthrough, hand it to every user of the
// original call, then delete the call.
1090 Instruction
*PassThroughInst
=
1091 BPFCoreSharedInfo::insertPassThrough(M
, BB
, LDInst
, Call
);
1092 Call
->replaceAllUsesWith(PassThroughInst
);
1093 Call
->eraseFromParent();
1097 // For any original GEP Call and Base %2 like
1098 // %4 = bitcast %struct.net_device** %dev1 to i64*
1099 // it is transformed to:
1100 // %6 = load llvm.sk_buff:0:50$0:0:0:2:0
1101 // %7 = bitcast %struct.sk_buff* %2 to i8*
1102 // %8 = getelementptr i8, i8* %7, %6
1103 // %9 = bitcast i8* %8 to i64*
1104 // using %9 instead of %4
1105 // The original Call inst is removed.
1107 // Load the global variable.
1108 auto *LDInst
= new LoadInst(Type::getInt64Ty(BB
->getContext()), GV
, "", Call
);
1110 // Generate a BitCast
1111 auto *BCInst
= new BitCastInst(Base
, Type::getInt8PtrTy(BB
->getContext()));
1112 BCInst
->insertBefore(Call
);
1114 // Generate a GetElementPtr
// NOTE(review): the remaining operands of Create (listing line 1116) are not
// visible here.
1115 auto *GEP
= GetElementPtrInst::Create(Type::getInt8Ty(BB
->getContext()),
1117 GEP
->insertBefore(Call
);
1119 // Generate a BitCast
1120 auto *BCInst2
= new BitCastInst(GEP
, Call
->getType());
1121 BCInst2
->insertBefore(Call
);
1123 // For the following code,
1126 // if (...) goto Block1 else ...
1128 // %6 = load llvm.sk_buff:0:50$0:0:0:2:0
1129 // %7 = bitcast %struct.sk_buff* %2 to i8*
1130 // %8 = getelementptr i8, i8* %7, %6
1135 // if (...) goto Block3 else ...
1137 // %6 = load llvm.bpf_map:0:40$0:0:0:2:0
1138 // %7 = bitcast %struct.sk_buff* %2 to i8*
1139 // %8 = getelementptr i8, i8* %7, %6
1143 // SimplifyCFG may generate:
1146 // if (...) goto Block_Common else ...
1149 // if (...) goto Block_Common else ...
1151 // PHI = [llvm.sk_buff:0:50$0:0:0:2:0, llvm.bpf_map:0:40$0:0:0:2:0]
1153 // %7 = bitcast %struct.sk_buff* %2 to i8*
1154 // %8 = getelementptr i8, i8* %7, %6
1157 // For the above code, we cannot perform proper relocation since
1158 // "load PHI" has two possible relocations.
1160 // To prevent above tail merging, we use __builtin_bpf_passthrough()
1161 // where one of its parameters is a seq_num. Since two
1162 // __builtin_bpf_passthrough() funcs will always have different seq_num,
1163 // tail merging cannot happen. The __builtin_bpf_passthrough() will be
1164 // removed in the beginning of Target IR passes.
1166 // This approach is also used in other places when global var
1167 // representing a relocation is used.
1168 Instruction
*PassThroughInst
=
1169 BPFCoreSharedInfo::insertPassThrough(M
, BB
, BCInst2
, Call
);
// The passthrough result stands in for the original call, which is then
// deleted.
1170 Call
->replaceAllUsesWith(PassThroughInst
);
1171 Call
->eraseFromParent();
/// Collect the access-index call chains in \p F, transform every base call in
/// BaseAICalls, then remove the preserve-access-index intrinsics.
///
/// Returns true if any transformGEPChain call or
/// removePreserveAccessIndexIntrinsic returned true.
1176 bool BPFAbstractMemberAccess::doTransformation(Function
&F
) {
1177 bool Transformed
= false;
1179 // Collect PreserveDIAccessIndex Intrinsic call chains.
1180 // The call chains will be used to generate the access
1181 // patterns similar to GEP.
1182 collectAICallChains(F
);
// The call sits on the left of `||` so it runs for every entry, even after
// Transformed has already become true.
1184 for (auto &C
: BaseAICalls
)
1185 Transformed
= transformGEPChain(C
.first
, C
.second
) || Transformed
;
// Same ordering here: the intrinsic removal always runs, whatever the value of
// Transformed.
1187 return removePreserveAccessIndexIntrinsic(F
) || Transformed
;
/// Pass entry point: constructs a BPFAbstractMemberAccess from TM and runs it
/// on \p F. Returns PreservedAnalyses::none() when that run yields true and
/// PreservedAnalyses::all() otherwise. \p AM is not used in the visible body.
///
/// NOTE(review): the return-type line (listing line 1190) and the closing
/// brace are not visible in this copy.
1191 BPFAbstractMemberAccessPass::run(Function
&F
, FunctionAnalysisManager
&AM
) {
1192 return BPFAbstractMemberAccess(TM
).run(F
) ? PreservedAnalyses::none()
1193 : PreservedAnalyses::all();