//===-- EfficiencySanitizer.cpp - performance tuner -----------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This file is a part of EfficiencySanitizer, a family of performance tuners
// that detects multiple performance issues via separate sub-tools.
//
// The instrumentation phase is straightforward:
//   - Take action on every memory access: either inlined instrumentation,
//     or inserted calls to our run-time library.
//   - Optimizations may apply to avoid instrumenting some of the accesses.
//   - Turn mem{set,cpy,move} intrinsics into library calls.
// The rest is handled by the run-time library.
//===----------------------------------------------------------------------===//

#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/Statistic.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Analysis/TargetLibraryInfo.h"
#include "llvm/Transforms/Utils/Local.h"
#include "llvm/IR/Function.h"
#include "llvm/IR/IRBuilder.h"
#include "llvm/IR/IntrinsicInst.h"
#include "llvm/IR/Module.h"
#include "llvm/IR/Type.h"
#include "llvm/Support/CommandLine.h"
#include "llvm/Support/Debug.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Transforms/Instrumentation.h"
#include "llvm/Transforms/Utils/BasicBlockUtils.h"
#include "llvm/Transforms/Utils/ModuleUtils.h"
using namespace llvm;

#define DEBUG_TYPE "esan"

// The tool type must be just one of these ClTool* options, as the tools
// cannot be combined due to shadow memory constraints.
static cl::opt<bool>
    ClToolCacheFrag("esan-cache-frag", cl::init(false),
                    cl::desc("Detect data cache fragmentation"), cl::Hidden);
static cl::opt<bool>
    ClToolWorkingSet("esan-working-set", cl::init(false),
                     cl::desc("Measure the working set size"), cl::Hidden);
// Each new tool will get its own opt flag here.
// These are converted to EfficiencySanitizerOptions for use
// in the code.

static cl::opt<bool> ClInstrumentLoadsAndStores(
    "esan-instrument-loads-and-stores", cl::init(true),
    cl::desc("Instrument loads and stores"), cl::Hidden);
static cl::opt<bool> ClInstrumentMemIntrinsics(
    "esan-instrument-memintrinsics", cl::init(true),
    cl::desc("Instrument memintrinsics (memset/memcpy/memmove)"), cl::Hidden);
static cl::opt<bool> ClInstrumentFastpath(
    "esan-instrument-fastpath", cl::init(true),
    cl::desc("Instrument fastpath"), cl::Hidden);
static cl::opt<bool> ClAuxFieldInfo(
    "esan-aux-field-info", cl::init(true),
    cl::desc("Generate binary with auxiliary struct field information"),
    cl::Hidden);

// Experiments show that the performance difference can be 2x or more,
// and accuracy loss is typically negligible, so we turn this on by default.
static cl::opt<bool> ClAssumeIntraCacheLine(
    "esan-assume-intra-cache-line", cl::init(true),
    cl::desc("Assume each memory access touches just one cache line, for "
             "better performance but with a potential loss of accuracy."),
    cl::Hidden);

STATISTIC(NumInstrumentedLoads, "Number of instrumented loads");
STATISTIC(NumInstrumentedStores, "Number of instrumented stores");
STATISTIC(NumFastpaths, "Number of instrumented fastpaths");
STATISTIC(NumAccessesWithIrregularSize,
          "Number of accesses with a size outside our targeted callout sizes");
STATISTIC(NumIgnoredStructs, "Number of ignored structs");
STATISTIC(NumIgnoredGEPs, "Number of ignored GEP instructions");
STATISTIC(NumInstrumentedGEPs, "Number of instrumented GEP instructions");
STATISTIC(NumAssumedIntraCacheLine,
          "Number of accesses assumed to be intra-cache-line");

static const uint64_t EsanCtorAndDtorPriority = 0;
static const char *const EsanModuleCtorName = "esan.module_ctor";
static const char *const EsanModuleDtorName = "esan.module_dtor";
static const char *const EsanInitName = "__esan_init";
static const char *const EsanExitName = "__esan_exit";

// We need to specify the tool to the runtime earlier than
// the ctor is called in some cases, so we set a global variable.
static const char *const EsanWhichToolName = "__esan_which_tool";

// We must keep these Shadow* constants consistent with the esan runtime.
// FIXME: Try to place these shadow constants, the names of the __esan_*
// interface functions, and the ToolType enum into a header shared between
// llvm and compiler-rt.
struct ShadowMemoryParams {
  uint64_t ShadowMask;
  uint64_t ShadowOffs[3];
};

static const ShadowMemoryParams ShadowParams47 = {
    0x00000fffffffffffull,
    {
        0x0000130000000000ull, 0x0000220000000000ull, 0x0000440000000000ull,
    }};

static const ShadowMemoryParams ShadowParams40 = {
    0x0000000fffffffffull,
    {
        0x1300000000ull, 0x2200000000ull, 0x4400000000ull,
    }};

// This array is indexed by the ToolType enum.
static const int ShadowScale[] = {
    0, // ESAN_None.
    2, // ESAN_CacheFrag: 4B:1B, so 4 to 1 == >>2.
    6, // ESAN_WorkingSet: 64B:1B, so 64 to 1 == >>6.
};
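// E.g., with the working-set tool's 64B:1B mapping, one shadow byte covers a
// 64-byte cache line, so 8 MB of application memory needs 128 KB of shadow.
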
// MaxStructCounterNameSize is a soft size limit to avoid insanely long
// names for those extremely large structs.
static const unsigned MaxStructCounterNameSize = 512;

namespace {

static EfficiencySanitizerOptions
OverrideOptionsFromCL(EfficiencySanitizerOptions Options) {
  if (ClToolCacheFrag)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;
  else if (ClToolWorkingSet)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_WorkingSet;

  // Direct opt invocation with no params will have the default ESAN_None.
  // We run the default tool in that case.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_None)
    Options.ToolType = EfficiencySanitizerOptions::ESAN_CacheFrag;

  return Options;
}
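// For example, "opt -esan -esan-working-set" selects ESAN_WorkingSet, while a
// plain "-esan" invocation falls through to the ESAN_CacheFrag default above.
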
/// EfficiencySanitizer: instrument each module to find performance issues.
class EfficiencySanitizer : public ModulePass {
public:
  EfficiencySanitizer(
      const EfficiencySanitizerOptions &Opts = EfficiencySanitizerOptions())
      : ModulePass(ID), Options(OverrideOptionsFromCL(Opts)) {}
  StringRef getPassName() const override;
  void getAnalysisUsage(AnalysisUsage &AU) const override;
  bool runOnModule(Module &M) override;
  static char ID;

private:
  bool initOnModule(Module &M);
  void initializeCallbacks(Module &M);
  bool shouldIgnoreStructType(StructType *StructTy);
  void createStructCounterName(
      StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr);
  void createCacheFragAuxGV(
      Module &M, const DataLayout &DL, StructType *StructTy,
      GlobalVariable *&TypeNames, GlobalVariable *&Offsets,
      GlobalVariable *&Size);
  GlobalVariable *createCacheFragInfoGV(Module &M, const DataLayout &DL,
                                        Constant *UnitName);
  Constant *createEsanInitToolInfoArg(Module &M, const DataLayout &DL);
  void createDestructor(Module &M, Constant *ToolInfoArg);
  bool runOnFunction(Function &F, Module &M);
  bool instrumentLoadOrStore(Instruction *I, const DataLayout &DL);
  bool instrumentMemIntrinsic(MemIntrinsic *MI);
  bool instrumentGetElementPtr(Instruction *I, Module &M);
  bool insertCounterUpdate(Instruction *I, StructType *StructTy,
                           unsigned CounterIdx);
  unsigned getFieldCounterIdx(StructType *StructTy) {
    return 0; // each struct has its own counter array
  }
  unsigned getArrayCounterIdx(StructType *StructTy) {
    return StructTy->getNumElements();
  }
  unsigned getStructCounterSize(StructType *StructTy) {
    // The struct counter array includes:
    // - one counter for each struct field,
    // - one counter for the struct access within an array.
    return (StructTy->getNumElements()/*field*/ + 1/*array*/);
  }
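  // For example, a struct with 3 fields gets a 4-slot counter array: slots
  // 0-2 (from getFieldCounterIdx) count accesses to the corresponding field,
  // and slot 3 (from getArrayCounterIdx) counts accesses to the struct as an
  // array element.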
  bool shouldIgnoreMemoryAccess(Instruction *I);
  int getMemoryAccessFuncIndex(Value *Addr, const DataLayout &DL);
  Value *appToShadow(Value *Shadow, IRBuilder<> &IRB);
  bool instrumentFastpath(Instruction *I, const DataLayout &DL, bool IsStore,
                          Value *Addr, unsigned Alignment);
  // Each tool has its own fastpath routine:
  bool instrumentFastpathCacheFrag(Instruction *I, const DataLayout &DL,
                                   Value *Addr, unsigned Alignment);
  bool instrumentFastpathWorkingSet(Instruction *I, const DataLayout &DL,
                                    Value *Addr, unsigned Alignment);

  EfficiencySanitizerOptions Options;
  LLVMContext *Ctx;
  Type *IntptrTy;
  // Our slowpath involves callouts to the runtime library.
  // Access sizes are powers of two: 1, 2, 4, 8, 16.
  static const size_t NumberOfAccessSizes = 5;
  FunctionCallee EsanAlignedLoad[NumberOfAccessSizes];
  FunctionCallee EsanAlignedStore[NumberOfAccessSizes];
  FunctionCallee EsanUnalignedLoad[NumberOfAccessSizes];
  FunctionCallee EsanUnalignedStore[NumberOfAccessSizes];
  // For irregular sizes of any alignment:
  FunctionCallee EsanUnalignedLoadN, EsanUnalignedStoreN;
  FunctionCallee MemmoveFn, MemcpyFn, MemsetFn;
  Function *EsanCtorFunction;
  Function *EsanDtorFunction;
  // Remember the counter variable for each struct type to avoid
  // recomputing the variable name later during instrumentation.
  std::map<Type *, GlobalVariable *> StructTyMap;
  ShadowMemoryParams ShadowParams;
};
} // namespace

char EfficiencySanitizer::ID = 0;
INITIALIZE_PASS_BEGIN(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)
INITIALIZE_PASS_DEPENDENCY(TargetLibraryInfoWrapperPass)
INITIALIZE_PASS_END(
    EfficiencySanitizer, "esan",
    "EfficiencySanitizer: finds performance issues.", false, false)

StringRef EfficiencySanitizer::getPassName() const {
  return "EfficiencySanitizer";
}

void EfficiencySanitizer::getAnalysisUsage(AnalysisUsage &AU) const {
  AU.addRequired<TargetLibraryInfoWrapperPass>();
}

ModulePass *
llvm::createEfficiencySanitizerPass(const EfficiencySanitizerOptions &Options) {
  return new EfficiencySanitizer(Options);
}

void EfficiencySanitizer::initializeCallbacks(Module &M) {
  IRBuilder<> IRB(M.getContext());
  // Initialize the callbacks.
  for (size_t Idx = 0; Idx < NumberOfAccessSizes; ++Idx) {
    const unsigned ByteSize = 1U << Idx;
    std::string ByteSizeStr = utostr(ByteSize);
    // We'll inline the most common (i.e., aligned and frequent sizes)
    // load + store instrumentation: these callouts are for the slowpath.
    SmallString<32> AlignedLoadName("__esan_aligned_load" + ByteSizeStr);
    EsanAlignedLoad[Idx] = M.getOrInsertFunction(
        AlignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy());
    SmallString<32> AlignedStoreName("__esan_aligned_store" + ByteSizeStr);
    EsanAlignedStore[Idx] = M.getOrInsertFunction(
        AlignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy());
    SmallString<32> UnalignedLoadName("__esan_unaligned_load" + ByteSizeStr);
    EsanUnalignedLoad[Idx] = M.getOrInsertFunction(
        UnalignedLoadName, IRB.getVoidTy(), IRB.getInt8PtrTy());
    SmallString<32> UnalignedStoreName("__esan_unaligned_store" + ByteSizeStr);
    EsanUnalignedStore[Idx] = M.getOrInsertFunction(
        UnalignedStoreName, IRB.getVoidTy(), IRB.getInt8PtrTy());
  }
  EsanUnalignedLoadN = M.getOrInsertFunction(
      "__esan_unaligned_loadN", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
  EsanUnalignedStoreN = M.getOrInsertFunction(
      "__esan_unaligned_storeN", IRB.getVoidTy(), IRB.getInt8PtrTy(), IntptrTy);
  MemmoveFn =
      M.getOrInsertFunction("memmove", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy);
  MemcpyFn =
      M.getOrInsertFunction("memcpy", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt8PtrTy(), IntptrTy);
  MemsetFn =
      M.getOrInsertFunction("memset", IRB.getInt8PtrTy(), IRB.getInt8PtrTy(),
                            IRB.getInt32Ty(), IntptrTy);
}
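// For reference, with NumberOfAccessSizes == 5 the loop above registers
// __esan_aligned_{load,store}{1,2,4,8,16} plus the matching
// __esan_unaligned_* variants, each taking just the i8* application address;
// only the *N variants also take a size argument.
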
bool EfficiencySanitizer::shouldIgnoreStructType(StructType *StructTy) {
  if (StructTy == nullptr || StructTy->isOpaque() /* no struct body */)
    return true;
  return false;
}

void EfficiencySanitizer::createStructCounterName(
    StructType *StructTy, SmallString<MaxStructCounterNameSize> &NameStr) {
  // Append NumFields and field type ids to avoid struct conflicts
  // with the same name but different fields.
  if (StructTy->hasName())
    NameStr += StructTy->getName();
  else
    NameStr += "struct.anon";
  // We allow the actual size of the StructCounterName to be larger than
  // MaxStructCounterNameSize and append $NumFields and at least one
  // field type id.
  // Append $NumFields.
  NameStr += "$";
  Twine(StructTy->getNumElements()).toVector(NameStr);
  // Append struct field type ids in the reverse order.
  for (int i = StructTy->getNumElements() - 1; i >= 0; --i) {
    NameStr += "$";
    Twine(StructTy->getElementType(i)->getTypeID()).toVector(NameStr);
    if (NameStr.size() >= MaxStructCounterNameSize)
      break;
  }
  if (StructTy->isLiteral()) {
    // End with $ for literal struct.
    NameStr += "$";
  }
}
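// For illustration, a 3-field struct named "struct.Foo" produces a name of the
// schematic form "struct.Foo$3$T2$T1$T0", where each $Tn is the Type::TypeID
// of field n appended in reverse order (the concrete numeric ids depend on the
// TypeID enum), with a trailing "$" only for literal structs.
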
// Create global variables with auxiliary information (e.g., struct field size,
// offset, and type name) for better user reports.
void EfficiencySanitizer::createCacheFragAuxGV(
    Module &M, const DataLayout &DL, StructType *StructTy,
    GlobalVariable *&TypeName, GlobalVariable *&Offset,
    GlobalVariable *&Size) {
  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  // FieldTypeName.
  auto *TypeNameArrayTy = ArrayType::get(Int8PtrTy, StructTy->getNumElements());
  TypeName = new GlobalVariable(M, TypeNameArrayTy, true,
                                GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> TypeNameVec;
  // FieldOffset.
  auto *OffsetArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Offset = new GlobalVariable(M, OffsetArrayTy, true,
                              GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> OffsetVec;
  // FieldSize.
  auto *SizeArrayTy = ArrayType::get(Int32Ty, StructTy->getNumElements());
  Size = new GlobalVariable(M, SizeArrayTy, true,
                            GlobalVariable::InternalLinkage, nullptr);
  SmallVector<Constant *, 16> SizeVec;
  for (unsigned i = 0; i < StructTy->getNumElements(); ++i) {
    Type *Ty = StructTy->getElementType(i);
    std::string Str;
    raw_string_ostream StrOS(Str);
    Ty->print(StrOS);
    TypeNameVec.push_back(
        ConstantExpr::getPointerCast(
            createPrivateGlobalForString(M, StrOS.str(), true),
            Int8PtrTy));
    OffsetVec.push_back(
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getElementOffset(i)));
    SizeVec.push_back(ConstantInt::get(Int32Ty,
                                       DL.getTypeAllocSize(Ty)));
  }
  TypeName->setInitializer(ConstantArray::get(TypeNameArrayTy, TypeNameVec));
  Offset->setInitializer(ConstantArray::get(OffsetArrayTy, OffsetVec));
  Size->setInitializer(ConstantArray::get(SizeArrayTy, SizeVec));
}

// Create the global variable for the cache-fragmentation tool.
GlobalVariable *EfficiencySanitizer::createCacheFragInfoGV(
    Module &M, const DataLayout &DL, Constant *UnitName) {
  assert(Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag);

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  auto *Int8PtrPtrTy = Int8PtrTy->getPointerTo();
  auto *Int32Ty = Type::getInt32Ty(*Ctx);
  auto *Int32PtrTy = Type::getInt32PtrTy(*Ctx);
  auto *Int64Ty = Type::getInt64Ty(*Ctx);
  auto *Int64PtrTy = Type::getInt64PtrTy(*Ctx);
  // This structure should be kept consistent with the StructInfo struct
  // in the runtime library.
  // struct StructInfo {
  //   const char *StructName;
  //   u32 Size;
  //   u32 NumFields;
  //   u32 *FieldOffset;           // auxiliary struct field info.
  //   u32 *FieldSize;             // auxiliary struct field info.
  //   const char **FieldTypeName; // auxiliary struct field info.
  //   u64 *FieldCounters;
  //   u64 *ArrayCounter;
  // };
  auto *StructInfoTy =
      StructType::get(Int8PtrTy, Int32Ty, Int32Ty, Int32PtrTy, Int32PtrTy,
                      Int8PtrPtrTy, Int64PtrTy, Int64PtrTy);
  auto *StructInfoPtrTy = StructInfoTy->getPointerTo();
  // This structure should be kept consistent with the CacheFragInfo struct
  // in the runtime library.
  // struct CacheFragInfo {
  //   const char *UnitName;
  //   u32 NumStructs;
  //   StructInfo *Structs;
  // };
  auto *CacheFragInfoTy = StructType::get(Int8PtrTy, Int32Ty, StructInfoPtrTy);

  std::vector<StructType *> Vec = M.getIdentifiedStructTypes();
  unsigned NumStructs = 0;
  SmallVector<Constant *, 16> Initializers;

  for (auto &StructTy : Vec) {
    if (shouldIgnoreStructType(StructTy)) {
      ++NumIgnoredStructs;
      continue;
    }
    ++NumStructs;

    // StructName.
    SmallString<MaxStructCounterNameSize> CounterNameStr;
    createStructCounterName(StructTy, CounterNameStr);
    GlobalVariable *StructCounterName = createPrivateGlobalForString(
        M, CounterNameStr, /*AllowMerging*/true);

    // Counters.
    // We create the counter array with StructCounterName and weak linkage
    // so that the structs with the same name and layout from different
    // compilation units will be merged into one.
    auto *CounterArrayTy = ArrayType::get(Int64Ty,
                                          getStructCounterSize(StructTy));
    GlobalVariable *Counters =
      new GlobalVariable(M, CounterArrayTy, false,
                         GlobalVariable::WeakAnyLinkage,
                         ConstantAggregateZero::get(CounterArrayTy),
                         CounterNameStr);

    // Remember the counter variable for each struct type.
    StructTyMap.insert(std::pair<Type *, GlobalVariable *>(StructTy, Counters));

    // We pass the field type name array, offset array, and size array to
    // the runtime for better reporting.
    GlobalVariable *TypeName = nullptr, *Offset = nullptr, *Size = nullptr;
    if (ClAuxFieldInfo)
      createCacheFragAuxGV(M, DL, StructTy, TypeName, Offset, Size);

    Constant *FieldCounterIdx[2];
    FieldCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    FieldCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getFieldCounterIdx(StructTy));
    Constant *ArrayCounterIdx[2];
    ArrayCounterIdx[0] = ConstantInt::get(Int32Ty, 0);
    ArrayCounterIdx[1] = ConstantInt::get(Int32Ty,
                                          getArrayCounterIdx(StructTy));
    Initializers.push_back(ConstantStruct::get(
        StructInfoTy,
        ConstantExpr::getPointerCast(StructCounterName, Int8PtrTy),
        ConstantInt::get(Int32Ty,
                         DL.getStructLayout(StructTy)->getSizeInBytes()),
        ConstantInt::get(Int32Ty, StructTy->getNumElements()),
        Offset == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                          : ConstantExpr::getPointerCast(Offset, Int32PtrTy),
        Size == nullptr ? ConstantPointerNull::get(Int32PtrTy)
                        : ConstantExpr::getPointerCast(Size, Int32PtrTy),
        TypeName == nullptr
            ? ConstantPointerNull::get(Int8PtrPtrTy)
            : ConstantExpr::getPointerCast(TypeName, Int8PtrPtrTy),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       FieldCounterIdx),
        ConstantExpr::getGetElementPtr(CounterArrayTy, Counters,
                                       ArrayCounterIdx)));
  }
  // Structs.
  Constant *StructInfo;
  if (NumStructs == 0) {
    StructInfo = ConstantPointerNull::get(StructInfoPtrTy);
  } else {
    auto *StructInfoArrayTy = ArrayType::get(StructInfoTy, NumStructs);
    StructInfo = ConstantExpr::getPointerCast(
        new GlobalVariable(M, StructInfoArrayTy, false,
                           GlobalVariable::InternalLinkage,
                           ConstantArray::get(StructInfoArrayTy, Initializers)),
        StructInfoPtrTy);
  }

  auto *CacheFragInfoGV = new GlobalVariable(
      M, CacheFragInfoTy, true, GlobalVariable::InternalLinkage,
      ConstantStruct::get(CacheFragInfoTy, UnitName,
                          ConstantInt::get(Int32Ty, NumStructs), StructInfo));
  return CacheFragInfoGV;
}

// Create the tool-specific argument passed to EsanInit and EsanExit.
Constant *EfficiencySanitizer::createEsanInitToolInfoArg(Module &M,
                                                         const DataLayout &DL) {
  // This structure contains tool-specific information about each compilation
  // unit (module) and is passed to the runtime library.
  GlobalVariable *ToolInfoGV = nullptr;

  auto *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  // Compilation unit name.
  auto *UnitName = ConstantExpr::getPointerCast(
      createPrivateGlobalForString(M, M.getModuleIdentifier(), true),
      Int8PtrTy);

  // Create the tool-specific variable.
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag)
    ToolInfoGV = createCacheFragInfoGV(M, DL, UnitName);

  if (ToolInfoGV != nullptr)
    return ConstantExpr::getPointerCast(ToolInfoGV, Int8PtrTy);

  // Create the null pointer if no tool-specific variable created.
  return ConstantPointerNull::get(Int8PtrTy);
}

void EfficiencySanitizer::createDestructor(Module &M, Constant *ToolInfoArg) {
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  EsanDtorFunction = Function::Create(FunctionType::get(Type::getVoidTy(*Ctx),
                                                        false),
                                      GlobalValue::InternalLinkage,
                                      EsanModuleDtorName, &M);
  ReturnInst::Create(*Ctx, BasicBlock::Create(*Ctx, "", EsanDtorFunction));
  IRBuilder<> IRB_Dtor(EsanDtorFunction->getEntryBlock().getTerminator());
  FunctionCallee EsanExit =
      M.getOrInsertFunction(EsanExitName, IRB_Dtor.getVoidTy(), Int8PtrTy);
  IRB_Dtor.CreateCall(EsanExit, {ToolInfoArg});
  appendToGlobalDtors(M, EsanDtorFunction, EsanCtorAndDtorPriority);
}

bool EfficiencySanitizer::initOnModule(Module &M) {
  Triple TargetTriple(M.getTargetTriple());
  if (TargetTriple.isMIPS64())
    ShadowParams = ShadowParams40;
  else
    ShadowParams = ShadowParams47;

  Ctx = &M.getContext();
  const DataLayout &DL = M.getDataLayout();
  IRBuilder<> IRB(M.getContext());
  IntegerType *OrdTy = IRB.getInt32Ty();
  PointerType *Int8PtrTy = Type::getInt8PtrTy(*Ctx);
  IntptrTy = DL.getIntPtrType(M.getContext());
  // Create the variable passed to EsanInit and EsanExit.
  Constant *ToolInfoArg = createEsanInitToolInfoArg(M, DL);

  // We specify the tool type both in the EsanWhichToolName global
  // and as an arg to the init routine as a sanity check.
  std::tie(EsanCtorFunction, std::ignore) = createSanitizerCtorAndInitFunctions(
      M, EsanModuleCtorName, EsanInitName, /*InitArgTypes=*/{OrdTy, Int8PtrTy},
      /*InitArgs=*/{
          ConstantInt::get(OrdTy, static_cast<int>(Options.ToolType)),
          ToolInfoArg});
  appendToGlobalCtors(M, EsanCtorFunction, EsanCtorAndDtorPriority);

  createDestructor(M, ToolInfoArg);

  new GlobalVariable(M, OrdTy, true,
                     GlobalValue::WeakAnyLinkage,
                     ConstantInt::get(OrdTy,
                                      static_cast<int>(Options.ToolType)),
                     EsanWhichToolName);

  return true;
}

Value *EfficiencySanitizer::appToShadow(Value *Shadow, IRBuilder<> &IRB) {
  // Shadow = ((App & Mask) + Offs) >> Scale
  Shadow = IRB.CreateAnd(Shadow, ConstantInt::get(IntptrTy, ShadowParams.ShadowMask));
  uint64_t Offs;
  int Scale = ShadowScale[Options.ToolType];
  if (Scale <= 2)
    Offs = ShadowParams.ShadowOffs[Scale];
  else
    Offs = ShadowParams.ShadowOffs[0] << Scale;
  Shadow = IRB.CreateAdd(Shadow, ConstantInt::get(IntptrTy, Offs));
  if (Scale > 0)
    Shadow = IRB.CreateLShr(Shadow, Scale);
  return Shadow;
}
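// For example, with ShadowParams47 and the working-set tool (Scale == 6,
// Offs == ShadowOffs[0] << 6), the computation above reduces to
// ((App & 0x00000fffffffffff) >> 6) + 0x0000130000000000, i.e., one shadow
// byte per 64-byte cache line.
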
bool EfficiencySanitizer::shouldIgnoreMemoryAccess(Instruction *I) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    // We'd like to know about cache fragmentation in vtable accesses and
    // constant data references, so we do not currently ignore anything.
    return false;
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    // TODO: the instrumentation disturbs the data layout on the stack, so we
    // may want to add an option to ignore stack references (if we can
    // distinguish them) to reduce overhead.
    return false;
  }
  // TODO(bruening): future tools will be returning true for some cases.
  return false;
}

bool EfficiencySanitizer::runOnModule(Module &M) {
  bool Res = initOnModule(M);
  initializeCallbacks(M);
  for (auto &F : M) {
    Res |= runOnFunction(F, M);
  }
  return Res;
}

bool EfficiencySanitizer::runOnFunction(Function &F, Module &M) {
  // This is required to prevent instrumenting the call to __esan_init from
  // within the module constructor.
  if (&F == EsanCtorFunction)
    return false;
  SmallVector<Instruction *, 8> LoadsAndStores;
  SmallVector<Instruction *, 8> MemIntrinCalls;
  SmallVector<Instruction *, 8> GetElementPtrs;
  bool Res = false;
  const DataLayout &DL = M.getDataLayout();
  const TargetLibraryInfo *TLI =
      &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI();

  for (auto &BB : F) {
    for (auto &Inst : BB) {
      if ((isa<LoadInst>(Inst) || isa<StoreInst>(Inst) ||
           isa<AtomicRMWInst>(Inst) || isa<AtomicCmpXchgInst>(Inst)) &&
          !shouldIgnoreMemoryAccess(&Inst))
        LoadsAndStores.push_back(&Inst);
      else if (isa<MemIntrinsic>(Inst))
        MemIntrinCalls.push_back(&Inst);
      else if (isa<GetElementPtrInst>(Inst))
        GetElementPtrs.push_back(&Inst);
      else if (CallInst *CI = dyn_cast<CallInst>(&Inst))
        maybeMarkSanitizerLibraryCallNoBuiltin(CI, TLI);
    }
  }

  if (ClInstrumentLoadsAndStores) {
    for (auto Inst : LoadsAndStores) {
      Res |= instrumentLoadOrStore(Inst, DL);
    }
  }

  if (ClInstrumentMemIntrinsics) {
    for (auto Inst : MemIntrinCalls) {
      Res |= instrumentMemIntrinsic(cast<MemIntrinsic>(Inst));
    }
  }

  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    for (auto Inst : GetElementPtrs) {
      Res |= instrumentGetElementPtr(Inst, M);
    }
  }

  return Res;
}

bool EfficiencySanitizer::instrumentLoadOrStore(Instruction *I,
                                                const DataLayout &DL) {
  IRBuilder<> IRB(I);
  bool IsStore;
  Value *Addr;
  unsigned Alignment;
  if (LoadInst *Load = dyn_cast<LoadInst>(I)) {
    IsStore = false;
    Alignment = Load->getAlignment();
    Addr = Load->getPointerOperand();
  } else if (StoreInst *Store = dyn_cast<StoreInst>(I)) {
    IsStore = true;
    Alignment = Store->getAlignment();
    Addr = Store->getPointerOperand();
  } else if (AtomicRMWInst *RMW = dyn_cast<AtomicRMWInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = RMW->getPointerOperand();
  } else if (AtomicCmpXchgInst *Xchg = dyn_cast<AtomicCmpXchgInst>(I)) {
    IsStore = true;
    Alignment = 0;
    Addr = Xchg->getPointerOperand();
  } else
    llvm_unreachable("Unsupported mem access type");

  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  FunctionCallee OnAccessFunc = nullptr;

  // Convert 0 to the default alignment.
  if (Alignment == 0)
    Alignment = DL.getPrefTypeAlignment(OrigTy);

  if (IsStore)
    NumInstrumentedStores++;
  else
    NumInstrumentedLoads++;
  int Idx = getMemoryAccessFuncIndex(Addr, DL);
  if (Idx < 0) {
    OnAccessFunc = IsStore ? EsanUnalignedStoreN : EsanUnalignedLoadN;
    IRB.CreateCall(OnAccessFunc,
                   {IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()),
                    ConstantInt::get(IntptrTy, TypeSizeBytes)});
  } else {
    if (ClInstrumentFastpath &&
        instrumentFastpath(I, DL, IsStore, Addr, Alignment)) {
      NumFastpaths++;
      return true;
    }
    if (Alignment == 0 || (Alignment % TypeSizeBytes) == 0)
      OnAccessFunc = IsStore ? EsanAlignedStore[Idx] : EsanAlignedLoad[Idx];
    else
      OnAccessFunc = IsStore ? EsanUnalignedStore[Idx] : EsanUnalignedLoad[Idx];
    IRB.CreateCall(OnAccessFunc,
                   IRB.CreatePointerCast(Addr, IRB.getInt8PtrTy()));
  }
  return true;
}
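// For example, a suitably aligned 4-byte load of i32* %p that is not handled
// by the fastpath becomes roughly:
//   %0 = bitcast i32* %p to i8*
//   call void @__esan_aligned_load4(i8* %0)
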
// It's simplest to replace the memset/memmove/memcpy intrinsics with
// calls that the runtime library intercepts.
// Our pass is late enough that calls should not turn back into intrinsics.
bool EfficiencySanitizer::instrumentMemIntrinsic(MemIntrinsic *MI) {
  IRBuilder<> IRB(MI);
  bool Res = false;
  if (isa<MemSetInst>(MI)) {
    IRB.CreateCall(
        MemsetFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(1), IRB.getInt32Ty(), false),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else if (isa<MemTransferInst>(MI)) {
    IRB.CreateCall(
        isa<MemCpyInst>(MI) ? MemcpyFn : MemmoveFn,
        {IRB.CreatePointerCast(MI->getArgOperand(0), IRB.getInt8PtrTy()),
         IRB.CreatePointerCast(MI->getArgOperand(1), IRB.getInt8PtrTy()),
         IRB.CreateIntCast(MI->getArgOperand(2), IntptrTy, false)});
    MI->eraseFromParent();
    Res = true;
  } else
    llvm_unreachable("Unsupported mem intrinsic type");
  return Res;
}
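// For example, a memset intrinsic call becomes roughly
//   call i8* @memset(i8* %p, i32 %v, i64 %n)
// with the i8 value operand widened to i32, so the esan runtime's memset
// interceptor sees an ordinary libc call.
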
bool EfficiencySanitizer::instrumentGetElementPtr(Instruction *I, Module &M) {
  GetElementPtrInst *GepInst = dyn_cast<GetElementPtrInst>(I);
  bool Res = false;
  if (GepInst == nullptr || GepInst->getNumIndices() == 1) {
    ++NumIgnoredGEPs;
    return false;
  }
  Type *SourceTy = GepInst->getSourceElementType();
  StructType *StructTy = nullptr;
  ConstantInt *Idx;
  // Check if GEP calculates address from a struct array.
  if (isa<StructType>(SourceTy)) {
    StructTy = cast<StructType>(SourceTy);
    Idx = dyn_cast<ConstantInt>(GepInst->getOperand(1));
    if ((Idx == nullptr || Idx->getSExtValue() != 0) &&
        !shouldIgnoreStructType(StructTy) && StructTyMap.count(StructTy) != 0)
      Res |= insertCounterUpdate(I, StructTy, getArrayCounterIdx(StructTy));
  }
  // Iterate all (except the first and the last) idx within each GEP instruction
  // for possible nested struct field address calculation.
  for (unsigned i = 1; i < GepInst->getNumIndices(); ++i) {
    SmallVector<Value *, 8> IdxVec(GepInst->idx_begin(),
                                   GepInst->idx_begin() + i);
    Type *Ty = GetElementPtrInst::getIndexedType(SourceTy, IdxVec);
    unsigned CounterIdx = 0;
    if (isa<ArrayType>(Ty)) {
      ArrayType *ArrayTy = cast<ArrayType>(Ty);
      StructTy = dyn_cast<StructType>(ArrayTy->getElementType());
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // The last counter for struct array access.
      CounterIdx = getArrayCounterIdx(StructTy);
    } else if (isa<StructType>(Ty)) {
      StructTy = cast<StructType>(Ty);
      if (shouldIgnoreStructType(StructTy) || StructTyMap.count(StructTy) == 0)
        continue;
      // Get the StructTy's subfield index.
      Idx = cast<ConstantInt>(GepInst->getOperand(i+1));
      assert(Idx->getSExtValue() >= 0 &&
             Idx->getSExtValue() < StructTy->getNumElements());
      CounterIdx = getFieldCounterIdx(StructTy) + Idx->getSExtValue();
    }
    Res |= insertCounterUpdate(I, StructTy, CounterIdx);
  }
  if (Res)
    ++NumInstrumentedGEPs;
  else
    ++NumIgnoredGEPs;
  return Res;
}

bool EfficiencySanitizer::insertCounterUpdate(Instruction *I,
                                              StructType *StructTy,
                                              unsigned CounterIdx) {
  GlobalVariable *CounterArray = StructTyMap[StructTy];
  if (CounterArray == nullptr)
    return false;
  IRBuilder<> IRB(I);
  Constant *Indices[2];
  // Xref http://llvm.org/docs/LangRef.html#i-getelementptr and
  // http://llvm.org/docs/GetElementPtr.html.
  // The first index of the GEP instruction steps through the first operand,
  // i.e., the array itself.
  Indices[0] = ConstantInt::get(IRB.getInt32Ty(), 0);
  // The second index is the index within the array.
  Indices[1] = ConstantInt::get(IRB.getInt32Ty(), CounterIdx);
  Constant *Counter =
      ConstantExpr::getGetElementPtr(
          ArrayType::get(IRB.getInt64Ty(), getStructCounterSize(StructTy)),
          CounterArray, Indices);
  Value *Load = IRB.CreateLoad(IRB.getInt64Ty(), Counter);
  IRB.CreateStore(IRB.CreateAdd(Load, ConstantInt::get(IRB.getInt64Ty(), 1)),
                  Counter);
  return true;
}
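// The update above is a plain (non-atomic) increment; for a 3-field struct it
// emits roughly:
//   %old = load i64, i64* getelementptr ([4 x i64], [4 x i64]* @<counters>, i32 0, i32 <CounterIdx>)
//   %new = add i64 %old, 1
//   store i64 %new, i64* <the same element pointer>
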
int EfficiencySanitizer::getMemoryAccessFuncIndex(Value *Addr,
                                                  const DataLayout &DL) {
  Type *OrigPtrTy = Addr->getType();
  Type *OrigTy = cast<PointerType>(OrigPtrTy)->getElementType();
  assert(OrigTy->isSized());
  // The size is always a multiple of 8.
  uint32_t TypeSizeBytes = DL.getTypeStoreSizeInBits(OrigTy) / 8;
  if (TypeSizeBytes != 1 && TypeSizeBytes != 2 && TypeSizeBytes != 4 &&
      TypeSizeBytes != 8 && TypeSizeBytes != 16) {
    // Irregular sizes do not have per-size call targets.
    NumAccessesWithIrregularSize++;
    return -1;
  }
  size_t Idx = countTrailingZeros(TypeSizeBytes);
  assert(Idx < NumberOfAccessSizes);
  return Idx;
}
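// For example, an 8-byte access yields Idx == countTrailingZeros(8) == 3 and
// thus the size-8 callouts, while a 12-byte access returns -1 and is routed
// to __esan_unaligned_{load,store}N by the caller.
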
bool EfficiencySanitizer::instrumentFastpath(Instruction *I,
                                             const DataLayout &DL, bool IsStore,
                                             Value *Addr, unsigned Alignment) {
  if (Options.ToolType == EfficiencySanitizerOptions::ESAN_CacheFrag) {
    return instrumentFastpathCacheFrag(I, DL, Addr, Alignment);
  } else if (Options.ToolType == EfficiencySanitizerOptions::ESAN_WorkingSet) {
    return instrumentFastpathWorkingSet(I, DL, Addr, Alignment);
  }
  return false;
}

bool EfficiencySanitizer::instrumentFastpathCacheFrag(Instruction *I,
                                                      const DataLayout &DL,
                                                      Value *Addr,
                                                      unsigned Alignment) {
  // Do nothing.
  return true; // Return true to avoid slowpath instrumentation.
}

bool EfficiencySanitizer::instrumentFastpathWorkingSet(
    Instruction *I, const DataLayout &DL, Value *Addr, unsigned Alignment) {
  assert(ShadowScale[Options.ToolType] == 6); // The code below assumes this
  IRBuilder<> IRB(I);
  Type *OrigTy = cast<PointerType>(Addr->getType())->getElementType();
  const uint32_t TypeSize = DL.getTypeStoreSizeInBits(OrigTy);
  // Bail to the slowpath if the access might touch multiple cache lines.
  // An access aligned to its size is guaranteed to be intra-cache-line.
  // getMemoryAccessFuncIndex has already ruled out a size larger than 16
  // and thus larger than a cache line for platforms this tool targets
  // (and our shadow memory setup assumes 64-byte cache lines).
  assert(TypeSize <= 128);
  if (!(TypeSize == 8 ||
        (Alignment % (TypeSize / 8)) == 0)) {
    if (ClAssumeIntraCacheLine)
      ++NumAssumedIntraCacheLine;
    else
      return false;
  }

  // We inline instrumentation to set the corresponding shadow bits for
  // each cache line touched by the application. Here we handle a single
  // load or store where we've already ruled out the possibility that it
  // might touch more than one cache line and thus we simply update the
  // shadow memory for a single cache line.
  // Our shadow memory model is fine with races when manipulating shadow values.
  // We generate the following code:
  //
  //   const char BitMask = 0x81;
  //   char *ShadowAddr = appToShadow(AppAddr);
  //   if ((*ShadowAddr & BitMask) != BitMask)
  //     *ShadowAddr |= BitMask;
  //
  Value *AddrPtr = IRB.CreatePointerCast(Addr, IntptrTy);
  Value *ShadowPtr = appToShadow(AddrPtr, IRB);
  Type *ShadowTy = IntegerType::get(*Ctx, 8U);
  Type *ShadowPtrTy = PointerType::get(ShadowTy, 0);
  // The bottom bit is used for the current sampling period's working set.
  // The top bit is used for the total working set. We set both on each
  // memory access, if they are not already set.
  Value *ValueMask = ConstantInt::get(ShadowTy, 0x81); // 10000001B

  Value *OldValue =
      IRB.CreateLoad(ShadowTy, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  // The AND and CMP will be turned into a TEST instruction by the compiler.
  Value *Cmp = IRB.CreateICmpNE(IRB.CreateAnd(OldValue, ValueMask), ValueMask);
  Instruction *CmpTerm = SplitBlockAndInsertIfThen(Cmp, I, false);
  // FIXME: do I need to call SetCurrentDebugLocation?
  IRB.SetInsertPoint(CmpTerm);
  // We use OR to set the shadow bits to avoid corrupting the middle 6 bits,
  // which are used by the runtime library.
  Value *NewVal = IRB.CreateOr(OldValue, ValueMask);
  IRB.CreateStore(NewVal, IRB.CreateIntToPtr(ShadowPtr, ShadowPtrTy));
  IRB.SetInsertPoint(I);
  return true;
}