1 //===- OpenMPIRBuilder.cpp - Builder for LLVM-IR for OpenMP directives ----===//
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
7 //===----------------------------------------------------------------------===//
10 /// This file implements the OpenMPIRBuilder class, which is used as a
11 /// convenient way to create LLVM instructions for OpenMP directives.
13 //===----------------------------------------------------------------------===//
15 #include "llvm/Frontend/OpenMP/OMPIRBuilder.h"
17 #include "llvm/ADT/StringRef.h"
18 #include "llvm/ADT/Triple.h"
19 #include "llvm/IR/CFG.h"
20 #include "llvm/IR/DebugInfo.h"
21 #include "llvm/IR/IRBuilder.h"
22 #include "llvm/IR/MDBuilder.h"
23 #include "llvm/IR/Value.h"
24 #include "llvm/Support/CommandLine.h"
25 #include "llvm/Support/Error.h"
26 #include "llvm/Transforms/Utils/BasicBlockUtils.h"
27 #include "llvm/Transforms/Utils/CodeExtractor.h"
31 #define DEBUG_TYPE "openmp-ir-builder"
37 OptimisticAttributes("openmp-ir-builder-optimistic-attributes", cl::Hidden
,
38 cl::desc("Use optimistic attributes describing "
39 "'as-if' properties of runtime calls."),
void OpenMPIRBuilder::addAttributes(omp::RuntimeFunction FnID, Function &Fn) {
  LLVMContext &Ctx = Fn.getContext();

  // Get the function's current attributes.
  auto Attrs = Fn.getAttributes();
  auto FnAttrs = Attrs.getFnAttrs();
  auto RetAttrs = Attrs.getRetAttrs();
  SmallVector<AttributeSet, 4> ArgAttrs;
  for (size_t ArgNo = 0; ArgNo < Fn.arg_size(); ++ArgNo)
    ArgAttrs.emplace_back(Attrs.getParamAttrs(ArgNo));

  // Materialize the attribute sets declared in OMPKinds.def as local
  // variables so the OMP_RTL_ATTRS expansion below can reference them.
#define OMP_ATTRS_SET(VarName, AttrSet) AttributeSet VarName = AttrSet;
#include "llvm/Frontend/OpenMP/OMPKinds.def"

  // Add attributes to the function declaration. Each known runtime function
  // gets its table-defined function/return/argument attributes merged into
  // the existing ones.
  switch (FnID) {
#define OMP_RTL_ATTRS(Enum, FnAttrSet, RetAttrSet, ArgAttrSets)                \
  case Enum:                                                                   \
    FnAttrs = FnAttrs.addAttributes(Ctx, FnAttrSet);                           \
    RetAttrs = RetAttrs.addAttributes(Ctx, RetAttrSet);                        \
    for (size_t ArgNo = 0; ArgNo < ArgAttrSets.size(); ++ArgNo)                \
      ArgAttrs[ArgNo] =                                                        \
          ArgAttrs[ArgNo].addAttributes(Ctx, ArgAttrSets[ArgNo]);              \
    Fn.setAttributes(AttributeList::get(Ctx, FnAttrs, RetAttrs, ArgAttrs));    \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    // Attributes are optional.
    break;
  }
}
FunctionCallee
OpenMPIRBuilder::getOrCreateRuntimeFunction(Module &M, RuntimeFunction FnID) {
  FunctionType *FnTy = nullptr;
  Function *Fn = nullptr;

  // Try to find the declaration in the module first.
  switch (FnID) {
#define OMP_RTL(Enum, Str, IsVarArg, ReturnType, ...)                          \
  case Enum:                                                                   \
    FnTy = FunctionType::get(ReturnType, ArrayRef<Type *>{__VA_ARGS__},        \
                             IsVarArg);                                        \
    Fn = M.getFunction(Str);                                                   \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  }

  if (!Fn) {
    // Create a new declaration if we need one.
    switch (FnID) {
#define OMP_RTL(Enum, Str, ...)                                                \
  case Enum:                                                                   \
    Fn = Function::Create(FnTy, GlobalValue::ExternalLinkage, Str, M);         \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
    }

    // Add information if the runtime function takes a callback function
    if (FnID == OMPRTL___kmpc_fork_call || FnID == OMPRTL___kmpc_fork_teams) {
      if (!Fn->hasMetadata(LLVMContext::MD_callback)) {
        LLVMContext &Ctx = Fn->getContext();
        MDBuilder MDB(Ctx);
        // Annotate the callback behavior of the runtime function:
        //  - The callback callee is argument number 2 (microtask).
        //  - The first two arguments of the callback callee are unknown (-1).
        //  - All variadic arguments to the runtime function are passed to the
        //    callback callee.
        Fn->addMetadata(
            LLVMContext::MD_callback,
            *MDNode::get(Ctx, {MDB.createCallbackEncoding(
                                  2, {-1, -1}, /* VarArgsArePassed */ true)}));
      }
    }

    LLVM_DEBUG(dbgs() << "Created OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
    addAttributes(FnID, *Fn);

  } else {
    LLVM_DEBUG(dbgs() << "Found OpenMP runtime function " << Fn->getName()
                      << " with type " << *Fn->getFunctionType() << "\n");
  }

  assert(Fn && "Failed to create OpenMP runtime function");

  // Cast the function to the expected type if necessary
  Constant *C = ConstantExpr::getBitCast(Fn, FnTy->getPointerTo());
  return {FnTy, C};
}
// Convenience wrapper around getOrCreateRuntimeFunction that returns the
// declaration as a Function*. Asserts if the callee is not a plain Function
// (e.g. if it had to be bitcast to a mismatching type).
Function *OpenMPIRBuilder::getOrCreateRuntimeFunctionPtr(RuntimeFunction FnID) {
  FunctionCallee RTLFn = getOrCreateRuntimeFunction(M, FnID);
  auto *Fn = dyn_cast<llvm::Function>(RTLFn.getCallee());
  assert(Fn && "Failed to create OpenMP runtime function pointer");
  return Fn;
}
140 void OpenMPIRBuilder::initialize() { initializeTypes(M
); }
// Outline all pending parallel regions recorded in OutlineInfos. If \p Fn is
// non-null, only regions belonging to \p Fn are processed; the rest are
// deferred (this can happen with nested function generation).
void OpenMPIRBuilder::finalize(Function *Fn, bool AllowExtractorSinking) {
  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  SmallVector<OutlineInfo, 16> DeferredOutlines;
  for (OutlineInfo &OI : OutlineInfos) {
    // Skip functions that have not finalized yet; may happen with nested
    // function generation.
    if (Fn && OI.getFunction() != Fn) {
      DeferredOutlines.push_back(OI);
      continue;
    }

    ParallelRegionBlockSet.clear();
    Blocks.clear();
    OI.collectBlocks(ParallelRegionBlockSet, Blocks);

    Function *OuterFn = OI.getFunction();
    CodeExtractorAnalysisCache CEAC(*OuterFn);
    CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                            /* AggregateArgs */ false,
                            /* BlockFrequencyInfo */ nullptr,
                            /* BranchProbabilityInfo */ nullptr,
                            /* AssumptionCache */ nullptr,
                            /* AllowVarArgs */ true,
                            /* AllowAlloca */ true,
                            /* Suffix */ ".omp_par");

    LLVM_DEBUG(dbgs() << "Before     outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "Entry " << OI.EntryBB->getName()
                      << " Exit: " << OI.ExitBB->getName() << "\n");
    assert(Extractor.isEligible() &&
           "Expected OpenMP outlining to be possible!");

    Function *OutlinedFn = Extractor.extractCodeRegion(CEAC);

    LLVM_DEBUG(dbgs() << "After      outlining: " << *OuterFn << "\n");
    LLVM_DEBUG(dbgs() << "   Outlined function: " << *OutlinedFn << "\n");
    assert(OutlinedFn->getReturnType()->isVoidTy() &&
           "OpenMP outlined functions should not return a value!");

    // For compatibility with the clang CG we move the outlined function after
    // the one with the parallel region.
    OutlinedFn->removeFromParent();
    M.getFunctionList().insertAfter(OuterFn->getIterator(), OutlinedFn);

    // Remove the artificial entry introduced by the extractor right away, we
    // made our own entry block after all.
    {
      BasicBlock &ArtificialEntry = OutlinedFn->getEntryBlock();
      assert(ArtificialEntry.getUniqueSuccessor() == OI.EntryBB);
      assert(OI.EntryBB->getUniquePredecessor() == &ArtificialEntry);
      if (AllowExtractorSinking) {
        // Move instructions from the to-be-deleted ArtificialEntry to the entry
        // basic block of the parallel region. CodeExtractor may have sunk
        // allocas/bitcasts for values that are solely used in the outlined
        // region and do not escape.
        assert(!ArtificialEntry.empty() &&
               "Expected instructions to sink in the outlined region");
        for (BasicBlock::iterator It = ArtificialEntry.begin(),
                                  End = ArtificialEntry.end();
             It != End;) {
          Instruction &I = *It;
          It++;

          // The terminator is deleted together with the block below.
          if (I.isTerminator())
            continue;

          I.moveBefore(*OI.EntryBB, OI.EntryBB->getFirstInsertionPt());
        }
      }
      OI.EntryBB->moveBefore(&ArtificialEntry);
      ArtificialEntry.eraseFromParent();
    }
    assert(&OutlinedFn->getEntryBlock() == OI.EntryBB);
    assert(OutlinedFn && OutlinedFn->getNumUses() == 1);

    // Run a user callback, e.g. to add attributes.
    if (OI.PostOutlineCB)
      OI.PostOutlineCB(*OutlinedFn);
  }

  // Remove work items that have been completed.
  OutlineInfos = std::move(DeferredOutlines);
}
// The destructor checks that every recorded parallel region was finalized;
// leftover OutlineInfos indicate a missing finalize() call.
OpenMPIRBuilder::~OpenMPIRBuilder() {
  assert(OutlineInfos.empty() && "There must be no outstanding outlinings");
}
// Return (creating on first use) the ident_t* describing a source location
// plus flags; results are cached in IdentMap keyed by (string, flags).
Value *OpenMPIRBuilder::getOrCreateIdent(Constant *SrcLocStr,
                                         IdentFlag LocFlags,
                                         unsigned Reserve2Flags) {
  // Enable "C-mode".
  LocFlags |= OMP_IDENT_FLAG_KMPC;

  Value *&Ident =
      IdentMap[{SrcLocStr, uint64_t(LocFlags) << 31 | Reserve2Flags}];
  if (!Ident) {
    Constant *I32Null = ConstantInt::getNullValue(Int32);
    Constant *IdentData[] = {
        I32Null, ConstantInt::get(Int32, uint32_t(LocFlags)),
        ConstantInt::get(Int32, Reserve2Flags), I32Null, SrcLocStr};
    Constant *Initializer = ConstantStruct::get(
        cast<StructType>(IdentPtr->getPointerElementType()), IdentData);

    // Look for existing encoding of the location + flags, not needed but
    // minimizes the difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.getType() == IdentPtr && GV.hasInitializer())
        if (GV.getInitializer() == Initializer)
          return Ident = &GV;

    auto *GV = new GlobalVariable(M, IdentPtr->getPointerElementType(),
                                  /* isConstant = */ true,
                                  GlobalValue::PrivateLinkage, Initializer);
    GV->setUnnamedAddr(GlobalValue::UnnamedAddr::Global);
    GV->setAlignment(Align(8));
    Ident = GV;
  }
  return Builder.CreatePointerCast(Ident, IdentPtr);
}
// Return (creating and caching on first use) a global i8* constant holding
// the given source-location string.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef LocStr) {
  Constant *&SrcLocStr = SrcLocStrMap[LocStr];
  if (!SrcLocStr) {
    Constant *Initializer =
        ConstantDataArray::getString(M.getContext(), LocStr);

    // Look for existing encoding of the location, not needed but minimizes the
    // difference to the existing solution while we transition.
    for (GlobalVariable &GV : M.getGlobalList())
      if (GV.isConstant() && GV.hasInitializer() &&
          GV.getInitializer() == Initializer)
        return SrcLocStr = ConstantExpr::getPointerCast(&GV, Int8Ptr);

    SrcLocStr = Builder.CreateGlobalStringPtr(LocStr, /* Name */ "",
                                              /* AddressSpace */ 0, &M);
  }
  return SrcLocStr;
}
// Build the ";file;function;line;column;;" encoding the OpenMP runtime
// expects in ident_t and intern it via the StringRef overload.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(StringRef FunctionName,
                                                StringRef FileName,
                                                unsigned Line,
                                                unsigned Column) {
  SmallString<128> Buffer;
  Buffer.push_back(';');
  Buffer.append(FileName);
  Buffer.push_back(';');
  Buffer.append(FunctionName);
  Buffer.push_back(';');
  Buffer.append(std::to_string(Line));
  Buffer.push_back(';');
  Buffer.append(std::to_string(Column));
  Buffer.push_back(';');
  Buffer.push_back(';');
  return getOrCreateSrcLocStr(Buffer.str());
}
301 Constant
*OpenMPIRBuilder::getOrCreateDefaultSrcLocStr() {
302 return getOrCreateSrcLocStr(";unknown;unknown;0;0;;");
// Derive a source-location string from LLVM debug info; falls back to the
// default string when \p DL carries no DILocation.
Constant *OpenMPIRBuilder::getOrCreateSrcLocStr(DebugLoc DL, Function *F) {
  DILocation *DIL = DL.get();
  if (!DIL)
    return getOrCreateDefaultSrcLocStr();
  StringRef FileName = M.getName();
  if (DIFile *DIF = DIL->getFile())
    if (Optional<StringRef> Source = DIF->getSource())
      FileName = *Source;
  // Prefer the subprogram name from debug info; fall back to the IR
  // function name if the scope has none.
  StringRef Function = DIL->getScope()->getSubprogram()->getName();
  if (Function.empty() && F)
    Function = F->getName();
  return getOrCreateSrcLocStr(Function, FileName, DIL->getLine(),
                              DIL->getColumn());
}
321 OpenMPIRBuilder::getOrCreateSrcLocStr(const LocationDescription
&Loc
) {
322 return getOrCreateSrcLocStr(Loc
.DL
, Loc
.IP
.getBlock()->getParent());
325 Value
*OpenMPIRBuilder::getOrCreateThreadID(Value
*Ident
) {
326 return Builder
.CreateCall(
327 getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num
), Ident
,
328 "omp_global_thread_num");
// Public entry point for emitting a barrier; bails out (returning the
// unchanged insertion point) if the location is invalid.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createBarrier(const LocationDescription &Loc, Directive DK,
                               bool ForceSimpleCall, bool CheckCancelFlag) {
  if (!updateToLocation(Loc))
    return Loc.IP;
  return emitBarrierImpl(Loc, DK, ForceSimpleCall, CheckCancelFlag);
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::emitBarrierImpl(const LocationDescription &Loc, Directive Kind,
                                 bool ForceSimpleCall, bool CheckCancelFlag) {
  // Build call __kmpc_cancel_barrier(loc, thread_id) or
  //            __kmpc_barrier(loc, thread_id);

  // Pick the ident flag describing which construct the barrier belongs to.
  IdentFlag BarrierLocFlags;
  switch (Kind) {
  case OMPD_for:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_FOR;
    break;
  case OMPD_sections:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SECTIONS;
    break;
  case OMPD_single:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL_SINGLE;
    break;
  case OMPD_barrier:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_EXPL;
    break;
  default:
    BarrierLocFlags = OMP_IDENT_FLAG_BARRIER_IMPL;
    break;
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Args[] = {getOrCreateIdent(SrcLocStr, BarrierLocFlags),
                   getOrCreateThreadID(getOrCreateIdent(SrcLocStr))};

  // If we are in a cancellable parallel region, barriers are cancellation
  // points.
  // TODO: Check why we would force simple calls or to ignore the cancel flag.
  bool UseCancelBarrier =
      !ForceSimpleCall && isLastFinalizationInfoCancellable(OMPD_parallel);

  Value *Result =
      Builder.CreateCall(getOrCreateRuntimeFunctionPtr(
                             UseCancelBarrier ? OMPRTL___kmpc_cancel_barrier
                                              : OMPRTL___kmpc_barrier),
                         Args);

  if (UseCancelBarrier && CheckCancelFlag)
    emitCancelationCheckImpl(Result, OMPD_parallel);

  return Builder.saveIP();
}
// Emit an OpenMP "cancel" construct: an optional if-guard, a call to
// __kmpc_cancel, and the shared cancellation-check control flow.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createCancel(const LocationDescription &Loc,
                              Value *IfCondition,
                              omp::Directive CanceledDirective) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  // LLVM utilities like blocks with terminators.
  auto *UI = Builder.CreateUnreachable();

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);
  Builder.SetInsertPoint(ThenTI);

  // Map the canceled directive to the runtime's cancel-kind constant.
  Value *CancelKind = nullptr;
  switch (CanceledDirective) {
#define OMP_CANCEL_KIND(Enum, Str, DirectiveEnum, Value)                       \
  case DirectiveEnum:                                                          \
    CancelKind = Builder.getInt32(Value);                                      \
    break;
#include "llvm/Frontend/OpenMP/OMPKinds.def"
  default:
    llvm_unreachable("Unknown cancel kind!");
  }

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), CancelKind};
  Value *Result = Builder.CreateCall(
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_cancel), Args);
  // On cancellation of a parallel region, a barrier is required before
  // leaving; emit it from the exit callback.
  auto ExitCB = [this, CanceledDirective, Loc](InsertPointTy IP) {
    if (CanceledDirective == OMPD_parallel) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      createBarrier(LocationDescription(Builder.saveIP(), Loc.DL),
                    omp::Directive::OMPD_unknown, /* ForceSimpleCall */ false,
                    /* CheckCancelFlag */ false);
    }
  };

  // The actual cancel logic is shared with others, e.g., cancel_barriers.
  emitCancelationCheckImpl(Result, CanceledDirective, ExitCB);

  // Update the insertion point and remove the terminator we introduced.
  Builder.SetInsertPoint(UI->getParent());
  UI->eraseFromParent();

  return Builder.saveIP();
}
// Branch on \p CancelFlag: continue in a new ".cont" block when zero, or run
// finalization in a ".cncl" block when cancellation was requested.
void OpenMPIRBuilder::emitCancelationCheckImpl(Value *CancelFlag,
                                               omp::Directive CanceledDirective,
                                               FinalizeCallbackTy ExitCB) {
  assert(isLastFinalizationInfoCancellable(CanceledDirective) &&
         "Unexpected cancellation!");

  // For a cancel barrier we create two new blocks.
  BasicBlock *BB = Builder.GetInsertBlock();
  BasicBlock *NonCancellationBlock;
  if (Builder.GetInsertPoint() == BB->end()) {
    // TODO: This branch will not be needed once we moved to the
    // OpenMPIRBuilder codegen completely.
    NonCancellationBlock = BasicBlock::Create(
        BB->getContext(), BB->getName() + ".cont", BB->getParent());
  } else {
    NonCancellationBlock = SplitBlock(BB, &*Builder.GetInsertPoint());
    BB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(BB);
  }
  BasicBlock *CancellationBlock = BasicBlock::Create(
      BB->getContext(), BB->getName() + ".cncl", BB->getParent());

  // Jump to them based on the return value.
  Value *Cmp = Builder.CreateIsNull(CancelFlag);
  Builder.CreateCondBr(Cmp, NonCancellationBlock, CancellationBlock,
                       /* TODO weight */ nullptr, nullptr);

  // From the cancellation block we finalize all variables and go to the
  // post finalization block that is known to the FiniCB callback.
  Builder.SetInsertPoint(CancellationBlock);
  if (ExitCB)
    ExitCB(Builder.saveIP());
  auto &FI = FinalizationStack.back();
  FI.FiniCB(Builder.saveIP());

  // The continuation block is where code generation continues.
  Builder.SetInsertPoint(NonCancellationBlock, NonCancellationBlock->begin());
}
// Emit an OpenMP "parallel" region: model the region inline with marker
// blocks, run the body/privatization callbacks, and register an OutlineInfo
// whose post-outline callback rewrites the extracted function's single call
// site into __kmpc_fork_call (or the serialized-parallel sequence under an
// "if" clause).
IRBuilder<>::InsertPoint OpenMPIRBuilder::createParallel(
    const LocationDescription &Loc, InsertPointTy OuterAllocaIP,
    BodyGenCallbackTy BodyGenCB, PrivatizeCallbackTy PrivCB,
    FinalizeCallbackTy FiniCB, Value *IfCondition, Value *NumThreads,
    omp::ProcBindKind ProcBind, bool IsCancellable) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadID = getOrCreateThreadID(Ident);

  if (NumThreads) {
    // Build call __kmpc_push_num_threads(&Ident, global_tid, num_threads)
    Value *Args[] = {
        Ident, ThreadID,
        Builder.CreateIntCast(NumThreads, Int32, /*isSigned*/ false)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_num_threads), Args);
  }

  if (ProcBind != OMP_PROC_BIND_default) {
    // Build call __kmpc_push_proc_bind(&Ident, global_tid, proc_bind)
    Value *Args[] = {
        Ident, ThreadID,
        ConstantInt::get(Int32, unsigned(ProcBind), /*isSigned=*/true)};
    Builder.CreateCall(
        getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_push_proc_bind), Args);
  }

  BasicBlock *InsertBB = Builder.GetInsertBlock();
  Function *OuterFn = InsertBB->getParent();

  // Save the outer alloca block because the insertion iterator may get
  // invalidated and we still need this later.
  BasicBlock *OuterAllocaBlock = OuterAllocaIP.getBlock();

  // Vector to remember instructions we used only during the modeling but which
  // we want to delete at the end.
  SmallVector<Instruction *, 4> ToBeDeleted;

  // Change the location to the outer alloca insertion point to create and
  // initialize the allocas we pass into the parallel region.
  Builder.restoreIP(OuterAllocaIP);
  AllocaInst *TIDAddr = Builder.CreateAlloca(Int32, nullptr, "tid.addr");
  AllocaInst *ZeroAddr = Builder.CreateAlloca(Int32, nullptr, "zero.addr");

  // If there is an if condition we actually use the TIDAddr and ZeroAddr in the
  // program, otherwise we only need them for modeling purposes to get the
  // associated arguments in the outlined function. In the former case,
  // initialize the allocas properly, in the latter case, delete them later.
  if (IfCondition) {
    Builder.CreateStore(Constant::getNullValue(Int32), TIDAddr);
    Builder.CreateStore(Constant::getNullValue(Int32), ZeroAddr);
  } else {
    ToBeDeleted.push_back(TIDAddr);
    ToBeDeleted.push_back(ZeroAddr);
  }

  // Create an artificial insertion point that will also ensure the blocks we
  // are about to split are not degenerated.
  auto *UI = new UnreachableInst(Builder.getContext(), InsertBB);

  Instruction *ThenTI = UI, *ElseTI = nullptr;
  if (IfCondition)
    SplitBlockAndInsertIfThenElse(IfCondition, UI, &ThenTI, &ElseTI);

  BasicBlock *ThenBB = ThenTI->getParent();
  BasicBlock *PRegEntryBB = ThenBB->splitBasicBlock(ThenTI, "omp.par.entry");
  BasicBlock *PRegBodyBB =
      PRegEntryBB->splitBasicBlock(ThenTI, "omp.par.region");
  BasicBlock *PRegPreFiniBB =
      PRegBodyBB->splitBasicBlock(ThenTI, "omp.par.pre_finalize");
  BasicBlock *PRegExitBB =
      PRegPreFiniBB->splitBasicBlock(ThenTI, "omp.par.exit");

  auto FiniCBWrapper = [&](InsertPointTy IP) {
    // Hide "open-ended" blocks from the given FiniCB by setting the right jump
    // target to the region exit block.
    if (IP.getBlock()->end() == IP.getPoint()) {
      IRBuilder<>::InsertPointGuard IPG(Builder);
      Builder.restoreIP(IP);
      Instruction *I = Builder.CreateBr(PRegExitBB);
      IP = InsertPointTy(I->getParent(), I->getIterator());
    }
    assert(IP.getBlock()->getTerminator()->getNumSuccessors() == 1 &&
           IP.getBlock()->getTerminator()->getSuccessor(0) == PRegExitBB &&
           "Unexpected insertion point for finalization call!");
    return FiniCB(IP);
  };

  FinalizationStack.push_back({FiniCBWrapper, OMPD_parallel, IsCancellable});

  // Generate the privatization allocas in the block that will become the entry
  // of the outlined function.
  Builder.SetInsertPoint(PRegEntryBB->getTerminator());
  InsertPointTy InnerAllocaIP = Builder.saveIP();

  AllocaInst *PrivTIDAddr =
      Builder.CreateAlloca(Int32, nullptr, "tid.addr.local");
  Instruction *PrivTID = Builder.CreateLoad(Int32, PrivTIDAddr, "tid");

  // Add some fake uses for OpenMP provided arguments.
  ToBeDeleted.push_back(Builder.CreateLoad(Int32, TIDAddr, "tid.addr.use"));
  Instruction *ZeroAddrUse =
      Builder.CreateLoad(Int32, ZeroAddr, "zero.addr.use");
  ToBeDeleted.push_back(ZeroAddrUse);

  // ThenBB
  //   |
  //   V
  // PRegionEntryBB         <- Privatization allocas are placed here.
  //   |
  //   V
  // PRegionBodyBB          <- BodyGen is invoked here.
  //   |
  //   V
  // PRegPreFiniBB          <- The block we will start finalization from.
  //   |
  //   V
  // PRegionExitBB          <- A common exit to simplify block collection.
  //

  LLVM_DEBUG(dbgs() << "Before body codegen: " << *OuterFn << "\n");

  // Let the caller create the body.
  assert(BodyGenCB && "Expected body generation callback!");
  InsertPointTy CodeGenIP(PRegBodyBB, PRegBodyBB->begin());
  BodyGenCB(InnerAllocaIP, CodeGenIP, *PRegPreFiniBB);

  LLVM_DEBUG(dbgs() << "After  body codegen: " << *OuterFn << "\n");

  FunctionCallee RTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_fork_call);
  if (auto *F = dyn_cast<llvm::Function>(RTLFn.getCallee())) {
    if (!F->hasMetadata(llvm::LLVMContext::MD_callback)) {
      llvm::LLVMContext &Ctx = F->getContext();
      MDBuilder MDB(Ctx);
      // Annotate the callback behavior of the __kmpc_fork_call:
      //  - The callback callee is argument number 2 (microtask).
      //  - The first two arguments of the callback callee are unknown (-1).
      //  - All variadic arguments to the __kmpc_fork_call are passed to the
      //    callback callee.
      F->addMetadata(
          llvm::LLVMContext::MD_callback,
          *llvm::MDNode::get(
              Ctx, {MDB.createCallbackEncoding(2, {-1, -1},
                                               /* VarArgsArePassed */ true)}));
    }
  }

  OutlineInfo OI;
  OI.PostOutlineCB = [=](Function &OutlinedFn) {
    // Add some known attributes.
    OutlinedFn.addParamAttr(0, Attribute::NoAlias);
    OutlinedFn.addParamAttr(1, Attribute::NoAlias);
    OutlinedFn.addFnAttr(Attribute::NoUnwind);
    OutlinedFn.addFnAttr(Attribute::NoRecurse);

    assert(OutlinedFn.arg_size() >= 2 &&
           "Expected at least tid and bounded tid as arguments");
    unsigned NumCapturedVars =
        OutlinedFn.arg_size() - /* tid & bounded tid */ 2;

    CallInst *CI = cast<CallInst>(OutlinedFn.user_back());
    CI->getParent()->setName("omp_parallel");
    Builder.SetInsertPoint(CI);

    // Build call __kmpc_fork_call(Ident, n, microtask, var1, .., varn);
    Value *ForkCallArgs[] = {
        Ident, Builder.getInt32(NumCapturedVars),
        Builder.CreateBitCast(&OutlinedFn, ParallelTaskPtr)};

    SmallVector<Value *, 16> RealArgs;
    RealArgs.append(std::begin(ForkCallArgs), std::end(ForkCallArgs));
    RealArgs.append(CI->arg_begin() + /* tid & bound tid */ 2, CI->arg_end());

    Builder.CreateCall(RTLFn, RealArgs);

    LLVM_DEBUG(dbgs() << "With fork_call placed: "
                      << *Builder.GetInsertBlock()->getParent() << "\n");

    InsertPointTy ExitIP(PRegExitBB, PRegExitBB->end());

    // Initialize the local TID stack location with the argument value.
    Builder.SetInsertPoint(PrivTID);
    Function::arg_iterator OutlinedAI = OutlinedFn.arg_begin();
    Builder.CreateStore(Builder.CreateLoad(Int32, OutlinedAI), PrivTIDAddr);

    // If no "if" clause was present we do not need the call created during
    // outlining, otherwise we reuse it in the serialized parallel region.
    if (!ElseTI) {
      CI->eraseFromParent();
    } else {

      // If an "if" clause was present we are now generating the serialized
      // version into the "else" branch.
      Builder.SetInsertPoint(ElseTI);

      // Build calls __kmpc_serialized_parallel(&Ident, GTid);
      Value *SerializedParallelCallArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_serialized_parallel),
          SerializedParallelCallArgs);

      // OutlinedFn(&GTid, &zero, CapturedStruct);
      CI->removeFromParent();
      Builder.Insert(CI);

      // __kmpc_end_serialized_parallel(&Ident, GTid);
      Value *EndArgs[] = {Ident, ThreadID};
      Builder.CreateCall(
          getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_serialized_parallel),
          EndArgs);

      LLVM_DEBUG(dbgs() << "With serialized parallel region: "
                        << *Builder.GetInsertBlock()->getParent() << "\n");
    }

    for (Instruction *I : ToBeDeleted)
      I->eraseFromParent();
  };

  // Adjust the finalization stack, verify the adjustment, and call the
  // finalize function a last time to finalize values between the pre-fini
  // block and the exit block if we left the parallel "the normal way".
  auto FiniInfo = FinalizationStack.pop_back_val();
  (void)FiniInfo;
  assert(FiniInfo.DK == OMPD_parallel &&
         "Unexpected finalization stack state!");

  Instruction *PRegPreFiniTI = PRegPreFiniBB->getTerminator();

  InsertPointTy PreFiniIP(PRegPreFiniBB, PRegPreFiniTI->getIterator());
  FiniCB(PreFiniIP);

  OI.EntryBB = PRegEntryBB;
  OI.ExitBB = PRegExitBB;

  SmallPtrSet<BasicBlock *, 32> ParallelRegionBlockSet;
  SmallVector<BasicBlock *, 32> Blocks;
  OI.collectBlocks(ParallelRegionBlockSet, Blocks);

  // Ensure a single exit node for the outlined region by creating one.
  // We might have multiple incoming edges to the exit now due to finalizations,
  // e.g., cancel calls that cause the control flow to leave the region.
  BasicBlock *PRegOutlinedExitBB = PRegExitBB;
  PRegExitBB = SplitBlock(PRegExitBB, &*PRegExitBB->getFirstInsertionPt());
  PRegOutlinedExitBB->setName("omp.par.outlined.exit");
  Blocks.push_back(PRegOutlinedExitBB);

  CodeExtractorAnalysisCache CEAC(*OuterFn);
  CodeExtractor Extractor(Blocks, /* DominatorTree */ nullptr,
                          /* AggregateArgs */ false,
                          /* BlockFrequencyInfo */ nullptr,
                          /* BranchProbabilityInfo */ nullptr,
                          /* AssumptionCache */ nullptr,
                          /* AllowVarArgs */ true,
                          /* AllowAlloca */ true,
                          /* Suffix */ ".omp_par");

  // Find inputs to, outputs from the code region.
  BasicBlock *CommonExit = nullptr;
  SetVector<Value *> Inputs, Outputs, SinkingCands, HoistingCands;
  Extractor.findAllocas(CEAC, SinkingCands, HoistingCands, CommonExit);
  Extractor.findInputsOutputs(Inputs, Outputs, SinkingCands);

  LLVM_DEBUG(dbgs() << "Before privatization: " << *OuterFn << "\n");

  FunctionCallee TIDRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_global_thread_num);

  auto PrivHelper = [&](Value &V) {
    // The modeling allocas are handled separately and never privatized.
    if (&V == TIDAddr || &V == ZeroAddr)
      return;

    SetVector<Use *> Uses;
    for (Use &U : V.uses())
      if (auto *UserI = dyn_cast<Instruction>(U.getUser()))
        if (ParallelRegionBlockSet.count(UserI->getParent()))
          Uses.insert(&U);

    // __kmpc_fork_call expects extra arguments as pointers. If the input
    // already has a pointer type, everything is fine. Otherwise, store the
    // value onto stack and load it back inside the to-be-outlined region. This
    // will ensure only the pointer will be passed to the function.
    // FIXME: if there are more than 15 trailing arguments, they must be
    // additionally packed in a struct.
    Value *Inner = &V;
    if (!V.getType()->isPointerTy()) {
      IRBuilder<>::InsertPointGuard Guard(Builder);
      LLVM_DEBUG(llvm::dbgs() << "Forwarding input as pointer: " << V << "\n");

      Builder.restoreIP(OuterAllocaIP);
      Value *Ptr =
          Builder.CreateAlloca(V.getType(), nullptr, V.getName() + ".reloaded");

      // Store to stack at end of the block that currently branches to the entry
      // block of the to-be-outlined region.
      Builder.SetInsertPoint(InsertBB,
                             InsertBB->getTerminator()->getIterator());
      Builder.CreateStore(&V, Ptr);

      // Load back next to allocations in the to-be-outlined region.
      Builder.restoreIP(InnerAllocaIP);
      Inner = Builder.CreateLoad(V.getType(), Ptr);
    }

    Value *ReplacementValue = nullptr;
    CallInst *CI = dyn_cast<CallInst>(&V);
    if (CI && CI->getCalledFunction() == TIDRTLFn.getCallee()) {
      ReplacementValue = PrivTID;
    } else {
      Builder.restoreIP(
          PrivCB(InnerAllocaIP, Builder.saveIP(), V, *Inner, ReplacementValue));
      assert(ReplacementValue &&
             "Expected copy/create callback to set replacement value!");
      if (ReplacementValue == &V)
        return;
    }

    for (Use *UPtr : Uses)
      UPtr->set(ReplacementValue);
  };

  // Reset the inner alloca insertion as it will be used for loading the values
  // wrapped into pointers before passing them into the to-be-outlined region.
  // Configure it to insert immediately after the fake use of zero address so
  // that they are available in the generated body and so that the
  // OpenMP-related values (thread ID and zero address pointers) remain leading
  // in the argument list.
  InnerAllocaIP = IRBuilder<>::InsertPoint(
      ZeroAddrUse->getParent(), ZeroAddrUse->getNextNode()->getIterator());

  // Reset the outer alloca insertion point to the entry of the relevant block
  // in case it was invalidated.
  OuterAllocaIP = IRBuilder<>::InsertPoint(
      OuterAllocaBlock, OuterAllocaBlock->getFirstInsertionPt());

  for (Value *Input : Inputs) {
    LLVM_DEBUG(dbgs() << "Captured input: " << *Input << "\n");
    PrivHelper(*Input);
  }
  LLVM_DEBUG({
    for (Value *Output : Outputs)
      LLVM_DEBUG(dbgs() << "Captured output: " << *Output << "\n");
  });
  assert(Outputs.empty() &&
         "OpenMP outlining should not produce live-out values!");

  LLVM_DEBUG(dbgs() << "After  privatization: " << *OuterFn << "\n");
  LLVM_DEBUG({
    for (auto *BB : Blocks)
      dbgs() << " PBR: " << BB->getName() << "\n";
  });

  // Register the outlined info.
  addOutlineInfo(std::move(OI));

  InsertPointTy AfterIP(UI->getParent(), UI->getParent()->end());
  UI->eraseFromParent();

  return AfterIP;
}
840 void OpenMPIRBuilder::emitFlush(const LocationDescription
&Loc
) {
841 // Build call void __kmpc_flush(ident_t *loc)
842 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
843 Value
*Args
[] = {getOrCreateIdent(SrcLocStr
)};
845 Builder
.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_flush
), Args
);
848 void OpenMPIRBuilder::createFlush(const LocationDescription
&Loc
) {
849 if (!updateToLocation(Loc
))
void OpenMPIRBuilder::emitTaskwaitImpl(const LocationDescription &Loc) {
  // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
  // global_tid);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident)};

  // Ignore return result until untied tasks are supported.
  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskwait),
                     Args);
}
866 void OpenMPIRBuilder::createTaskwait(const LocationDescription
&Loc
) {
867 if (!updateToLocation(Loc
))
869 emitTaskwaitImpl(Loc
);
void OpenMPIRBuilder::emitTaskyieldImpl(const LocationDescription &Loc) {
  // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  // The last argument ("end_part") is hard-coded to zero.
  Constant *I32Null = ConstantInt::getNullValue(Int32);
  Value *Args[] = {Ident, getOrCreateThreadID(Ident), I32Null};

  Builder.CreateCall(getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_omp_taskyield),
                     Args);
}
883 void OpenMPIRBuilder::createTaskyield(const LocationDescription
&Loc
) {
884 if (!updateToLocation(Loc
))
886 emitTaskyieldImpl(Loc
);
889 OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSections(
890 const LocationDescription
&Loc
, InsertPointTy AllocaIP
,
891 ArrayRef
<StorableBodyGenCallbackTy
> SectionCBs
, PrivatizeCallbackTy PrivCB
,
892 FinalizeCallbackTy FiniCB
, bool IsCancellable
, bool IsNowait
) {
893 if (!updateToLocation(Loc
))
896 auto FiniCBWrapper
= [&](InsertPointTy IP
) {
897 if (IP
.getBlock()->end() != IP
.getPoint())
899 // This must be done otherwise any nested constructs using FinalizeOMPRegion
900 // will fail because that function requires the Finalization Basic Block to
901 // have a terminator, which is already removed by EmitOMPRegionBody.
902 // IP is currently at cancelation block.
903 // We need to backtrack to the condition block to fetch
904 // the exit block and create a branch from cancelation
906 IRBuilder
<>::InsertPointGuard
IPG(Builder
);
907 Builder
.restoreIP(IP
);
908 auto *CaseBB
= IP
.getBlock()->getSinglePredecessor();
909 auto *CondBB
= CaseBB
->getSinglePredecessor()->getSinglePredecessor();
910 auto *ExitBB
= CondBB
->getTerminator()->getSuccessor(1);
911 Instruction
*I
= Builder
.CreateBr(ExitBB
);
912 IP
= InsertPointTy(I
->getParent(), I
->getIterator());
916 FinalizationStack
.push_back({FiniCBWrapper
, OMPD_sections
, IsCancellable
});
918 // Each section is emitted as a switch case
919 // Each finalization callback is handled from clang.EmitOMPSectionDirective()
920 // -> OMP.createSection() which generates the IR for each section
921 // Iterate through all sections and emit a switch construct:
927 // case <NumSection> - 1:
928 // <SectionStmt[<NumSection> - 1]>;
932 // section_loop.after:
934 auto LoopBodyGenCB
= [&](InsertPointTy CodeGenIP
, Value
*IndVar
) {
935 auto *CurFn
= CodeGenIP
.getBlock()->getParent();
936 auto *ForIncBB
= CodeGenIP
.getBlock()->getSingleSuccessor();
937 auto *ForExitBB
= CodeGenIP
.getBlock()
938 ->getSinglePredecessor()
941 SwitchInst
*SwitchStmt
= Builder
.CreateSwitch(IndVar
, ForIncBB
);
942 Builder
.restoreIP(CodeGenIP
);
943 unsigned CaseNumber
= 0;
944 for (auto SectionCB
: SectionCBs
) {
945 auto *CaseBB
= BasicBlock::Create(M
.getContext(),
946 "omp_section_loop.body.case", CurFn
);
947 SwitchStmt
->addCase(Builder
.getInt32(CaseNumber
), CaseBB
);
948 Builder
.SetInsertPoint(CaseBB
);
949 SectionCB(InsertPointTy(), Builder
.saveIP(), *ForExitBB
);
952 // remove the existing terminator from body BB since there can be no
953 // terminators after switch/case
954 CodeGenIP
.getBlock()->getTerminator()->eraseFromParent();
956 // Loop body ends here
957 // LowerBound, UpperBound, and STride for createCanonicalLoop
958 Type
*I32Ty
= Type::getInt32Ty(M
.getContext());
959 Value
*LB
= ConstantInt::get(I32Ty
, 0);
960 Value
*UB
= ConstantInt::get(I32Ty
, SectionCBs
.size());
961 Value
*ST
= ConstantInt::get(I32Ty
, 1);
962 llvm::CanonicalLoopInfo
*LoopInfo
= createCanonicalLoop(
963 Loc
, LoopBodyGenCB
, LB
, UB
, ST
, true, false, AllocaIP
, "section_loop");
964 InsertPointTy AfterIP
=
965 applyStaticWorkshareLoop(Loc
.DL
, LoopInfo
, AllocaIP
, true);
966 BasicBlock
*LoopAfterBB
= AfterIP
.getBlock();
967 Instruction
*SplitPos
= LoopAfterBB
->getTerminator();
968 if (!isa_and_nonnull
<BranchInst
>(SplitPos
))
969 SplitPos
= new UnreachableInst(Builder
.getContext(), LoopAfterBB
);
970 // ExitBB after LoopAfterBB because LoopAfterBB is used for FinalizationCB,
971 // which requires a BB with branch
973 LoopAfterBB
->splitBasicBlock(SplitPos
, "omp_sections.end");
974 SplitPos
->eraseFromParent();
976 // Apply the finalization callback in LoopAfterBB
977 auto FiniInfo
= FinalizationStack
.pop_back_val();
978 assert(FiniInfo
.DK
== OMPD_sections
&&
979 "Unexpected finalization stack state!");
980 Builder
.SetInsertPoint(LoopAfterBB
->getTerminator());
981 FiniInfo
.FiniCB(Builder
.saveIP());
982 Builder
.SetInsertPoint(ExitBB
);
984 return Builder
.saveIP();
987 OpenMPIRBuilder::InsertPointTy
988 OpenMPIRBuilder::createSection(const LocationDescription
&Loc
,
989 BodyGenCallbackTy BodyGenCB
,
990 FinalizeCallbackTy FiniCB
) {
991 if (!updateToLocation(Loc
))
994 auto FiniCBWrapper
= [&](InsertPointTy IP
) {
995 if (IP
.getBlock()->end() != IP
.getPoint())
997 // This must be done otherwise any nested constructs using FinalizeOMPRegion
998 // will fail because that function requires the Finalization Basic Block to
999 // have a terminator, which is already removed by EmitOMPRegionBody.
1000 // IP is currently at cancelation block.
1001 // We need to backtrack to the condition block to fetch
1002 // the exit block and create a branch from cancelation
1004 IRBuilder
<>::InsertPointGuard
IPG(Builder
);
1005 Builder
.restoreIP(IP
);
1006 auto *CaseBB
= Loc
.IP
.getBlock();
1007 auto *CondBB
= CaseBB
->getSinglePredecessor()->getSinglePredecessor();
1008 auto *ExitBB
= CondBB
->getTerminator()->getSuccessor(1);
1009 Instruction
*I
= Builder
.CreateBr(ExitBB
);
1010 IP
= InsertPointTy(I
->getParent(), I
->getIterator());
1014 Directive OMPD
= Directive::OMPD_sections
;
1015 // Since we are using Finalization Callback here, HasFinalize
1016 // and IsCancellable have to be true
1017 return EmitOMPInlinedRegion(OMPD
, nullptr, nullptr, BodyGenCB
, FiniCBWrapper
,
1018 /*Conditional*/ false, /*hasFinalize*/ true,
1019 /*IsCancellable*/ true);
1022 /// Create a function with a unique name and a "void (i8*, i8*)" signature in
1023 /// the given module and return it.
1024 Function
*getFreshReductionFunc(Module
&M
) {
1025 Type
*VoidTy
= Type::getVoidTy(M
.getContext());
1026 Type
*Int8PtrTy
= Type::getInt8PtrTy(M
.getContext());
1028 FunctionType::get(VoidTy
, {Int8PtrTy
, Int8PtrTy
}, /* IsVarArg */ false);
1029 return Function::Create(FuncTy
, GlobalVariable::InternalLinkage
,
1030 M
.getDataLayout().getDefaultGlobalsAddressSpace(),
1031 ".omp.reduction.func", &M
);
1034 OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createReductions(
1035 const LocationDescription
&Loc
, InsertPointTy AllocaIP
,
1036 ArrayRef
<ReductionInfo
> ReductionInfos
, bool IsNoWait
) {
1037 for (const ReductionInfo
&RI
: ReductionInfos
) {
1039 assert(RI
.Variable
&& "expected non-null variable");
1040 assert(RI
.PrivateVariable
&& "expected non-null private variable");
1041 assert(RI
.ReductionGen
&& "expected non-null reduction generator callback");
1042 assert(RI
.Variable
->getType() == RI
.PrivateVariable
->getType() &&
1043 "expected variables and their private equivalents to have the same "
1045 assert(RI
.Variable
->getType()->isPointerTy() &&
1046 "expected variables to be pointers");
1049 if (!updateToLocation(Loc
))
1050 return InsertPointTy();
1052 BasicBlock
*InsertBlock
= Loc
.IP
.getBlock();
1053 BasicBlock
*ContinuationBlock
=
1054 InsertBlock
->splitBasicBlock(Loc
.IP
.getPoint(), "reduce.finalize");
1055 InsertBlock
->getTerminator()->eraseFromParent();
1057 // Create and populate array of type-erased pointers to private reduction
1059 unsigned NumReductions
= ReductionInfos
.size();
1060 Type
*RedArrayTy
= ArrayType::get(Builder
.getInt8PtrTy(), NumReductions
);
1061 Builder
.restoreIP(AllocaIP
);
1062 Value
*RedArray
= Builder
.CreateAlloca(RedArrayTy
, nullptr, "red.array");
1064 Builder
.SetInsertPoint(InsertBlock
, InsertBlock
->end());
1066 for (auto En
: enumerate(ReductionInfos
)) {
1067 unsigned Index
= En
.index();
1068 const ReductionInfo
&RI
= En
.value();
1069 Value
*RedArrayElemPtr
= Builder
.CreateConstInBoundsGEP2_64(
1070 RedArrayTy
, RedArray
, 0, Index
, "red.array.elem." + Twine(Index
));
1072 Builder
.CreateBitCast(RI
.PrivateVariable
, Builder
.getInt8PtrTy(),
1073 "private.red.var." + Twine(Index
) + ".casted");
1074 Builder
.CreateStore(Casted
, RedArrayElemPtr
);
1077 // Emit a call to the runtime function that orchestrates the reduction.
1078 // Declare the reduction function in the process.
1079 Function
*Func
= Builder
.GetInsertBlock()->getParent();
1080 Module
*Module
= Func
->getParent();
1081 Value
*RedArrayPtr
=
1082 Builder
.CreateBitCast(RedArray
, Builder
.getInt8PtrTy(), "red.array.ptr");
1083 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
1084 bool CanGenerateAtomic
=
1085 llvm::all_of(ReductionInfos
, [](const ReductionInfo
&RI
) {
1086 return RI
.AtomicReductionGen
;
1088 Value
*Ident
= getOrCreateIdent(
1089 SrcLocStr
, CanGenerateAtomic
? IdentFlag::OMP_IDENT_FLAG_ATOMIC_REDUCE
1091 Value
*ThreadId
= getOrCreateThreadID(Ident
);
1092 Constant
*NumVariables
= Builder
.getInt32(NumReductions
);
1093 const DataLayout
&DL
= Module
->getDataLayout();
1094 unsigned RedArrayByteSize
= DL
.getTypeStoreSize(RedArrayTy
);
1095 Constant
*RedArraySize
= Builder
.getInt64(RedArrayByteSize
);
1096 Function
*ReductionFunc
= getFreshReductionFunc(*Module
);
1097 Value
*Lock
= getOMPCriticalRegionLock(".reduction");
1098 Function
*ReduceFunc
= getOrCreateRuntimeFunctionPtr(
1099 IsNoWait
? RuntimeFunction::OMPRTL___kmpc_reduce_nowait
1100 : RuntimeFunction::OMPRTL___kmpc_reduce
);
1101 CallInst
*ReduceCall
=
1102 Builder
.CreateCall(ReduceFunc
,
1103 {Ident
, ThreadId
, NumVariables
, RedArraySize
,
1104 RedArrayPtr
, ReductionFunc
, Lock
},
1107 // Create final reduction entry blocks for the atomic and non-atomic case.
1108 // Emit IR that dispatches control flow to one of the blocks based on the
1109 // reduction supporting the atomic mode.
1110 BasicBlock
*NonAtomicRedBlock
=
1111 BasicBlock::Create(Module
->getContext(), "reduce.switch.nonatomic", Func
);
1112 BasicBlock
*AtomicRedBlock
=
1113 BasicBlock::Create(Module
->getContext(), "reduce.switch.atomic", Func
);
1114 SwitchInst
*Switch
=
1115 Builder
.CreateSwitch(ReduceCall
, ContinuationBlock
, /* NumCases */ 2);
1116 Switch
->addCase(Builder
.getInt32(1), NonAtomicRedBlock
);
1117 Switch
->addCase(Builder
.getInt32(2), AtomicRedBlock
);
1119 // Populate the non-atomic reduction using the elementwise reduction function.
1120 // This loads the elements from the global and private variables and reduces
1121 // them before storing back the result to the global variable.
1122 Builder
.SetInsertPoint(NonAtomicRedBlock
);
1123 for (auto En
: enumerate(ReductionInfos
)) {
1124 const ReductionInfo
&RI
= En
.value();
1125 Type
*ValueType
= RI
.getElementType();
1126 Value
*RedValue
= Builder
.CreateLoad(ValueType
, RI
.Variable
,
1127 "red.value." + Twine(En
.index()));
1128 Value
*PrivateRedValue
=
1129 Builder
.CreateLoad(ValueType
, RI
.PrivateVariable
,
1130 "red.private.value." + Twine(En
.index()));
1133 RI
.ReductionGen(Builder
.saveIP(), RedValue
, PrivateRedValue
, Reduced
));
1134 if (!Builder
.GetInsertBlock())
1135 return InsertPointTy();
1136 Builder
.CreateStore(Reduced
, RI
.Variable
);
1138 Function
*EndReduceFunc
= getOrCreateRuntimeFunctionPtr(
1139 IsNoWait
? RuntimeFunction::OMPRTL___kmpc_end_reduce_nowait
1140 : RuntimeFunction::OMPRTL___kmpc_end_reduce
);
1141 Builder
.CreateCall(EndReduceFunc
, {Ident
, ThreadId
, Lock
});
1142 Builder
.CreateBr(ContinuationBlock
);
1144 // Populate the atomic reduction using the atomic elementwise reduction
1145 // function. There are no loads/stores here because they will be happening
1146 // inside the atomic elementwise reduction.
1147 Builder
.SetInsertPoint(AtomicRedBlock
);
1148 if (CanGenerateAtomic
) {
1149 for (const ReductionInfo
&RI
: ReductionInfos
) {
1150 Builder
.restoreIP(RI
.AtomicReductionGen(Builder
.saveIP(), RI
.Variable
,
1151 RI
.PrivateVariable
));
1152 if (!Builder
.GetInsertBlock())
1153 return InsertPointTy();
1155 Builder
.CreateBr(ContinuationBlock
);
1157 Builder
.CreateUnreachable();
1160 // Populate the outlined reduction function using the elementwise reduction
1161 // function. Partial values are extracted from the type-erased array of
1162 // pointers to private variables.
1163 BasicBlock
*ReductionFuncBlock
=
1164 BasicBlock::Create(Module
->getContext(), "", ReductionFunc
);
1165 Builder
.SetInsertPoint(ReductionFuncBlock
);
1166 Value
*LHSArrayPtr
= Builder
.CreateBitCast(ReductionFunc
->getArg(0),
1167 RedArrayTy
->getPointerTo());
1168 Value
*RHSArrayPtr
= Builder
.CreateBitCast(ReductionFunc
->getArg(1),
1169 RedArrayTy
->getPointerTo());
1170 for (auto En
: enumerate(ReductionInfos
)) {
1171 const ReductionInfo
&RI
= En
.value();
1172 Value
*LHSI8PtrPtr
= Builder
.CreateConstInBoundsGEP2_64(
1173 RedArrayTy
, LHSArrayPtr
, 0, En
.index());
1174 Value
*LHSI8Ptr
= Builder
.CreateLoad(Builder
.getInt8PtrTy(), LHSI8PtrPtr
);
1175 Value
*LHSPtr
= Builder
.CreateBitCast(LHSI8Ptr
, RI
.Variable
->getType());
1176 Value
*LHS
= Builder
.CreateLoad(RI
.getElementType(), LHSPtr
);
1177 Value
*RHSI8PtrPtr
= Builder
.CreateConstInBoundsGEP2_64(
1178 RedArrayTy
, RHSArrayPtr
, 0, En
.index());
1179 Value
*RHSI8Ptr
= Builder
.CreateLoad(Builder
.getInt8PtrTy(), RHSI8PtrPtr
);
1181 Builder
.CreateBitCast(RHSI8Ptr
, RI
.PrivateVariable
->getType());
1182 Value
*RHS
= Builder
.CreateLoad(RI
.getElementType(), RHSPtr
);
1184 Builder
.restoreIP(RI
.ReductionGen(Builder
.saveIP(), LHS
, RHS
, Reduced
));
1185 if (!Builder
.GetInsertBlock())
1186 return InsertPointTy();
1187 Builder
.CreateStore(Reduced
, LHSPtr
);
1189 Builder
.CreateRetVoid();
1191 Builder
.SetInsertPoint(ContinuationBlock
);
1192 return Builder
.saveIP();
1195 OpenMPIRBuilder::InsertPointTy
1196 OpenMPIRBuilder::createMaster(const LocationDescription
&Loc
,
1197 BodyGenCallbackTy BodyGenCB
,
1198 FinalizeCallbackTy FiniCB
) {
1200 if (!updateToLocation(Loc
))
1203 Directive OMPD
= Directive::OMPD_master
;
1204 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
1205 Value
*Ident
= getOrCreateIdent(SrcLocStr
);
1206 Value
*ThreadId
= getOrCreateThreadID(Ident
);
1207 Value
*Args
[] = {Ident
, ThreadId
};
1209 Function
*EntryRTLFn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_master
);
1210 Instruction
*EntryCall
= Builder
.CreateCall(EntryRTLFn
, Args
);
1212 Function
*ExitRTLFn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_master
);
1213 Instruction
*ExitCall
= Builder
.CreateCall(ExitRTLFn
, Args
);
1215 return EmitOMPInlinedRegion(OMPD
, EntryCall
, ExitCall
, BodyGenCB
, FiniCB
,
1216 /*Conditional*/ true, /*hasFinalize*/ true);
1219 OpenMPIRBuilder::InsertPointTy
1220 OpenMPIRBuilder::createMasked(const LocationDescription
&Loc
,
1221 BodyGenCallbackTy BodyGenCB
,
1222 FinalizeCallbackTy FiniCB
, Value
*Filter
) {
1223 if (!updateToLocation(Loc
))
1226 Directive OMPD
= Directive::OMPD_masked
;
1227 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
1228 Value
*Ident
= getOrCreateIdent(SrcLocStr
);
1229 Value
*ThreadId
= getOrCreateThreadID(Ident
);
1230 Value
*Args
[] = {Ident
, ThreadId
, Filter
};
1231 Value
*ArgsEnd
[] = {Ident
, ThreadId
};
1233 Function
*EntryRTLFn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_masked
);
1234 Instruction
*EntryCall
= Builder
.CreateCall(EntryRTLFn
, Args
);
1236 Function
*ExitRTLFn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_masked
);
1237 Instruction
*ExitCall
= Builder
.CreateCall(ExitRTLFn
, ArgsEnd
);
1239 return EmitOMPInlinedRegion(OMPD
, EntryCall
, ExitCall
, BodyGenCB
, FiniCB
,
1240 /*Conditional*/ true, /*hasFinalize*/ true);
1243 CanonicalLoopInfo
*OpenMPIRBuilder::createLoopSkeleton(
1244 DebugLoc DL
, Value
*TripCount
, Function
*F
, BasicBlock
*PreInsertBefore
,
1245 BasicBlock
*PostInsertBefore
, const Twine
&Name
) {
1246 Module
*M
= F
->getParent();
1247 LLVMContext
&Ctx
= M
->getContext();
1248 Type
*IndVarTy
= TripCount
->getType();
1250 // Create the basic block structure.
1251 BasicBlock
*Preheader
=
1252 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".preheader", F
, PreInsertBefore
);
1253 BasicBlock
*Header
=
1254 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".header", F
, PreInsertBefore
);
1256 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".cond", F
, PreInsertBefore
);
1258 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".body", F
, PreInsertBefore
);
1260 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".inc", F
, PostInsertBefore
);
1262 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".exit", F
, PostInsertBefore
);
1264 BasicBlock::Create(Ctx
, "omp_" + Name
+ ".after", F
, PostInsertBefore
);
1266 // Use specified DebugLoc for new instructions.
1267 Builder
.SetCurrentDebugLocation(DL
);
1269 Builder
.SetInsertPoint(Preheader
);
1270 Builder
.CreateBr(Header
);
1272 Builder
.SetInsertPoint(Header
);
1273 PHINode
*IndVarPHI
= Builder
.CreatePHI(IndVarTy
, 2, "omp_" + Name
+ ".iv");
1274 IndVarPHI
->addIncoming(ConstantInt::get(IndVarTy
, 0), Preheader
);
1275 Builder
.CreateBr(Cond
);
1277 Builder
.SetInsertPoint(Cond
);
1279 Builder
.CreateICmpULT(IndVarPHI
, TripCount
, "omp_" + Name
+ ".cmp");
1280 Builder
.CreateCondBr(Cmp
, Body
, Exit
);
1282 Builder
.SetInsertPoint(Body
);
1283 Builder
.CreateBr(Latch
);
1285 Builder
.SetInsertPoint(Latch
);
1286 Value
*Next
= Builder
.CreateAdd(IndVarPHI
, ConstantInt::get(IndVarTy
, 1),
1287 "omp_" + Name
+ ".next", /*HasNUW=*/true);
1288 Builder
.CreateBr(Header
);
1289 IndVarPHI
->addIncoming(Next
, Latch
);
1291 Builder
.SetInsertPoint(Exit
);
1292 Builder
.CreateBr(After
);
1294 // Remember and return the canonical control flow.
1295 LoopInfos
.emplace_front();
1296 CanonicalLoopInfo
*CL
= &LoopInfos
.front();
1298 CL
->Preheader
= Preheader
;
1299 CL
->Header
= Header
;
1313 OpenMPIRBuilder::createCanonicalLoop(const LocationDescription
&Loc
,
1314 LoopBodyGenCallbackTy BodyGenCB
,
1315 Value
*TripCount
, const Twine
&Name
) {
1316 BasicBlock
*BB
= Loc
.IP
.getBlock();
1317 BasicBlock
*NextBB
= BB
->getNextNode();
1319 CanonicalLoopInfo
*CL
= createLoopSkeleton(Loc
.DL
, TripCount
, BB
->getParent(),
1320 NextBB
, NextBB
, Name
);
1321 BasicBlock
*After
= CL
->getAfter();
1323 // If location is not set, don't connect the loop.
1324 if (updateToLocation(Loc
)) {
1325 // Split the loop at the insertion point: Branch to the preheader and move
1326 // every following instruction to after the loop (the After BB). Also, the
1327 // new successor is the loop's after block.
1328 Builder
.CreateBr(CL
->Preheader
);
1329 After
->getInstList().splice(After
->begin(), BB
->getInstList(),
1330 Builder
.GetInsertPoint(), BB
->end());
1331 After
->replaceSuccessorsPhiUsesWith(BB
, After
);
1334 // Emit the body content. We do it after connecting the loop to the CFG to
1335 // avoid that the callback encounters degenerate BBs.
1336 BodyGenCB(CL
->getBodyIP(), CL
->getIndVar());
1344 CanonicalLoopInfo
*OpenMPIRBuilder::createCanonicalLoop(
1345 const LocationDescription
&Loc
, LoopBodyGenCallbackTy BodyGenCB
,
1346 Value
*Start
, Value
*Stop
, Value
*Step
, bool IsSigned
, bool InclusiveStop
,
1347 InsertPointTy ComputeIP
, const Twine
&Name
) {
1349 // Consider the following difficulties (assuming 8-bit signed integers):
1350 // * Adding \p Step to the loop counter which passes \p Stop may overflow:
1351 // DO I = 1, 100, 50
1352 /// * A \p Step of INT_MIN cannot not be normalized to a positive direction:
1353 // DO I = 100, 0, -128
1355 // Start, Stop and Step must be of the same integer type.
1356 auto *IndVarTy
= cast
<IntegerType
>(Start
->getType());
1357 assert(IndVarTy
== Stop
->getType() && "Stop type mismatch");
1358 assert(IndVarTy
== Step
->getType() && "Step type mismatch");
1360 LocationDescription ComputeLoc
=
1361 ComputeIP
.isSet() ? LocationDescription(ComputeIP
, Loc
.DL
) : Loc
;
1362 updateToLocation(ComputeLoc
);
1364 ConstantInt
*Zero
= ConstantInt::get(IndVarTy
, 0);
1365 ConstantInt
*One
= ConstantInt::get(IndVarTy
, 1);
1367 // Like Step, but always positive.
1370 // Distance between Start and Stop; always positive.
1373 // Condition whether there are no iterations are executed at all, e.g. because
1378 // Ensure that increment is positive. If not, negate and invert LB and UB.
1379 Value
*IsNeg
= Builder
.CreateICmpSLT(Step
, Zero
);
1380 Incr
= Builder
.CreateSelect(IsNeg
, Builder
.CreateNeg(Step
), Step
);
1381 Value
*LB
= Builder
.CreateSelect(IsNeg
, Stop
, Start
);
1382 Value
*UB
= Builder
.CreateSelect(IsNeg
, Start
, Stop
);
1383 Span
= Builder
.CreateSub(UB
, LB
, "", false, true);
1384 ZeroCmp
= Builder
.CreateICmp(
1385 InclusiveStop
? CmpInst::ICMP_SLT
: CmpInst::ICMP_SLE
, UB
, LB
);
1387 Span
= Builder
.CreateSub(Stop
, Start
, "", true);
1388 ZeroCmp
= Builder
.CreateICmp(
1389 InclusiveStop
? CmpInst::ICMP_ULT
: CmpInst::ICMP_ULE
, Stop
, Start
);
1392 Value
*CountIfLooping
;
1393 if (InclusiveStop
) {
1394 CountIfLooping
= Builder
.CreateAdd(Builder
.CreateUDiv(Span
, Incr
), One
);
1396 // Avoid incrementing past stop since it could overflow.
1397 Value
*CountIfTwo
= Builder
.CreateAdd(
1398 Builder
.CreateUDiv(Builder
.CreateSub(Span
, One
), Incr
), One
);
1399 Value
*OneCmp
= Builder
.CreateICmp(
1400 InclusiveStop
? CmpInst::ICMP_ULT
: CmpInst::ICMP_ULE
, Span
, Incr
);
1401 CountIfLooping
= Builder
.CreateSelect(OneCmp
, One
, CountIfTwo
);
1403 Value
*TripCount
= Builder
.CreateSelect(ZeroCmp
, Zero
, CountIfLooping
,
1404 "omp_" + Name
+ ".tripcount");
1406 auto BodyGen
= [=](InsertPointTy CodeGenIP
, Value
*IV
) {
1407 Builder
.restoreIP(CodeGenIP
);
1408 Value
*Span
= Builder
.CreateMul(IV
, Step
);
1409 Value
*IndVar
= Builder
.CreateAdd(Span
, Start
);
1410 BodyGenCB(Builder
.saveIP(), IndVar
);
1412 LocationDescription LoopLoc
= ComputeIP
.isSet() ? Loc
.IP
: Builder
.saveIP();
1413 return createCanonicalLoop(LoopLoc
, BodyGen
, TripCount
, Name
);
1416 // Returns an LLVM function to call for initializing loop bounds using OpenMP
1417 // static scheduling depending on `type`. Only i32 and i64 are supported by the
1418 // runtime. Always interpret integers as unsigned similarly to
1419 // CanonicalLoopInfo.
1420 static FunctionCallee
getKmpcForStaticInitForType(Type
*Ty
, Module
&M
,
1421 OpenMPIRBuilder
&OMPBuilder
) {
1422 unsigned Bitwidth
= Ty
->getIntegerBitWidth();
1424 return OMPBuilder
.getOrCreateRuntimeFunction(
1425 M
, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_4u
);
1427 return OMPBuilder
.getOrCreateRuntimeFunction(
1428 M
, omp::RuntimeFunction::OMPRTL___kmpc_for_static_init_8u
);
1429 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1432 // Sets the number of loop iterations to the given value. This value must be
1433 // valid in the condition block (i.e., defined in the preheader) and is
1434 // interpreted as an unsigned integer.
1435 void setCanonicalLoopTripCount(CanonicalLoopInfo
*CLI
, Value
*TripCount
) {
1436 Instruction
*CmpI
= &CLI
->getCond()->front();
1437 assert(isa
<CmpInst
>(CmpI
) && "First inst must compare IV with TripCount");
1438 CmpI
->setOperand(1, TripCount
);
1442 OpenMPIRBuilder::InsertPointTy
1443 OpenMPIRBuilder::applyStaticWorkshareLoop(DebugLoc DL
, CanonicalLoopInfo
*CLI
,
1444 InsertPointTy AllocaIP
,
1445 bool NeedsBarrier
, Value
*Chunk
) {
1446 assert(CLI
->isValid() && "Requires a valid canonical loop");
1448 // Set up the source location value for OpenMP runtime.
1449 Builder
.restoreIP(CLI
->getPreheaderIP());
1450 Builder
.SetCurrentDebugLocation(DL
);
1452 Constant
*SrcLocStr
= getOrCreateSrcLocStr(DL
);
1453 Value
*SrcLoc
= getOrCreateIdent(SrcLocStr
);
1455 // Declare useful OpenMP runtime functions.
1456 Value
*IV
= CLI
->getIndVar();
1457 Type
*IVTy
= IV
->getType();
1458 FunctionCallee StaticInit
= getKmpcForStaticInitForType(IVTy
, M
, *this);
1459 FunctionCallee StaticFini
=
1460 getOrCreateRuntimeFunction(M
, omp::OMPRTL___kmpc_for_static_fini
);
1462 // Allocate space for computed loop bounds as expected by the "init" function.
1463 Builder
.restoreIP(AllocaIP
);
1464 Type
*I32Type
= Type::getInt32Ty(M
.getContext());
1465 Value
*PLastIter
= Builder
.CreateAlloca(I32Type
, nullptr, "p.lastiter");
1466 Value
*PLowerBound
= Builder
.CreateAlloca(IVTy
, nullptr, "p.lowerbound");
1467 Value
*PUpperBound
= Builder
.CreateAlloca(IVTy
, nullptr, "p.upperbound");
1468 Value
*PStride
= Builder
.CreateAlloca(IVTy
, nullptr, "p.stride");
1470 // At the end of the preheader, prepare for calling the "init" function by
1471 // storing the current loop bounds into the allocated space. A canonical loop
1472 // always iterates from 0 to trip-count with step 1. Note that "init" expects
1473 // and produces an inclusive upper bound.
1474 Builder
.SetInsertPoint(CLI
->getPreheader()->getTerminator());
1475 Constant
*Zero
= ConstantInt::get(IVTy
, 0);
1476 Constant
*One
= ConstantInt::get(IVTy
, 1);
1477 Builder
.CreateStore(Zero
, PLowerBound
);
1478 Value
*UpperBound
= Builder
.CreateSub(CLI
->getTripCount(), One
);
1479 Builder
.CreateStore(UpperBound
, PUpperBound
);
1480 Builder
.CreateStore(One
, PStride
);
1482 // FIXME: schedule(static) is NOT the same as schedule(static,1)
1486 Value
*ThreadNum
= getOrCreateThreadID(SrcLoc
);
1488 Constant
*SchedulingType
=
1489 ConstantInt::get(I32Type
, static_cast<int>(OMPScheduleType::Static
));
1491 // Call the "init" function and update the trip count of the loop with the
1492 // value it produced.
1493 Builder
.CreateCall(StaticInit
,
1494 {SrcLoc
, ThreadNum
, SchedulingType
, PLastIter
, PLowerBound
,
1495 PUpperBound
, PStride
, One
, Chunk
});
1496 Value
*LowerBound
= Builder
.CreateLoad(IVTy
, PLowerBound
);
1497 Value
*InclusiveUpperBound
= Builder
.CreateLoad(IVTy
, PUpperBound
);
1498 Value
*TripCountMinusOne
= Builder
.CreateSub(InclusiveUpperBound
, LowerBound
);
1499 Value
*TripCount
= Builder
.CreateAdd(TripCountMinusOne
, One
);
1500 setCanonicalLoopTripCount(CLI
, TripCount
);
1502 // Update all uses of the induction variable except the one in the condition
1503 // block that compares it with the actual upper bound, and the increment in
1505 // TODO: this can eventually move to CanonicalLoopInfo or to a new
1506 // CanonicalLoopInfoUpdater interface.
1507 Builder
.SetInsertPoint(CLI
->getBody(), CLI
->getBody()->getFirstInsertionPt());
1508 Value
*UpdatedIV
= Builder
.CreateAdd(IV
, LowerBound
);
1509 IV
->replaceUsesWithIf(UpdatedIV
, [&](Use
&U
) {
1510 auto *Instr
= dyn_cast
<Instruction
>(U
.getUser());
1512 (Instr
->getParent() != CLI
->getCond() &&
1513 Instr
->getParent() != CLI
->getLatch() && Instr
!= UpdatedIV
);
1516 // In the "exit" block, call the "fini" function.
1517 Builder
.SetInsertPoint(CLI
->getExit(),
1518 CLI
->getExit()->getTerminator()->getIterator());
1519 Builder
.CreateCall(StaticFini
, {SrcLoc
, ThreadNum
});
1521 // Add the barrier if requested.
1523 createBarrier(LocationDescription(Builder
.saveIP(), DL
),
1524 omp::Directive::OMPD_for
, /* ForceSimpleCall */ false,
1525 /* CheckCancelFlag */ false);
1527 InsertPointTy AfterIP
= CLI
->getAfterIP();
1533 OpenMPIRBuilder::InsertPointTy
1534 OpenMPIRBuilder::applyWorkshareLoop(DebugLoc DL
, CanonicalLoopInfo
*CLI
,
1535 InsertPointTy AllocaIP
, bool NeedsBarrier
) {
1536 // Currently only supports static schedules.
1537 return applyStaticWorkshareLoop(DL
, CLI
, AllocaIP
, NeedsBarrier
);
1540 /// Returns an LLVM function to call for initializing loop bounds using OpenMP
1541 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1542 /// the runtime. Always interpret integers as unsigned similarly to
1543 /// CanonicalLoopInfo.
1544 static FunctionCallee
1545 getKmpcForDynamicInitForType(Type
*Ty
, Module
&M
, OpenMPIRBuilder
&OMPBuilder
) {
1546 unsigned Bitwidth
= Ty
->getIntegerBitWidth();
1548 return OMPBuilder
.getOrCreateRuntimeFunction(
1549 M
, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_4u
);
1551 return OMPBuilder
.getOrCreateRuntimeFunction(
1552 M
, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_init_8u
);
1553 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1556 /// Returns an LLVM function to call for updating the next loop using OpenMP
1557 /// dynamic scheduling depending on `type`. Only i32 and i64 are supported by
1558 /// the runtime. Always interpret integers as unsigned similarly to
1559 /// CanonicalLoopInfo.
1560 static FunctionCallee
1561 getKmpcForDynamicNextForType(Type
*Ty
, Module
&M
, OpenMPIRBuilder
&OMPBuilder
) {
1562 unsigned Bitwidth
= Ty
->getIntegerBitWidth();
1564 return OMPBuilder
.getOrCreateRuntimeFunction(
1565 M
, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_4u
);
1567 return OMPBuilder
.getOrCreateRuntimeFunction(
1568 M
, omp::RuntimeFunction::OMPRTL___kmpc_dispatch_next_8u
);
1569 llvm_unreachable("unknown OpenMP loop iterator bitwidth");
1572 OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::applyDynamicWorkshareLoop(
1573 DebugLoc DL
, CanonicalLoopInfo
*CLI
, InsertPointTy AllocaIP
,
1574 OMPScheduleType SchedType
, bool NeedsBarrier
, Value
*Chunk
) {
1575 assert(CLI
->isValid() && "Requires a valid canonical loop");
1577 // Set up the source location value for OpenMP runtime.
1578 Builder
.SetCurrentDebugLocation(DL
);
1580 Constant
*SrcLocStr
= getOrCreateSrcLocStr(DL
);
1581 Value
*SrcLoc
= getOrCreateIdent(SrcLocStr
);
1583 // Declare useful OpenMP runtime functions.
1584 Value
*IV
= CLI
->getIndVar();
1585 Type
*IVTy
= IV
->getType();
1586 FunctionCallee DynamicInit
= getKmpcForDynamicInitForType(IVTy
, M
, *this);
1587 FunctionCallee DynamicNext
= getKmpcForDynamicNextForType(IVTy
, M
, *this);
1589 // Allocate space for computed loop bounds as expected by the "init" function.
1590 Builder
.restoreIP(AllocaIP
);
1591 Type
*I32Type
= Type::getInt32Ty(M
.getContext());
1592 Value
*PLastIter
= Builder
.CreateAlloca(I32Type
, nullptr, "p.lastiter");
1593 Value
*PLowerBound
= Builder
.CreateAlloca(IVTy
, nullptr, "p.lowerbound");
1594 Value
*PUpperBound
= Builder
.CreateAlloca(IVTy
, nullptr, "p.upperbound");
1595 Value
*PStride
= Builder
.CreateAlloca(IVTy
, nullptr, "p.stride");
1597 // At the end of the preheader, prepare for calling the "init" function by
1598 // storing the current loop bounds into the allocated space. A canonical loop
1599 // always iterates from 0 to trip-count with step 1. Note that "init" expects
1600 // and produces an inclusive upper bound.
1601 BasicBlock
*PreHeader
= CLI
->getPreheader();
1602 Builder
.SetInsertPoint(PreHeader
->getTerminator());
1603 Constant
*One
= ConstantInt::get(IVTy
, 1);
1604 Builder
.CreateStore(One
, PLowerBound
);
1605 Value
*UpperBound
= CLI
->getTripCount();
1606 Builder
.CreateStore(UpperBound
, PUpperBound
);
1607 Builder
.CreateStore(One
, PStride
);
1609 BasicBlock
*Header
= CLI
->getHeader();
1610 BasicBlock
*Exit
= CLI
->getExit();
1611 BasicBlock
*Cond
= CLI
->getCond();
1612 InsertPointTy AfterIP
= CLI
->getAfterIP();
1614 // The CLI will be "broken" in the code below, as the loop is no longer
1615 // a valid canonical loop.
1620 Value
*ThreadNum
= getOrCreateThreadID(SrcLoc
);
1622 Constant
*SchedulingType
=
1623 ConstantInt::get(I32Type
, static_cast<int>(SchedType
));
1625 // Call the "init" function.
1626 Builder
.CreateCall(DynamicInit
,
1627 {SrcLoc
, ThreadNum
, SchedulingType
, /* LowerBound */ One
,
1628 UpperBound
, /* step */ One
, Chunk
});
1630 // An outer loop around the existing one.
1631 BasicBlock
*OuterCond
= BasicBlock::Create(
1632 PreHeader
->getContext(), Twine(PreHeader
->getName()) + ".outer.cond",
1633 PreHeader
->getParent());
1634 // This needs to be 32-bit always, so can't use the IVTy Zero above.
1635 Builder
.SetInsertPoint(OuterCond
, OuterCond
->getFirstInsertionPt());
1637 Builder
.CreateCall(DynamicNext
, {SrcLoc
, ThreadNum
, PLastIter
,
1638 PLowerBound
, PUpperBound
, PStride
});
1639 Constant
*Zero32
= ConstantInt::get(I32Type
, 0);
1640 Value
*MoreWork
= Builder
.CreateCmp(CmpInst::ICMP_NE
, Res
, Zero32
);
1642 Builder
.CreateSub(Builder
.CreateLoad(IVTy
, PLowerBound
), One
, "lb");
1643 Builder
.CreateCondBr(MoreWork
, Header
, Exit
);
1645 // Change PHI-node in loop header to use outer cond rather than preheader,
1646 // and set IV to the LowerBound.
1647 Instruction
*Phi
= &Header
->front();
1648 auto *PI
= cast
<PHINode
>(Phi
);
1649 PI
->setIncomingBlock(0, OuterCond
);
1650 PI
->setIncomingValue(0, LowerBound
);
1652 // Then set the pre-header to jump to the OuterCond
1653 Instruction
*Term
= PreHeader
->getTerminator();
1654 auto *Br
= cast
<BranchInst
>(Term
);
1655 Br
->setSuccessor(0, OuterCond
);
1657 // Modify the inner condition:
1658 // * Use the UpperBound returned from the DynamicNext call.
1659 // * jump to the loop outer loop when done with one of the inner loops.
1660 Builder
.SetInsertPoint(Cond
, Cond
->getFirstInsertionPt());
1661 UpperBound
= Builder
.CreateLoad(IVTy
, PUpperBound
, "ub");
1662 Instruction
*Comp
= &*Builder
.GetInsertPoint();
1663 auto *CI
= cast
<CmpInst
>(Comp
);
1664 CI
->setOperand(1, UpperBound
);
1665 // Redirect the inner exit to branch to outer condition.
1666 Instruction
*Branch
= &Cond
->back();
1667 auto *BI
= cast
<BranchInst
>(Branch
);
1668 assert(BI
->getSuccessor(1) == Exit
);
1669 BI
->setSuccessor(1, OuterCond
);
1671 // Add the barrier if requested.
1673 Builder
.SetInsertPoint(&Exit
->back());
1674 createBarrier(LocationDescription(Builder
.saveIP(), DL
),
1675 omp::Directive::OMPD_for
, /* ForceSimpleCall */ false,
1676 /* CheckCancelFlag */ false);
1683 /// Make \p Source branch to \p Target.
1685 /// Handles two situations:
1686 /// * \p Source already has an unconditional branch.
1687 /// * \p Source is a degenerate block (no terminator because the BB is
1688 /// the current head of the IR construction).
1689 static void redirectTo(BasicBlock
*Source
, BasicBlock
*Target
, DebugLoc DL
) {
1690 if (Instruction
*Term
= Source
->getTerminator()) {
1691 auto *Br
= cast
<BranchInst
>(Term
);
1692 assert(!Br
->isConditional() &&
1693 "BB's terminator must be an unconditional branch (or degenerate)");
1694 BasicBlock
*Succ
= Br
->getSuccessor(0);
1695 Succ
->removePredecessor(Source
, /*KeepOneInputPHIs=*/true);
1696 Br
->setSuccessor(0, Target
);
1700 auto *NewBr
= BranchInst::Create(Target
, Source
);
1701 NewBr
->setDebugLoc(DL
);
1704 /// Redirect all edges that branch to \p OldTarget to \p NewTarget. That is,
1705 /// after this \p OldTarget will be orphaned.
1706 static void redirectAllPredecessorsTo(BasicBlock
*OldTarget
,
1707 BasicBlock
*NewTarget
, DebugLoc DL
) {
1708 for (BasicBlock
*Pred
: make_early_inc_range(predecessors(OldTarget
)))
1709 redirectTo(Pred
, NewTarget
, DL
);
/// Determine which blocks in \p BBs are reachable from outside and remove the
/// ones that are not reachable from the function.
static void removeUnusedBlocksFromParent(ArrayRef<BasicBlock *> BBs) {
  // Candidate set; blocks proven to still have outside uses are removed from
  // it below.
  SmallPtrSet<BasicBlock *, 6> BBsToErase{BBs.begin(), BBs.end()};
  // A block has a "remaining use" if some instruction OUTSIDE the candidate
  // set references it (e.g. as a branch target or blockaddress).
  auto HasRemainingUses = [&BBsToErase](BasicBlock *BB) {
    for (Use &U : BB->uses()) {
      auto *UseInst = dyn_cast<Instruction>(U.getUser());
      if (!UseInst)
        continue;
      // Uses from inside the set do not keep a block alive.
      if (BBsToErase.count(UseInst->getParent()))
        continue;
      return true;
    }
    return false;
  };

  // Fixed-point iteration: removing one block from the candidate set can make
  // its uses "outside" uses for another candidate, so repeat until stable.
  while (true) {
    bool Changed = false;
    for (BasicBlock *BB : make_early_inc_range(BBsToErase)) {
      if (HasRemainingUses(BB)) {
        BBsToErase.erase(BB);
        Changed = true;
      }
    }
    if (!Changed)
      break;
  }

  // Delete the remaining (provably unreferenced) blocks in one batch so
  // cyclic references among them are handled correctly.
  SmallVector<BasicBlock *, 7> BBVec(BBsToErase.begin(), BBsToErase.end());
  DeleteDeadBlocks(BBVec);
}
// Collapse a perfect (or nearly perfect) loop nest \p Loops into a single
// canonical loop whose trip count is the product of the input trip counts.
// The original induction variables are re-derived from the collapsed one via
// div/mod. Returns the new CanonicalLoopInfo; the inputs are invalidated.
CanonicalLoopInfo *
OpenMPIRBuilder::collapseLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                               InsertPointTy ComputeIP) {
  assert(Loops.size() >= 1 && "At least one loop required");
  size_t NumLoops = Loops.size();

  // Nothing to do if there is already just one loop.
  if (NumLoops == 1)
    return Loops.front();

  CanonicalLoopInfo *Outermost = Loops.front();
  CanonicalLoopInfo *Innermost = Loops.back();
  BasicBlock *OrigPreheader = Outermost->getPreheader();
  BasicBlock *OrigAfter = Outermost->getAfter();
  Function *F = OrigPreheader->getParent();

  // Setup the IRBuilder for inserting the trip count computation.
  Builder.SetCurrentDebugLocation(DL);
  if (ComputeIP.isSet())
    Builder.restoreIP(ComputeIP);
  else
    Builder.restoreIP(Outermost->getPreheaderIP());

  // Derive the collapsed loop's trip count (product of all input counts).
  // TODO: Find common/largest indvar type.
  Value *CollapsedTripCount = nullptr;
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() &&
           "All loops to collapse must be valid canonical loops");
    Value *OrigTripCount = L->getTripCount();
    if (!CollapsedTripCount) {
      CollapsedTripCount = OrigTripCount;
      continue;
    }

    // TODO: Enable UndefinedSanitizer to diagnose an overflow here.
    CollapsedTripCount = Builder.CreateMul(CollapsedTripCount, OrigTripCount,
                                           {}, /*HasNUW=*/true);
  }

  // Create the collapsed loop control flow.
  CanonicalLoopInfo *Result =
      createLoopSkeleton(DL, CollapsedTripCount, F,
                         OrigPreheader->getNextNode(), OrigAfter, "collapsed");

  // Build the collapsed loop body code.
  // Start with deriving the input loop induction variables from the collapsed
  // one, using a divmod scheme. To preserve the original loops' order, the
  // innermost loop uses the least significant bits.
  Builder.restoreIP(Result->getBodyIP());

  Value *Leftover = Result->getIndVar();
  SmallVector<Value *> NewIndVars;
  NewIndVars.set_size(NumLoops);
  for (int i = NumLoops - 1; i >= 1; --i) {
    Value *OrigTripCount = Loops[i]->getTripCount();

    Value *NewIndVar = Builder.CreateURem(Leftover, OrigTripCount);
    NewIndVars[i] = NewIndVar;

    Leftover = Builder.CreateUDiv(Leftover, OrigTripCount);
  }
  // Outermost loop gets all the remaining bits.
  NewIndVars[0] = Leftover;

  // Construct the loop body control flow.
  // We progressively construct the branch structure following in direction of
  // the control flow, from the leading in-between code, the loop nest body, the
  // trailing in-between code, and rejoining the collapsed loop's latch.
  // ContinueBlock and ContinuePred keep track of the source(s) of next edge. If
  // the ContinueBlock is set, continue with that block. If ContinuePred, use
  // its predecessors as sources.
  BasicBlock *ContinueBlock = Result->getBody();
  BasicBlock *ContinuePred = nullptr;
  auto ContinueWith = [&ContinueBlock, &ContinuePred, DL](BasicBlock *Dest,
                                                          BasicBlock *NextSrc) {
    if (ContinueBlock)
      redirectTo(ContinueBlock, Dest, DL);
    else
      redirectAllPredecessorsTo(ContinuePred, Dest, DL);

    ContinueBlock = nullptr;
    ContinuePred = NextSrc;
  };

  // The code before the nested loop of each level.
  // Because we are sinking it into the nest, it will be executed more often
  // than the original loop. More sophisticated schemes could keep track of what
  // the in-between code is and instantiate it only once per thread.
  for (size_t i = 0; i < NumLoops - 1; ++i)
    ContinueWith(Loops[i]->getBody(), Loops[i + 1]->getHeader());

  // Connect the loop nest body.
  ContinueWith(Innermost->getBody(), Innermost->getLatch());

  // The code after the nested loop at each level.
  for (size_t i = NumLoops - 1; i > 0; --i)
    ContinueWith(Loops[i]->getAfter(), Loops[i - 1]->getLatch());

  // Connect the finished loop to the collapsed loop latch.
  ContinueWith(Result->getLatch(), nullptr);

  // Replace the input loops with the new collapsed loop.
  redirectTo(Outermost->getPreheader(), Result->getPreheader(), DL);
  redirectTo(Result->getAfter(), Outermost->getAfter(), DL);

  // Replace the input loop indvars with the derived ones.
  for (size_t i = 0; i < NumLoops; ++i)
    Loops[i]->getIndVar()->replaceAllUsesWith(NewIndVars[i]);

  // Remove unused parts of the input loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

  // The input CanonicalLoopInfos no longer describe live control flow.
  for (CanonicalLoopInfo *L : Loops)
    L->invalidate();

#ifndef NDEBUG
  Result->assertOK();
#endif
  return Result;
}
// Tile the loop nest \p Loops with the given \p TileSizes: for each input
// loop, generate an outer "floor" loop over tiles and an inner "tile" loop
// over the elements of one tile. Returns the 2*NumLoops generated loops
// (all floor loops first, then all tile loops); the inputs are invalidated.
std::vector<CanonicalLoopInfo *>
OpenMPIRBuilder::tileLoops(DebugLoc DL, ArrayRef<CanonicalLoopInfo *> Loops,
                           ArrayRef<Value *> TileSizes) {
  assert(TileSizes.size() == Loops.size() &&
         "Must pass as many tile sizes as there are loops");
  int NumLoops = Loops.size();
  assert(NumLoops >= 1 && "At least one loop to tile required");

  CanonicalLoopInfo *OutermostLoop = Loops.front();
  CanonicalLoopInfo *InnermostLoop = Loops.back();
  Function *F = OutermostLoop->getBody()->getParent();
  BasicBlock *InnerEnter = InnermostLoop->getBody();
  BasicBlock *InnerLatch = InnermostLoop->getLatch();

  // Collect original trip counts and induction variable to be accessible by
  // index. Also, the structure of the original loops is not preserved during
  // the construction of the tiled loops, so do it before we scavenge the BBs of
  // any original CanonicalLoopInfo.
  SmallVector<Value *, 4> OrigTripCounts, OrigIndVars;
  for (CanonicalLoopInfo *L : Loops) {
    assert(L->isValid() && "All input loops must be valid canonical loops");
    OrigTripCounts.push_back(L->getTripCount());
    OrigIndVars.push_back(L->getIndVar());
  }

  // Collect the code between loop headers. These may contain SSA definitions
  // that are used in the loop nest body. To be usable with in the innermost
  // body, these BasicBlocks will be sunk into the loop nest body. That is,
  // these instructions may be executed more often than before the tiling.
  // TODO: It would be sufficient to only sink them into body of the
  // corresponding tile loop.
  SmallVector<std::pair<BasicBlock *, BasicBlock *>, 4> InbetweenCode;
  for (int i = 0; i < NumLoops - 1; ++i) {
    CanonicalLoopInfo *Surrounding = Loops[i];
    CanonicalLoopInfo *Nested = Loops[i + 1];

    BasicBlock *EnterBB = Surrounding->getBody();
    BasicBlock *ExitBB = Nested->getHeader();
    InbetweenCode.emplace_back(EnterBB, ExitBB);
  }

  // Compute the trip counts of the floor loops.
  Builder.SetCurrentDebugLocation(DL);
  Builder.restoreIP(OutermostLoop->getPreheaderIP());
  SmallVector<Value *, 4> FloorCount, FloorRems;
  for (int i = 0; i < NumLoops; ++i) {
    Value *TileSize = TileSizes[i];
    Value *OrigTripCount = OrigTripCounts[i];
    Type *IVType = OrigTripCount->getType();

    Value *FloorTripCount = Builder.CreateUDiv(OrigTripCount, TileSize);
    Value *FloorTripRem = Builder.CreateURem(OrigTripCount, TileSize);

    // 0 if tripcount divides the tilesize, 1 otherwise.
    // 1 means we need an additional iteration for a partial tile.
    //
    // Unfortunately we cannot just use the roundup-formula
    //   (tripcount + tilesize - 1)/tilesize
    // because the summation might overflow. We do not want to introduce
    // undefined behavior when the untiled loop nest did not.
    Value *FloorTripOverflow =
        Builder.CreateICmpNE(FloorTripRem, ConstantInt::get(IVType, 0));

    FloorTripOverflow = Builder.CreateZExt(FloorTripOverflow, IVType);
    FloorTripCount =
        Builder.CreateAdd(FloorTripCount, FloorTripOverflow,
                          "omp_floor" + Twine(i) + ".tripcount", true);

    // Remember some values for later use.
    FloorCount.push_back(FloorTripCount);
    FloorRems.push_back(FloorTripRem);
  }

  // Generate the new loop nest, from the outermost to the innermost.
  std::vector<CanonicalLoopInfo *> Result;
  Result.reserve(NumLoops * 2);

  // The basic block of the surrounding loop that enters the nest generated
  // by this function.
  BasicBlock *Enter = OutermostLoop->getPreheader();

  // The basic block of the surrounding loop where the inner code should
  // continue.
  BasicBlock *Continue = OutermostLoop->getAfter();

  // Where the next loop basic block should be inserted.
  BasicBlock *OutroInsertBefore = InnermostLoop->getExit();

  // Create one new canonical loop, splice it between Enter and Continue, and
  // advance Enter/Continue/OutroInsertBefore so the next loop nests inside it.
  auto EmbeddNewLoop =
      [this, DL, F, InnerEnter, &Enter, &Continue, &OutroInsertBefore](
          Value *TripCount, const Twine &Name) -> CanonicalLoopInfo * {
    CanonicalLoopInfo *EmbeddedLoop = createLoopSkeleton(
        DL, TripCount, F, InnerEnter, OutroInsertBefore, Name);
    redirectTo(Enter, EmbeddedLoop->getPreheader(), DL);
    redirectTo(EmbeddedLoop->getAfter(), Continue, DL);

    // Setup the position where the next embedded loop connects to this loop.
    Enter = EmbeddedLoop->getBody();
    Continue = EmbeddedLoop->getLatch();
    OutroInsertBefore = EmbeddedLoop->getLatch();
    return EmbeddedLoop;
  };

  auto EmbeddNewLoops = [&Result, &EmbeddNewLoop](ArrayRef<Value *> TripCounts,
                                                  const Twine &NameBase) {
    for (auto P : enumerate(TripCounts)) {
      CanonicalLoopInfo *EmbeddedLoop =
          EmbeddNewLoop(P.value(), NameBase + Twine(P.index()));
      Result.push_back(EmbeddedLoop);
    }
  };

  EmbeddNewLoops(FloorCount, "floor");

  // Within the innermost floor loop, emit the code that computes the tile
  // trip counts: the tile size, except for the partial (epilogue) tile where
  // it is the remainder.
  Builder.SetInsertPoint(Enter->getTerminator());
  SmallVector<Value *, 4> TileCounts;
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    Value *TileSize = TileSizes[i];

    // NOTE(review): the floor indvar ranges over [0, FloorCount[i]), so with
    // FloorCount holding the rounded-up trip count this equality looks
    // unreachable; part of this computation was reconstructed from a damaged
    // source — confirm against upstream OMPIRBuilder.cpp.
    Value *FloorIsEpilogue =
        Builder.CreateICmpEQ(FloorLoop->getIndVar(), FloorCount[i]);
    Value *TileTripCount =
        Builder.CreateSelect(FloorIsEpilogue, FloorRems[i], TileSize);

    TileCounts.push_back(TileTripCount);
  }

  // Create the tile loops.
  EmbeddNewLoops(TileCounts, "tile");

  // Insert the inbetween code into the body.
  BasicBlock *BodyEnter = Enter;
  BasicBlock *BodyEntered = nullptr;
  for (std::pair<BasicBlock *, BasicBlock *> P : InbetweenCode) {
    BasicBlock *EnterBB = P.first;
    BasicBlock *ExitBB = P.second;

    if (BodyEnter)
      redirectTo(BodyEnter, EnterBB, DL);
    else
      redirectAllPredecessorsTo(BodyEntered, EnterBB, DL);

    BodyEnter = nullptr;
    BodyEntered = ExitBB;
  }

  // Append the original loop nest body into the generated loop nest body.
  if (BodyEnter)
    redirectTo(BodyEnter, InnerEnter, DL);
  else
    redirectAllPredecessorsTo(BodyEntered, InnerEnter, DL);
  redirectAllPredecessorsTo(InnerLatch, Continue, DL);

  // Replace the original induction variable with an induction variable computed
  // from the tile and floor induction variables:
  //   orig_iv = floor_iv * tile_size + tile_iv
  Builder.restoreIP(Result.back()->getBodyIP());
  for (int i = 0; i < NumLoops; ++i) {
    CanonicalLoopInfo *FloorLoop = Result[i];
    CanonicalLoopInfo *TileLoop = Result[NumLoops + i];
    Value *OrigIndVar = OrigIndVars[i];
    Value *Size = TileSizes[i];

    Value *Scale =
        Builder.CreateMul(Size, FloorLoop->getIndVar(), {}, /*HasNUW=*/true);
    Value *Shift =
        Builder.CreateAdd(Scale, TileLoop->getIndVar(), {}, /*HasNUW=*/true);
    OrigIndVar->replaceAllUsesWith(Shift);
  }

  // Remove unused parts of the original loops.
  SmallVector<BasicBlock *, 12> OldControlBBs;
  OldControlBBs.reserve(6 * Loops.size());
  for (CanonicalLoopInfo *Loop : Loops)
    Loop->collectControlBlocks(OldControlBBs);
  removeUnusedBlocksFromParent(OldControlBBs);

  // The input CanonicalLoopInfos no longer describe live control flow.
  for (CanonicalLoopInfo *L : Loops)
    L->invalidate();

#ifndef NDEBUG
  for (CanonicalLoopInfo *GenL : Result)
    GenL->assertOK();
#endif
  return Result;
}
2059 OpenMPIRBuilder::InsertPointTy
2060 OpenMPIRBuilder::createCopyPrivate(const LocationDescription
&Loc
,
2061 llvm::Value
*BufSize
, llvm::Value
*CpyBuf
,
2062 llvm::Value
*CpyFn
, llvm::Value
*DidIt
) {
2063 if (!updateToLocation(Loc
))
2066 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
2067 Value
*Ident
= getOrCreateIdent(SrcLocStr
);
2068 Value
*ThreadId
= getOrCreateThreadID(Ident
);
2070 llvm::Value
*DidItLD
= Builder
.CreateLoad(Builder
.getInt32Ty(), DidIt
);
2072 Value
*Args
[] = {Ident
, ThreadId
, BufSize
, CpyBuf
, CpyFn
, DidItLD
};
2074 Function
*Fn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_copyprivate
);
2075 Builder
.CreateCall(Fn
, Args
);
2077 return Builder
.saveIP();
// Emit an OpenMP `single` region: __kmpc_single guards the body so only one
// thread of the team executes it, __kmpc_end_single closes it. If DidIt is
// non-null it is zero-initialized here and (by convention with the caller)
// set inside the region for a later copyprivate.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createSingle(const LocationDescription &Loc,
                              BodyGenCallbackTy BodyGenCB,
                              FinalizeCallbackTy FiniCB, llvm::Value *DidIt) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  // If needed (i.e. not null), initialize `DidIt` with 0
  if (DidIt) {
    Builder.CreateStore(Builder.getInt32(0), DidIt);
  }

  Directive OMPD = Directive::OMPD_single;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {Ident, ThreadId};

  Function *EntryRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_single);
  Instruction *EntryCall = Builder.CreateCall(EntryRTLFn, Args);

  Function *ExitRTLFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_single);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // generates the following:
  // if (__kmpc_single()) {
  //   .... single region ...
  //   __kmpc_end_single
  // }
  // The region is Conditional because only the thread that won
  // __kmpc_single runs the body.
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ true, /*hasFinalize*/ true);
}
// Emit an OpenMP `critical` region protected by the named lock
// \p CriticalName. If \p HintInst is non-null, the `hint` clause variant of
// the runtime entry is used and the hint is appended to the enter-call args.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCritical(
    const LocationDescription &Loc, BodyGenCallbackTy BodyGenCB,
    FinalizeCallbackTy FiniCB, StringRef CriticalName, Value *HintInst) {

  if (!updateToLocation(Loc))
    return Loc.IP;

  Directive OMPD = Directive::OMPD_critical;
  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  // Named criticals share one global lock variable per name.
  Value *LockVar = getOMPCriticalRegionLock(CriticalName);
  Value *Args[] = {Ident, ThreadId, LockVar};

  SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args), std::end(Args));
  Function *RTFn = nullptr;
  if (HintInst) {
    // Add Hint to entry Args and create call
    EnterArgs.push_back(HintInst);
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical_with_hint);
  } else {
    RTFn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_critical);
  }
  Instruction *EntryCall = Builder.CreateCall(RTFn, EnterArgs);

  // The exit call never takes the hint, only the lock.
  Function *ExitRTLFn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_end_critical);
  Instruction *ExitCall = Builder.CreateCall(ExitRTLFn, Args);

  // Not conditional: every thread enters the region (serialized by the lock).
  return EmitOMPInlinedRegion(OMPD, EntryCall, ExitCall, BodyGenCB, FiniCB,
                              /*Conditional*/ false, /*hasFinalize*/ true);
}
// Common machinery for directives emitted as an inlined region:
//   entry-call; [cond-branch;] body; finalization; exit-call
// EntryCall/ExitCall are already-emitted runtime calls bracketing the region;
// BodyGenCB fills in the body; FiniCB (via the finalization stack) emits
// cleanup before the exit call. If Conditional, the body only runs when
// EntryCall returned non-zero.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::EmitOMPInlinedRegion(
    Directive OMPD, Instruction *EntryCall, Instruction *ExitCall,
    BodyGenCallbackTy BodyGenCB, FinalizeCallbackTy FiniCB, bool Conditional,
    bool HasFinalize, bool IsCancellable) {

  if (HasFinalize)
    FinalizationStack.push_back({FiniCB, OMPD, IsCancellable});

  // Create inlined region's entry and body blocks, in preparation
  // for conditional creation
  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Instruction *SplitPos = EntryBB->getTerminator();
  // Degenerate entry block: give it a placeholder terminator to split at.
  if (!isa_and_nonnull<BranchInst>(SplitPos))
    SplitPos = new UnreachableInst(Builder.getContext(), EntryBB);
  BasicBlock *ExitBB = EntryBB->splitBasicBlock(SplitPos, "omp_region.end");
  BasicBlock *FiniBB =
      EntryBB->splitBasicBlock(EntryBB->getTerminator(), "omp_region.finalize");

  Builder.SetInsertPoint(EntryBB->getTerminator());
  emitCommonDirectiveEntry(OMPD, EntryCall, ExitBB, Conditional);

  // generate body
  BodyGenCB(/* AllocaIP */ InsertPointTy(),
            /* CodeGenIP */ Builder.saveIP(), *FiniBB);

  // If we didn't emit a branch to FiniBB during body generation, it means
  // FiniBB is unreachable (e.g. while(1);). stop generating all the
  // unreachable blocks, and remove anything we are not going to use.
  auto SkipEmittingRegion = FiniBB->hasNPredecessors(0);
  if (SkipEmittingRegion) {
    FiniBB->eraseFromParent();
    ExitCall->eraseFromParent();
    // Discard finalization if we have it.
    if (HasFinalize) {
      assert(!FinalizationStack.empty() &&
             "Unexpected finalization stack state!");
      FinalizationStack.pop_back();
    }
  } else {
    // emit exit call and do any needed finalization.
    auto FinIP = InsertPointTy(FiniBB, FiniBB->getFirstInsertionPt());
    assert(FiniBB->getTerminator()->getNumSuccessors() == 1 &&
           FiniBB->getTerminator()->getSuccessor(0) == ExitBB &&
           "Unexpected control flow graph state!!");
    emitCommonDirectiveExit(OMPD, FinIP, ExitCall, HasFinalize);
    assert(FiniBB->getUniquePredecessor()->getUniqueSuccessor() == FiniBB &&
           "Unexpected Control Flow State!");
    MergeBlockIntoPredecessor(FiniBB);
  }

  // If we are skipping the region of a non conditional, remove the exit
  // block, and clear the builder's insertion point.
  assert(SplitPos->getParent() == ExitBB &&
         "Unexpected Insertion point location!");
  if (!Conditional && SkipEmittingRegion) {
    ExitBB->eraseFromParent();
    Builder.ClearInsertionPoint();
  } else {
    auto merged = MergeBlockIntoPredecessor(ExitBB);
    BasicBlock *ExitPredBB = SplitPos->getParent();
    auto InsertBB = merged ? ExitPredBB : ExitBB;
    // Drop the placeholder terminator added above, if we created one.
    if (!isa_and_nonnull<BranchInst>(SplitPos))
      SplitPos->eraseFromParent();
    Builder.SetInsertPoint(InsertBB);
  }

  return Builder.saveIP();
}
// If the directive is Conditional, turn the straight-line entry into
//   if (EntryCall != 0) goto body; else goto ExitBB;
// and leave the builder positioned inside the new body block. Otherwise the
// current insertion point is returned unchanged.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveEntry(
    Directive OMPD, Value *EntryCall, BasicBlock *ExitBB, bool Conditional) {
  // if nothing to do, Return current insertion point.
  if (!Conditional || !EntryCall)
    return Builder.saveIP();

  BasicBlock *EntryBB = Builder.GetInsertBlock();
  Value *CallBool = Builder.CreateIsNotNull(EntryCall);
  auto *ThenBB = BasicBlock::Create(M.getContext(), "omp_region.body");
  // Placeholder terminator so ThenBB is well-formed until the real
  // terminator is spliced in below.
  auto *UI = new UnreachableInst(Builder.getContext(), ThenBB);

  // Emit thenBB and set the Builder's insertion point there for
  // body generation next. Place the block after the current block.
  Function *CurFn = EntryBB->getParent();
  CurFn->getBasicBlockList().insertAfter(EntryBB->getIterator(), ThenBB);

  // Move Entry branch to end of ThenBB, and replace with conditional
  // branch (If-stmt)
  Instruction *EntryBBTI = EntryBB->getTerminator();
  Builder.CreateCondBr(CallBool, ThenBB, ExitBB);
  EntryBBTI->removeFromParent();
  Builder.SetInsertPoint(UI);
  Builder.Insert(EntryBBTI);
  UI->eraseFromParent();
  Builder.SetInsertPoint(ThenBB->getTerminator());

  // return an insertion point to ExitBB.
  return IRBuilder<>::InsertPoint(ExitBB, ExitBB->getFirstInsertionPt());
}
// Emit finalization (if requested) at FinIP, then place the pre-built
// ExitCall as the last instruction before the finalization block's
// terminator. Returns an insertion point just after the exit call.
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::emitCommonDirectiveExit(
    omp::Directive OMPD, InsertPointTy FinIP, Instruction *ExitCall,
    bool HasFinalize) {

  Builder.restoreIP(FinIP);

  // If there is finalization to do, emit it before the exit call
  if (HasFinalize) {
    assert(!FinalizationStack.empty() &&
           "Unexpected finalization stack state!");

    FinalizationInfo Fi = FinalizationStack.pop_back_val();
    assert(Fi.DK == OMPD && "Unexpected Directive for Finalization call!");

    Fi.FiniCB(FinIP);

    BasicBlock *FiniBB = FinIP.getBlock();
    Instruction *FiniBBTI = FiniBB->getTerminator();

    // set Builder IP for call creation
    Builder.SetInsertPoint(FiniBBTI);
  }

  if (!ExitCall)
    return Builder.saveIP();

  // place the Exitcall as last instruction before Finalization block terminator
  ExitCall->removeFromParent();
  Builder.Insert(ExitCall);

  return IRBuilder<>::InsertPoint(ExitCall->getParent(),
                                  ExitCall->getIterator());
}
// Build the control flow for a `copyin` clause: compare the master and
// private copies' addresses and only run the (caller-emitted) copy code when
// they differ. Returns an insertion point inside the copy block (or at its
// branch to the end block when BranchtoEnd).
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createCopyinClauseBlocks(
    InsertPointTy IP, Value *MasterAddr, Value *PrivateAddr,
    llvm::IntegerType *IntPtrTy, bool BranchtoEnd) {
  if (!IP.isSet())
    return IP;

  IRBuilder<>::InsertPointGuard IPG(Builder);

  // creates the following CFG structure
  //   OMP_Entry : (MasterAddr != PrivateAddr)?
  //      F      T
  //      |      \
  //      |    copyin.not.master
  //      |      /
  //      v     /
  //   copyin.not.master.end
  //      |
  //      v
  //   OMP.Entry.Next

  BasicBlock *OMP_Entry = IP.getBlock();
  Function *CurFn = OMP_Entry->getParent();
  BasicBlock *CopyBegin =
      BasicBlock::Create(M.getContext(), "copyin.not.master", CurFn);
  BasicBlock *CopyEnd = nullptr;

  // If entry block is terminated, split to preserve the branch to following
  // basic block (i.e. OMP.Entry.Next), otherwise, leave everything as is.
  if (isa_and_nonnull<BranchInst>(OMP_Entry->getTerminator())) {
    CopyEnd = OMP_Entry->splitBasicBlock(OMP_Entry->getTerminator(),
                                         "copyin.not.master.end");
    // Drop the branch duplicated by the split; a cond-br is added below.
    OMP_Entry->getTerminator()->eraseFromParent();
  } else {
    CopyEnd =
        BasicBlock::Create(M.getContext(), "copyin.not.master.end", CurFn);
  }

  Builder.SetInsertPoint(OMP_Entry);
  // Pointer equality is done on integers of pointer width.
  Value *MasterPtr = Builder.CreatePtrToInt(MasterAddr, IntPtrTy);
  Value *PrivatePtr = Builder.CreatePtrToInt(PrivateAddr, IntPtrTy);
  Value *cmp = Builder.CreateICmpNE(MasterPtr, PrivatePtr);
  Builder.CreateCondBr(cmp, CopyBegin, CopyEnd);

  Builder.SetInsertPoint(CopyBegin);
  if (BranchtoEnd)
    // Leave the IP *before* the branch so copy code lands inside CopyBegin.
    Builder.SetInsertPoint(Builder.CreateBr(CopyEnd));

  return Builder.saveIP();
}
// Emit a call to __kmpc_alloc(gtid, size, allocator) at Loc; the builder's
// original insertion point is restored on return (InsertPointGuard).
CallInst *OpenMPIRBuilder::createOMPAlloc(const LocationDescription &Loc,
                                          Value *Size, Value *Allocator,
                                          std::string Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  // Ident is only needed to derive the global thread id; the runtime call
  // itself does not take it.
  Value *ThreadId = getOrCreateThreadID(Ident);
  Value *Args[] = {ThreadId, Size, Allocator};

  Function *Fn = getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_alloc);

  return Builder.CreateCall(Fn, Args, Name);
}
2347 CallInst
*OpenMPIRBuilder::createOMPFree(const LocationDescription
&Loc
,
2348 Value
*Addr
, Value
*Allocator
,
2350 IRBuilder
<>::InsertPointGuard
IPG(Builder
);
2351 Builder
.restoreIP(Loc
.IP
);
2353 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
2354 Value
*Ident
= getOrCreateIdent(SrcLocStr
);
2355 Value
*ThreadId
= getOrCreateThreadID(Ident
);
2356 Value
*Args
[] = {ThreadId
, Addr
, Allocator
};
2357 Function
*Fn
= getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_free
);
2358 return Builder
.CreateCall(Fn
, Args
, Name
);
// Emit a call to __kmpc_threadprivate_cached, which returns the (lazily
// created) thread-private copy of `Pointer`. The per-variable cache is an OMP
// internal global named after `Name`.
CallInst *OpenMPIRBuilder::createCachedThreadPrivate(
    const LocationDescription &Loc, llvm::Value *Pointer,
    llvm::ConstantInt *Size, const llvm::Twine &Name) {
  IRBuilder<>::InsertPointGuard IPG(Builder);
  Builder.restoreIP(Loc.IP);

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  Value *ThreadId = getOrCreateThreadID(Ident);
  // i8** cache slot shared by all uses of this threadprivate variable.
  Constant *ThreadPrivateCache =
      getOrCreateOMPInternalVariable(Int8PtrPtr, Name);
  llvm::Value *Args[] = {Ident, ThreadId, Pointer, Size, ThreadPrivateCache};

  Function *Fn =
      getOrCreateRuntimeFunctionPtr(OMPRTL___kmpc_threadprivate_cached);

  return Builder.CreateCall(Fn, Args);
}
// Emit the device-side target-region prologue: call __kmpc_target_init and
// branch threads that should run user code (init returned -1) to
// "user_code.entry", all other threads to a ret-void "worker.exit" block.
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createTargetInit(const LocationDescription &Loc, bool IsSPMD, bool RequiresFullRuntime) {
  if (!updateToLocation(Loc))
    return Loc.IP;

  Constant *SrcLocStr = getOrCreateSrcLocStr(Loc);
  Value *Ident = getOrCreateIdent(SrcLocStr);
  ConstantInt *IsSPMDVal = ConstantInt::getBool(Int32->getContext(), IsSPMD);
  // Generic mode needs the runtime's state machine; SPMD does not.
  ConstantInt *UseGenericStateMachine =
      ConstantInt::getBool(Int32->getContext(), !IsSPMD);
  ConstantInt *RequiresFullRuntimeVal = ConstantInt::getBool(Int32->getContext(), RequiresFullRuntime);

  Function *Fn = getOrCreateRuntimeFunctionPtr(
      omp::RuntimeFunction::OMPRTL___kmpc_target_init);

  CallInst *ThreadKind =
      Builder.CreateCall(Fn, {Ident, IsSPMDVal, UseGenericStateMachine, RequiresFullRuntimeVal});

  Value *ExecUserCode = Builder.CreateICmpEQ(
      ThreadKind, ConstantInt::get(ThreadKind->getType(), -1), "exec_user_code");

  // ThreadKind = __kmpc_target_init(...)
  // if (ThreadKind == -1)
  //   user_code
  // else
  //   return;

  // Use a placeholder unreachable to split the current block; it is replaced
  // by the conditional branch below and then erased.
  auto *UI = Builder.CreateUnreachable();
  BasicBlock *CheckBB = UI->getParent();
  BasicBlock *UserCodeEntryBB = CheckBB->splitBasicBlock(UI, "user_code.entry");

  BasicBlock *WorkerExitBB = BasicBlock::Create(
      CheckBB->getContext(), "worker.exit", CheckBB->getParent());
  Builder.SetInsertPoint(WorkerExitBB);
  Builder.CreateRetVoid();

  auto *CheckBBTI = CheckBB->getTerminator();
  Builder.SetInsertPoint(CheckBBTI);
  Builder.CreateCondBr(ExecUserCode, UI->getParent(), WorkerExitBB);

  CheckBBTI->eraseFromParent();
  UI->eraseFromParent();

  // Continue in the "user_code" block, see diagram above and in
  // openmp/libomptarget/deviceRTLs/common/include/target.h .
  return InsertPointTy(UserCodeEntryBB, UserCodeEntryBB->getFirstInsertionPt());
}
2428 void OpenMPIRBuilder::createTargetDeinit(const LocationDescription
&Loc
,
2429 bool IsSPMD
, bool RequiresFullRuntime
) {
2430 if (!updateToLocation(Loc
))
2433 Constant
*SrcLocStr
= getOrCreateSrcLocStr(Loc
);
2434 Value
*Ident
= getOrCreateIdent(SrcLocStr
);
2435 ConstantInt
*IsSPMDVal
= ConstantInt::getBool(Int32
->getContext(), IsSPMD
);
2436 ConstantInt
*RequiresFullRuntimeVal
= ConstantInt::getBool(Int32
->getContext(), RequiresFullRuntime
);
2438 Function
*Fn
= getOrCreateRuntimeFunctionPtr(
2439 omp::RuntimeFunction::OMPRTL___kmpc_target_deinit
);
2441 Builder
.CreateCall(Fn
, {Ident
, IsSPMDVal
, RequiresFullRuntimeVal
});
2444 std::string
OpenMPIRBuilder::getNameWithSeparators(ArrayRef
<StringRef
> Parts
,
2445 StringRef FirstSeparator
,
2446 StringRef Separator
) {
2447 SmallString
<128> Buffer
;
2448 llvm::raw_svector_ostream
OS(Buffer
);
2449 StringRef Sep
= FirstSeparator
;
2450 for (StringRef Part
: Parts
) {
2454 return OS
.str().str();
// Return the module-level OMP-internal global named \p Name of type \p Ty,
// creating it (common linkage, zero-initialized) on first request. Cached in
// InternalVars so repeated requests return the same global.
Constant *OpenMPIRBuilder::getOrCreateOMPInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  // TODO: Replace the twine arg with stringref to get rid of the conversion
  // logic. However This is taken from current implementation in clang as is.
  // Since this method is used in many places exclusively for OMP internal use
  // we will keep it as is for temporarily until we move all users to the
  // builder and then, if possible, fix it everywhere in one go.
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  // try_emplace either finds the cached entry or inserts a null placeholder.
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->getPointerElementType() == Ty &&
           "OMP internal variable has different type than requested");
  } else {
    // TODO: investigate the appropriate linkage type used for the global
    // variable for possibly changing that to internal or private, or maybe
    // create different versions of the function for different OMP internal
    // variables.
    Elem.second = new llvm::GlobalVariable(
        M, Ty, /*IsConstant*/ false, llvm::GlobalValue::CommonLinkage,
        llvm::Constant::getNullValue(Ty), Elem.first(),
        /*InsertBefore=*/nullptr, llvm::GlobalValue::NotThreadLocal,
        AddressSpace);
  }

  return Elem.second;
}
2487 Value
*OpenMPIRBuilder::getOMPCriticalRegionLock(StringRef CriticalName
) {
2488 std::string Prefix
= Twine("gomp_critical_user_", CriticalName
).str();
2489 std::string Name
= getNameWithSeparators({Prefix
, "var"}, ".", ".");
2490 return getOrCreateOMPInternalVariable(KmpCriticalNameTy
, Name
);
2494 OpenMPIRBuilder::createOffloadMaptypes(SmallVectorImpl
<uint64_t> &Mappings
,
2495 std::string VarName
) {
2496 llvm::Constant
*MaptypesArrayInit
=
2497 llvm::ConstantDataArray::get(M
.getContext(), Mappings
);
2498 auto *MaptypesArrayGlobal
= new llvm::GlobalVariable(
2499 M
, MaptypesArrayInit
->getType(),
2500 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage
, MaptypesArrayInit
,
2502 MaptypesArrayGlobal
->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global
);
2503 return MaptypesArrayGlobal
;
2506 void OpenMPIRBuilder::createMapperAllocas(const LocationDescription
&Loc
,
2507 InsertPointTy AllocaIP
,
2508 unsigned NumOperands
,
2509 struct MapperAllocas
&MapperAllocas
) {
2510 if (!updateToLocation(Loc
))
2513 auto *ArrI8PtrTy
= ArrayType::get(Int8Ptr
, NumOperands
);
2514 auto *ArrI64Ty
= ArrayType::get(Int64
, NumOperands
);
2515 Builder
.restoreIP(AllocaIP
);
2516 AllocaInst
*ArgsBase
= Builder
.CreateAlloca(ArrI8PtrTy
);
2517 AllocaInst
*Args
= Builder
.CreateAlloca(ArrI8PtrTy
);
2518 AllocaInst
*ArgSizes
= Builder
.CreateAlloca(ArrI64Ty
);
2519 Builder
.restoreIP(Loc
.IP
);
2520 MapperAllocas
.ArgsBase
= ArgsBase
;
2521 MapperAllocas
.Args
= Args
;
2522 MapperAllocas
.ArgSizes
= ArgSizes
;
// Emit the call to \p MapperFunc (e.g. __tgt_target_data_begin_mapper) using
// the argument arrays previously created by createMapperAllocas. The arrays
// are decayed to element pointers via GEP [0][0]; the trailing null stands
// for the (absent) custom mappers array.
void OpenMPIRBuilder::emitMapperCall(const LocationDescription &Loc,
                                     Function *MapperFunc, Value *SrcLocInfo,
                                     Value *MaptypesArg, Value *MapnamesArg,
                                     struct MapperAllocas &MapperAllocas,
                                     int64_t DeviceID, unsigned NumOperands) {
  if (!updateToLocation(Loc))
    return;

  auto *ArrI8PtrTy = ArrayType::get(Int8Ptr, NumOperands);
  auto *ArrI64Ty = ArrayType::get(Int64, NumOperands);
  Value *ArgsBaseGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.ArgsBase,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgsGEP =
      Builder.CreateInBoundsGEP(ArrI8PtrTy, MapperAllocas.Args,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *ArgSizesGEP =
      Builder.CreateInBoundsGEP(ArrI64Ty, MapperAllocas.ArgSizes,
                                {Builder.getInt32(0), Builder.getInt32(0)});
  Value *NullPtr = Constant::getNullValue(Int8Ptr->getPointerTo());
  Builder.CreateCall(MapperFunc,
                     {SrcLocInfo, Builder.getInt64(DeviceID),
                      Builder.getInt32(NumOperands), ArgsBaseGEP, ArgsGEP,
                      ArgSizesGEP, MaptypesArg, MapnamesArg, NullPtr});
}
// Decide, per OpenMP's atomic memory-ordering rules, whether an implicit
// flush must follow the atomic operation of kind \p AK with ordering \p AO,
// and emit it. Returns true iff a flush was emitted.
// NOTE(review): the switch skeleton below was reconstructed from a damaged
// source; the per-kind case grouping should be confirmed against upstream
// OMPIRBuilder.cpp before relying on it.
bool OpenMPIRBuilder::checkAndEmitFlushAfterAtomic(
    const LocationDescription &Loc, llvm::AtomicOrdering AO, AtomicKind AK) {
  assert(!(AO == AtomicOrdering::NotAtomic ||
           AO == llvm::AtomicOrdering::Unordered) &&
         "Unexpected Atomic Ordering.");

  bool Flush = false;
  llvm::AtomicOrdering FlushAO = AtomicOrdering::Monotonic;

  switch (AK) {
  case Read:
    // Reads need an acquire-flavored flush.
    if (AO == AtomicOrdering::Acquire || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
    }
    break;
  case Write:
  case Update:
    // Writes/updates need a release-flavored flush.
    if (AO == AtomicOrdering::Release || AO == AtomicOrdering::AcquireRelease ||
        AO == AtomicOrdering::SequentiallyConsistent) {
      FlushAO = AtomicOrdering::Release;
      Flush = true;
    }
    break;
  case Capture: {
    // Capture both reads and writes; flush flavor follows the ordering.
    switch (AO) {
    case AtomicOrdering::Acquire:
      FlushAO = AtomicOrdering::Acquire;
      Flush = true;
      break;
    case AtomicOrdering::Release:
      FlushAO = AtomicOrdering::Release;
      Flush = true;
      break;
    case AtomicOrdering::AcquireRelease:
    case AtomicOrdering::SequentiallyConsistent:
      FlushAO = AtomicOrdering::AcquireRelease;
      Flush = true;
      break;
    default:
      // do nothing - leave silently.
      break;
    }
  }
  }

  if (Flush) {
    // Currently Flush RT call still doesn't take memory_ordering, so for when
    // that happens, this tries to do the resolution of which atomic ordering
    // to use, but issues the flush call regardless.
    // TODO: pass `FlushAO` after memory ordering support is added
    (void)FlushAO;
    emitFlush(Loc);
  }

  // for AO == AtomicOrdering::Monotonic and all other case combinations
  // do nothing
  return Flush;
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicRead(const LocationDescription &Loc,
                                  AtomicOpValue &X, AtomicOpValue &V,
                                  AtomicOrdering AO) {
  // Emit `v = x` where the load of x is atomic with ordering AO and the
  // store to v is a plain (possibly volatile) store.
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic read expected a scalar type");

  Value *XRead = nullptr;

  if (XElemTy->isIntegerTy()) {
    // Integers can be loaded atomically as-is.
    LoadInst *XLD =
        Builder.CreateLoad(XElemTy, X.Var, X.IsVolatile, "omp.atomic.read");
    XLD->setAtomic(AO);
    XRead = cast<Value>(XLD);
  } else {
    // We need to bitcast and perform atomic op as integer
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.src.int.cast");
    LoadInst *XLoad =
        Builder.CreateLoad(IntCastTy, XBCast, X.IsVolatile, "omp.atomic.load");
    XLoad->setAtomic(AO);
    // Reinterpret the loaded integer bits back as the element type.
    if (XElemTy->isFloatingPointTy()) {
      XRead = Builder.CreateBitCast(XLoad, XElemTy, "atomic.flt.cast");
    } else {
      XRead = Builder.CreateIntToPtr(XLoad, XElemTy, "atomic.ptr.cast");
    }
  }
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Read);
  Builder.CreateStore(XRead, V.Var, V.IsVolatile);
  return Builder.saveIP();
}
OpenMPIRBuilder::InsertPointTy
OpenMPIRBuilder::createAtomicWrite(const LocationDescription &Loc,
                                   AtomicOpValue &X, Value *Expr,
                                   AtomicOrdering AO) {
  // Emit `x = expr` where the store to x is atomic with ordering AO.
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() && "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic write expected a scalar type");

  if (XElemTy->isIntegerTy()) {
    // Integers can be stored atomically as-is.
    StoreInst *XSt = Builder.CreateStore(Expr, X.Var, X.IsVolatile);
    XSt->setAtomic(AO);
  } else {
    // We need to bitcast and perform atomic op as integers
    unsigned Addrspace = cast<PointerType>(XTy)->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast = Builder.CreateBitCast(
        X.Var, IntCastTy->getPointerTo(Addrspace), "atomic.dst.int.cast");
    Value *ExprCast =
        Builder.CreateBitCast(Expr, IntCastTy, "atomic.src.int.cast");
    StoreInst *XSt = Builder.CreateStore(ExprCast, XBCast, X.IsVolatile);
    XSt->setAtomic(AO);
  }

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Write);
  return Builder.saveIP();
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicUpdate(
    const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
    Value *Expr, AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
    AtomicUpdateCallbackTy &UpdateOp, bool IsXLHSInRHSPart) {
  // Emit `x = x RMWOp expr` atomically; the heavy lifting (atomicrmw vs.
  // cmpxchg loop) is done by emitAtomicUpdate.
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() &&
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic update expected a scalar type");
  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
         (RMWOp != AtomicRMWInst::UMax) && (RMWOp != AtomicRMWInst::UMin) &&
         "OpenMP atomic does not support LT or GT operations");

  emitAtomicUpdate(AllocIP, X.Var, Expr, AO, RMWOp, UpdateOp, X.IsVolatile,
                   IsXLHSInRHSPart);
  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Update);
  return Builder.saveIP();
}
2713 Value
*OpenMPIRBuilder::emitRMWOpAsInstruction(Value
*Src1
, Value
*Src2
,
2714 AtomicRMWInst::BinOp RMWOp
) {
2716 case AtomicRMWInst::Add
:
2717 return Builder
.CreateAdd(Src1
, Src2
);
2718 case AtomicRMWInst::Sub
:
2719 return Builder
.CreateSub(Src1
, Src2
);
2720 case AtomicRMWInst::And
:
2721 return Builder
.CreateAnd(Src1
, Src2
);
2722 case AtomicRMWInst::Nand
:
2723 return Builder
.CreateNeg(Builder
.CreateAnd(Src1
, Src2
));
2724 case AtomicRMWInst::Or
:
2725 return Builder
.CreateOr(Src1
, Src2
);
2726 case AtomicRMWInst::Xor
:
2727 return Builder
.CreateXor(Src1
, Src2
);
2728 case AtomicRMWInst::Xchg
:
2729 case AtomicRMWInst::FAdd
:
2730 case AtomicRMWInst::FSub
:
2731 case AtomicRMWInst::BAD_BINOP
:
2732 case AtomicRMWInst::Max
:
2733 case AtomicRMWInst::Min
:
2734 case AtomicRMWInst::UMax
:
2735 case AtomicRMWInst::UMin
:
2736 llvm_unreachable("Unsupported atomic update operation");
2738 llvm_unreachable("Unsupported atomic update operation");
std::pair<Value *, Value *>
OpenMPIRBuilder::emitAtomicUpdate(Instruction *AllocIP, Value *X, Value *Expr,
                                  AtomicOrdering AO, AtomicRMWInst::BinOp RMWOp,
                                  AtomicUpdateCallbackTy &UpdateOp,
                                  bool VolatileX, bool IsXLHSInRHSPart) {
  // Atomically update *X with Expr. Returns {old value, updated value}; the
  // second element is recomputed with plain instructions so capture
  // constructs can store it.
  Type *XElemTy = X->getType()->getPointerElementType();

  // atomicrmw covers only integer ops with X on the LHS; float add/sub,
  // unknown ops, and `expr - x` must use a compare-exchange loop.
  bool DoCmpExch =
      ((RMWOp == AtomicRMWInst::BAD_BINOP) || (RMWOp == AtomicRMWInst::FAdd)) ||
      (RMWOp == AtomicRMWInst::FSub) ||
      (RMWOp == AtomicRMWInst::Sub && !IsXLHSInRHSPart);

  std::pair<Value *, Value *> Res;
  if (XElemTy->isIntegerTy() && !DoCmpExch) {
    Res.first = Builder.CreateAtomicRMW(RMWOp, X, Expr, llvm::MaybeAlign(), AO);
    // not needed except in case of postfix captures. Generate anyway for
    // consistency with the else part. Will be removed with any DCE pass.
    Res.second = emitRMWOpAsInstruction(Res.first, Expr, RMWOp);
  } else {
    // Compare-exchange loop: atomically load *X as an integer of the same
    // bit width, then retry cmpxchg(old, updated) until it succeeds.
    unsigned Addrspace = cast<PointerType>(X->getType())->getAddressSpace();
    IntegerType *IntCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    Value *XBCast =
        Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    LoadInst *OldVal =
        Builder.CreateLoad(IntCastTy, XBCast, X->getName() + ".atomic.load");
    OldVal->setAtomic(AO);

    // Split the current block into CurBB -> ContBB (retry loop) -> ExitBB.
    // If CurBB had no terminator yet, synthesize an unreachable so the
    // splits have something to split around; it is cleaned up below.
    BasicBlock *CurBB = Builder.GetInsertBlock();
    Instruction *CurBBTI = CurBB->getTerminator();
    CurBBTI = CurBBTI ? CurBBTI : Builder.CreateUnreachable();
    BasicBlock *ExitBB =
        CurBB->splitBasicBlock(CurBBTI, X->getName() + ".atomic.exit");
    BasicBlock *ContBB = CurBB->splitBasicBlock(CurBB->getTerminator(),
                                                X->getName() + ".atomic.cont");
    ContBB->getTerminator()->eraseFromParent();
    Builder.SetInsertPoint(ContBB);
    // PHI over the initially loaded value and the value observed by a
    // failed cmpxchg iteration.
    llvm::PHINode *PHI = Builder.CreatePHI(OldVal->getType(), 2);
    PHI->addIncoming(OldVal, CurBB);
    // Scratch slot for the updated value, hoisted to the alloca point.
    AllocaInst *NewAtomicAddr = Builder.CreateAlloca(XElemTy);
    NewAtomicAddr->setName(X->getName() + "x.new.val");
    NewAtomicAddr->moveBefore(AllocIP);
    IntegerType *NewAtomicCastTy =
        IntegerType::get(M.getContext(), XElemTy->getScalarSizeInBits());
    bool IsIntTy = XElemTy->isIntegerTy();
    Value *NewAtomicIntAddr =
        (IsIntTy)
            ? NewAtomicAddr
            : Builder.CreateBitCast(NewAtomicAddr,
                                    NewAtomicCastTy->getPointerTo(Addrspace));
    Value *OldExprVal = PHI;
    if (!IsIntTy) {
      // Reinterpret the loaded integer bits as the element type before
      // handing them to the user's update callback.
      if (XElemTy->isFloatingPointTy()) {
        OldExprVal = Builder.CreateBitCast(PHI, XElemTy,
                                           X->getName() + ".atomic.fltCast");
      } else {
        OldExprVal = Builder.CreateIntToPtr(PHI, XElemTy,
                                            X->getName() + ".atomic.ptrCast");
      }
    }

    Value *Upd = UpdateOp(OldExprVal, Builder);
    Builder.CreateStore(Upd, NewAtomicAddr);
    LoadInst *DesiredVal = Builder.CreateLoad(XElemTy, NewAtomicIntAddr);
    Value *XAddr =
        (IsIntTy)
            ? X
            : Builder.CreateBitCast(X, IntCastTy->getPointerTo(Addrspace));
    AtomicOrdering Failure =
        llvm::AtomicCmpXchgInst::getStrongestFailureOrdering(AO);
    AtomicCmpXchgInst *Result = Builder.CreateAtomicCmpXchg(
        XAddr, OldExprVal, DesiredVal, llvm::MaybeAlign(), AO, Failure);
    Result->setVolatile(VolatileX);
    Value *PreviousVal = Builder.CreateExtractValue(Result, /*Idxs=*/0);
    Value *SuccessFailureVal = Builder.CreateExtractValue(Result, /*Idxs=*/1);
    PHI->addIncoming(PreviousVal, Builder.GetInsertBlock());
    Builder.CreateCondBr(SuccessFailureVal, ExitBB, ContBB);

    Res.first = OldExprVal;
    Res.second = Upd;

    // set Insertion point in exit block
    if (UnreachableInst *ExitTI =
            dyn_cast<UnreachableInst>(ExitBB->getTerminator())) {
      CurBBTI->eraseFromParent();
      Builder.SetInsertPoint(ExitBB);
    } else {
      // NOTE(review): in this branch the dyn_cast failed, so ExitTI is null
      // and SetInsertPoint would dereference it; presumably this path is
      // never taken because ExitBB's terminator is the synthesized
      // unreachable — TODO confirm against upstream and consider
      // SetInsertPoint(ExitBB->getTerminator()).
      Builder.SetInsertPoint(ExitTI);
    }
  }

  return Res;
}
OpenMPIRBuilder::InsertPointTy OpenMPIRBuilder::createAtomicCapture(
    const LocationDescription &Loc, Instruction *AllocIP, AtomicOpValue &X,
    AtomicOpValue &V, Value *Expr, AtomicOrdering AO,
    AtomicRMWInst::BinOp RMWOp, AtomicUpdateCallbackTy &UpdateOp,
    bool UpdateExpr, bool IsPostfixUpdate, bool IsXLHSInRHSPart) {
  // Emit an atomic capture: update X and store one of {old value, new
  // value} into V depending on prefix/postfix form.
  if (!updateToLocation(Loc))
    return Loc.IP;

  Type *XTy = X.Var->getType();
  assert(XTy->isPointerTy() &&
         "OMP Atomic expects a pointer to target memory");
  Type *XElemTy = XTy->getPointerElementType();
  assert((XElemTy->isFloatingPointTy() || XElemTy->isIntegerTy() ||
          XElemTy->isPointerTy()) &&
         "OMP atomic capture expected a scalar type");
  assert((RMWOp != AtomicRMWInst::Max) && (RMWOp != AtomicRMWInst::Min) &&
         "OpenMP atomic does not support LT or GT operations");

  // If UpdateExpr is 'x' updated with some `expr` not based on 'x',
  // 'x' is simply atomically rewritten with 'expr'.
  AtomicRMWInst::BinOp AtomicOp = (UpdateExpr ? RMWOp : AtomicRMWInst::Xchg);
  std::pair<Value *, Value *> Result =
      emitAtomicUpdate(AllocIP, X.Var, Expr, AO, AtomicOp, UpdateOp,
                       X.IsVolatile, IsXLHSInRHSPart);

  // Postfix form captures the value before the update (Result.first),
  // prefix form the value after (Result.second).
  Value *CapturedVal = (IsPostfixUpdate ? Result.first : Result.second);
  Builder.CreateStore(CapturedVal, V.Var, V.IsVolatile);

  checkAndEmitFlushAfterAtomic(Loc, AO, AtomicKind::Capture);
  return Builder.saveIP();
}
2874 OpenMPIRBuilder::createOffloadMapnames(SmallVectorImpl
<llvm::Constant
*> &Names
,
2875 std::string VarName
) {
2876 llvm::Constant
*MapNamesArrayInit
= llvm::ConstantArray::get(
2877 llvm::ArrayType::get(
2878 llvm::Type::getInt8Ty(M
.getContext())->getPointerTo(), Names
.size()),
2880 auto *MapNamesArrayGlobal
= new llvm::GlobalVariable(
2881 M
, MapNamesArrayInit
->getType(),
2882 /*isConstant=*/true, llvm::GlobalValue::PrivateLinkage
, MapNamesArrayInit
,
2884 return MapNamesArrayGlobal
;
// Create all simple and struct types exposed by the runtime and remember
// the llvm::PointerTypes of them for easy access later.
void OpenMPIRBuilder::initializeTypes(Module &M) {
  LLVMContext &Ctx = M.getContext();
  // Scratch variable reused by the OMP_STRUCT_TYPE expansions below.
  StructType *T;
// Each macro expansion assigns the member(s) declared in OMPIRBuilder.h for
// every entry of OMPKinds.def; the Ptr/PtrTy members cache the unqualified
// pointer type of each created type.
#define OMP_TYPE(VarName, InitValue) VarName = InitValue;
#define OMP_ARRAY_TYPE(VarName, ElemTy, ArraySize)                             \
  VarName##Ty = ArrayType::get(ElemTy, ArraySize);                             \
  VarName##PtrTy = PointerType::getUnqual(VarName##Ty);
#define OMP_FUNCTION_TYPE(VarName, IsVarArg, ReturnType, ...)                  \
  VarName = FunctionType::get(ReturnType, {__VA_ARGS__}, IsVarArg);            \
  VarName##Ptr = PointerType::getUnqual(VarName);
#define OMP_STRUCT_TYPE(VarName, StructName, ...)                              \
  T = StructType::getTypeByName(Ctx, StructName);                              \
  if (!T)                                                                      \
    T = StructType::create(Ctx, {__VA_ARGS__}, StructName);                    \
  VarName = T;                                                                 \
  VarName##Ptr = PointerType::getUnqual(T);
#include "llvm/Frontend/OpenMP/OMPKinds.def"
}
2908 void OpenMPIRBuilder::OutlineInfo::collectBlocks(
2909 SmallPtrSetImpl
<BasicBlock
*> &BlockSet
,
2910 SmallVectorImpl
<BasicBlock
*> &BlockVector
) {
2911 SmallVector
<BasicBlock
*, 32> Worklist
;
2912 BlockSet
.insert(EntryBB
);
2913 BlockSet
.insert(ExitBB
);
2915 Worklist
.push_back(EntryBB
);
2916 while (!Worklist
.empty()) {
2917 BasicBlock
*BB
= Worklist
.pop_back_val();
2918 BlockVector
.push_back(BB
);
2919 for (BasicBlock
*SuccBB
: successors(BB
))
2920 if (BlockSet
.insert(SuccBB
).second
)
2921 Worklist
.push_back(SuccBB
);
void CanonicalLoopInfo::collectControlBlocks(
    SmallVectorImpl<BasicBlock *> &BBs) {
  // We only count those BBs as control block for which we do not need to
  // reverse the CFG, i.e. not the loop body which can contain arbitrary control
  // flow. For consistency, this also means we do not add the Body block, which
  // is just the entry to the body code.
  BBs.reserve(BBs.size() + 6);
  BBs.append({Preheader, Header, Cond, Latch, Exit, After});
}
2935 void CanonicalLoopInfo::assertOK() const {
2937 // No constraints if this object currently does not describe a loop.
2941 // Verify standard control-flow we use for OpenMP loops.
2943 assert(isa
<BranchInst
>(Preheader
->getTerminator()) &&
2944 "Preheader must terminate with unconditional branch");
2945 assert(Preheader
->getSingleSuccessor() == Header
&&
2946 "Preheader must jump to header");
2949 assert(isa
<BranchInst
>(Header
->getTerminator()) &&
2950 "Header must terminate with unconditional branch");
2951 assert(Header
->getSingleSuccessor() == Cond
&&
2952 "Header must jump to exiting block");
2955 assert(Cond
->getSinglePredecessor() == Header
&&
2956 "Exiting block only reachable from header");
2958 assert(isa
<BranchInst
>(Cond
->getTerminator()) &&
2959 "Exiting block must terminate with conditional branch");
2960 assert(size(successors(Cond
)) == 2 &&
2961 "Exiting block must have two successors");
2962 assert(cast
<BranchInst
>(Cond
->getTerminator())->getSuccessor(0) == Body
&&
2963 "Exiting block's first successor jump to the body");
2964 assert(cast
<BranchInst
>(Cond
->getTerminator())->getSuccessor(1) == Exit
&&
2965 "Exiting block's second successor must exit the loop");
2968 assert(Body
->getSinglePredecessor() == Cond
&&
2969 "Body only reachable from exiting block");
2970 assert(!isa
<PHINode
>(Body
->front()));
2973 assert(isa
<BranchInst
>(Latch
->getTerminator()) &&
2974 "Latch must terminate with unconditional branch");
2975 assert(Latch
->getSingleSuccessor() == Header
&& "Latch must jump to header");
2976 // TODO: To support simple redirecting of the end of the body code that has
2977 // multiple; introduce another auxiliary basic block like preheader and after.
2978 assert(Latch
->getSinglePredecessor() != nullptr);
2979 assert(!isa
<PHINode
>(Latch
->front()));
2982 assert(isa
<BranchInst
>(Exit
->getTerminator()) &&
2983 "Exit block must terminate with unconditional branch");
2984 assert(Exit
->getSingleSuccessor() == After
&&
2985 "Exit block must jump to after block");
2988 assert(After
->getSinglePredecessor() == Exit
&&
2989 "After block only reachable from exit block");
2990 assert(After
->empty() || !isa
<PHINode
>(After
->front()));
2992 Instruction
*IndVar
= getIndVar();
2993 assert(IndVar
&& "Canonical induction variable not found?");
2994 assert(isa
<IntegerType
>(IndVar
->getType()) &&
2995 "Induction variable must be an integer");
2996 assert(cast
<PHINode
>(IndVar
)->getParent() == Header
&&
2997 "Induction variable must be a PHI in the loop header");
2998 assert(cast
<PHINode
>(IndVar
)->getIncomingBlock(0) == Preheader
);
3000 cast
<ConstantInt
>(cast
<PHINode
>(IndVar
)->getIncomingValue(0))->isZero());
3001 assert(cast
<PHINode
>(IndVar
)->getIncomingBlock(1) == Latch
);
3003 auto *NextIndVar
= cast
<PHINode
>(IndVar
)->getIncomingValue(1);
3004 assert(cast
<Instruction
>(NextIndVar
)->getParent() == Latch
);
3005 assert(cast
<BinaryOperator
>(NextIndVar
)->getOpcode() == BinaryOperator::Add
);
3006 assert(cast
<BinaryOperator
>(NextIndVar
)->getOperand(0) == IndVar
);
3007 assert(cast
<ConstantInt
>(cast
<BinaryOperator
>(NextIndVar
)->getOperand(1))
3010 Value
*TripCount
= getTripCount();
3011 assert(TripCount
&& "Loop trip count not found?");
3012 assert(IndVar
->getType() == TripCount
->getType() &&
3013 "Trip count and induction variable must have the same type");
3015 auto *CmpI
= cast
<CmpInst
>(&Cond
->front());
3016 assert(CmpI
->getPredicate() == CmpInst::ICMP_ULT
&&
3017 "Exit condition must be a signed less-than comparison");
3018 assert(CmpI
->getOperand(0) == IndVar
&&
3019 "Exit condition must compare the induction variable");
3020 assert(CmpI
->getOperand(1) == TripCount
&&
3021 "Exit condition must compare with the trip count");
3025 void CanonicalLoopInfo::invalidate() {
3026 Preheader
= nullptr;