//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel' directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
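  // Conceptually, for an untied task such as
  //   #pragma omp task untied
  //   { ... }
  // the action above turns the task entry into a switch over the part id:
  // each ".untied.jmp." block is one resumption point, and emitUntiedSwitch
  // stores the next part id and runs UntiedCodeGen (which re-enqueues the
  // task) before branching out, so execution resumes at the recorded case
  // label. This is a summary of the codegen above, not additional behavior.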
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
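// Typical usage is a sketch like the following (actual call sites live in the
// emit* helpers later in this file):
//   {
//     InlinedOpenMPRegionRAII Region(CGF, CodeGen, OMPD_critical,
//                                    /*HasCancel=*/false);
//     CGF.EmitStmt(Body); // emitted with the inlined OpenMP region info
//   } // destructor restores the previous CapturedStmtInfo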
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
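// For example, a construct inside a function "foo" at line 5, column 9 of a
// hypothetical file bar.c would be described by the psource string
// ";bar.c;foo;5;9;;" (this is the format produced by
// getIdentStringFromSourceLocation later in this file).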
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
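// For reference, a directive such as
//   #pragma omp for schedule(dynamic, 4)
// is lowered using OMP_sch_dynamic_chunked, and a nonmonotonic modifier would
// additionally OR in OMP_sch_modifier_nonmonotonic (see the schedule lowering
// helpers later in this file).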
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
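// In other words, a RegionCodeGenTy bundles the codegen callback with an
// optional PrePostActionTy: when an action is attached, its Exit() is
// registered as a normal-and-EH cleanup (via CleanupTy above), so it runs
// even if emission of the region body unwinds.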
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
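// The emitted control flow has roughly this shape (block names match those
// created above):
//   entry:              br i1 %isempty, %omp.arrayinit.done, %omp.arrayinit.body
//   omp.arrayinit.body: PHIs over source/destination element pointers,
//                       per-element initialization, pointer increments,
//                       conditional branch (back edge or done)
//   omp.arrayinit.done: execution continues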
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars = CGF.getTypeSize(
        OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
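// For example (a sketch), given
//   #pragma omp declare reduction(myop : int : omp_out += omp_in) \
//       initializer(omp_priv = 0)
// this helper emits an internal ".omp_combiner." function whose two pointer
// parameters are privatized as omp_out/omp_in before the combiner expression
// is emitted into its body, and an ".omp_initializer." doing the same for
// omp_priv/omp_orig.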
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF)
    FunctionUDRMap[CGF->CurFn].push_back(D);
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
      return llvm::Error::success();
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
*CGOpenMPRuntime::emitTaskOutlinedFunction(
1278 const OMPExecutableDirective
&D
, const VarDecl
*ThreadIDVar
,
1279 const VarDecl
*PartIDVar
, const VarDecl
*TaskTVar
,
1280 OpenMPDirectiveKind InnermostKind
, const RegionCodeGenTy
&CodeGen
,
1281 bool Tied
, unsigned &NumberOfParts
) {
1282 auto &&UntiedCodeGen
= [this, &D
, TaskTVar
](CodeGenFunction
&CGF
,
1283 PrePostActionTy
&) {
1284 llvm::Value
*ThreadID
= getThreadID(CGF
, D
.getBeginLoc());
1285 llvm::Value
*UpLoc
= emitUpdateLocation(CGF
, D
.getBeginLoc());
1286 llvm::Value
*TaskArgs
[] = {
1288 CGF
.EmitLoadOfPointerLValue(CGF
.GetAddrOfLocalVar(TaskTVar
),
1289 TaskTVar
->getType()->castAs
<PointerType
>())
1291 CGF
.EmitRuntimeCall(OMPBuilder
.getOrCreateRuntimeFunction(
1292 CGM
.getModule(), OMPRTL___kmpc_omp_task
),
1295 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy
Action(Tied
, PartIDVar
,
1297 CodeGen
.setAction(Action
);
1298 assert(!ThreadIDVar
->getType()->isPointerType() &&
1299 "thread id variable must be of type kmp_int32 for tasks");
1300 const OpenMPDirectiveKind Region
=
1301 isOpenMPTaskLoopDirective(D
.getDirectiveKind()) ? OMPD_taskloop
1303 const CapturedStmt
*CS
= D
.getCapturedStmt(Region
);
1304 bool HasCancel
= false;
1305 if (const auto *TD
= dyn_cast
<OMPTaskDirective
>(&D
))
1306 HasCancel
= TD
->hasCancel();
1307 else if (const auto *TD
= dyn_cast
<OMPTaskLoopDirective
>(&D
))
1308 HasCancel
= TD
->hasCancel();
1309 else if (const auto *TD
= dyn_cast
<OMPMasterTaskLoopDirective
>(&D
))
1310 HasCancel
= TD
->hasCancel();
1311 else if (const auto *TD
= dyn_cast
<OMPParallelMasterTaskLoopDirective
>(&D
))
1312 HasCancel
= TD
->hasCancel();
1314 CodeGenFunction
CGF(CGM
, true);
1315 CGOpenMPTaskOutlinedRegionInfo
CGInfo(*CS
, ThreadIDVar
, CodeGen
,
1316 InnermostKind
, HasCancel
, Action
);
1317 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGInfo
);
1318 llvm::Function
*Res
= CGF
.GenerateCapturedStmtFunction(*CS
);
1320 NumberOfParts
= Action
.getNumberOfParts();
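// Note: for untied tasks, each ".untied.jmp." resumption point created by
// UntiedTaskActionTy counts as one part, and the UntiedCodeGen lambda above
// re-enqueues the task via __kmpc_omp_task so the next part runs after the
// switching point.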
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  assert(!Elem.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt",
                                                 CGF.Builder.GetInsertBlock());
  } else {
    Elem.ServiceInsertPt = new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
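// The "svcpt" instruction is a placeholder: a dead i32-to-i32 bitcast of
// undef whose only purpose is to mark where thread-id and location service
// calls get inserted (either at the current point or right after the
// allocas). It is erased again in clearLocThreadIdInsertPt below.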
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (Elem.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.ServiceInsertPt;
    Elem.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock)
          OpenMPLocThreadIDMap[CGF.CurFn].ThreadID = ThreadID;
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap[CGF.CurFn];
  if (!Elem.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.ThreadID = Call;
  return Call;
}
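// The cached call emitted above is, roughly:
//   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// hoisted to the service insert point, so subsequent thread-id uses in the
// same function reuse one call instead of querying the runtime repeatedly.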
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

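/// Translate the map type under which a declare target variable is captured
/// (to/enter/link) into the corresponding offload global-var entry kind.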
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

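/// Build a unique target-region entry descriptor from the presumed location
/// of \p BeginLoc, retrying without #line directives when the named file does
/// not exist on disk.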
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

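/// Return the address to be used when referencing the declare target variable
/// \p VD; the OpenMPIRBuilder decides whether the global itself or a
/// generated reference pointer is returned.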
ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::GlobalVariable *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

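/// Return the address of the threadprivate copy of \p VD for the current
/// thread. With TLS support this is the variable itself; otherwise the copy
/// is obtained from __kmpc_threadprivate_cached.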
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    if (Ctor == nullptr) {
      Ctor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (Dtor == nullptr) {
      Dtor = llvm::Constant::getNullValue(CGM.UnqualPtrTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          CGF.Builder.getPtrTy(0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

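/// Emit code for an OpenMP 'if' clause: run \p ThenGen when \p Cond is true
/// and \p ElseGen otherwise, constant-folding the condition when possible so
/// the dead arm is never emitted.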
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside parallel region, but in
// regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

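/// Return the internal global lock variable that the __kmpc_critical runtime
/// calls use for the critical region named \p CriticalName.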
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(Ptr, ElemTy, CGF.getContext().getDeclAlign(Var));
}

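/// Emit the helper that __kmpc_copyprivate invokes to broadcast copyprivate
/// variables: it receives two arrays of pointers (destination and source) and
/// performs the per-variable copy/assignment.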
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
        OMPBuilder.createBarrier(CGF.Builder, Kind, ForceSimpleCall,
                                 EmitChecks);
    assert(AfterIP && "unexpected error creating barrier");
    CGF.Builder.restoreIP(*AfterIP);
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

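/// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
/// schedule value, applying the OpenMP 5.0 default modifier rules when none
/// is given explicitly.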
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

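/// Emit the actual __kmpc_for_static_init call for a statically scheduled
/// worksharing loop or sections region, defaulting the chunk to 1 for
/// non-chunked schedules.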
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

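/// Create the offload entries and the metadata needed to register them with
/// the device runtime, diagnosing entries whose address or ID turned out to
/// be invalid.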
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

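/// Build the implicit '.kmp_privates.t' record with one field per privatized
/// variable (propagating any aligned attributes), or return null when there
/// is nothing to privatize.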
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
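// For a plain '#pragma omp task' (no taskloop fields) the emitted proxy is
// roughly equivalent to the following C sketch; the outlined-function and
// map-function names are illustrative:
//   kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t_with_privates *tt) {
//     .omp_outlined.(gtid, &tt->task_data.part_id, &tt->privates,
//                    .omp_task_privates_map., tt, tt->task_data.shareds);
//     return 0;
//   }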
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
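// Rough shape of the generated cleanup thunk (a sketch, not the exact IR):
// each privates field with a non-trivial destruction kind gets a destroy
// cleanup, so for a hypothetical 'std::string' private the net effect when
// the runtime invokes the thunk is approximately
//   tt->privates.str.~basic_string();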
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
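// Illustrative expansion for two privates 'a' (int) and 'b' (double); the
// parameter names are hypothetical:
//   void .omp_task_privates_map.(.kmp_privates.t *privs,
//                                int **a_ptr, double **b_ptr) {
//     *a_ptr = &privs->a;
//     *b_ptr = &privs->b;
//   }
// PrivateVarsPos is what matches the alignment-sorted record fields back to
// the positional arguments in the loop above.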
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
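// Schematically, the array branch above chooses between two strategies for
// 'firstprivate(arr)' with 'T arr[N]' (illustrative):
//   trivial T:      memcpy(&priv.arr, &shared.arr, sizeof(arr));
//   non-trivial T:  for (i = 0..N-1) new (&priv.arr[i]) T(shared.arr[i]);
// The second form is driven by EmitOMPAggregateAssign with the lambda above
// emitting the per-element copy construction.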
/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
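// For a single 'iterator(i = begin:end:step)' the scope generates, in outline
// (labels are the basic-block names used above):
//   counter = 0;
// iter.cont:
//   if (counter < upper) goto iter.body; else goto iter.exit;
// iter.body:
//   i = begin + counter * step;        // HelperData.Update
//   <code emitted while the scope is alive>
//   counter = counter + 1;             // HelperData.CounterUpdate, destructor
//   goto iter.cont;
// iter.exit:
// Nested iterators emit one such loop per iterator, unwound innermost-first
// in the destructor.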
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
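// Worked example (illustrative): for the array section 'a[2:8]' with
// 'int a[16]', Addr is &a[2]; EmitArraySectionExpr with IsLowerBound=false
// yields &a[9], the ConstGEP advances one past the end to &a[10], and the
// ptrtoint subtraction gives 8 * sizeof(int) = 32 bytes. For a shaping
// expression '([n][m])p' the size is sizeof(*p) * n * m instead.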
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
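// The generated record corresponds to this C-level layout (a sketch; the
// authoritative definition lives in openmp/runtime/src/kmp.h):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     uint32_t flags;  // FlagsTy above is a 32-bit unsigned integer
//   };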
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy = CGF.Builder.getPtrTy(0);
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                       TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlist()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
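// End to end, for a plain '#pragma omp task' the code emitted above amounts
// to roughly the following C sketch (names illustrative, details elided):
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof(shareds), .omp_task_entry.);
//   <copy captured shareds into t->shareds, if any>
//   <run emitPrivatesInit over the privates block>
//   <stash destructor thunk / priority in data1 / data2 when needed>
// Actually enqueueing the task (e.g. __kmpc_omp_task) happens in the callers
// of emitTaskInit.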
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
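// C-level layout of the record built above (sketch; mirrors the runtime's
// kmp_depend_info):
//   struct kmp_depend_info {
//     intptr_t base_addr;  // address of the dependent object
//     size_t   len;        // its size in bytes
//     uint8_t  flags;      // dependence kind bits; FlagsTy is bool-sized
//   };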
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V + 1
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
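// Illustrative result for 'depend(in: x) depend(out: y)' with no iterators
// and no depobj clauses: a two-element stack array is filled as
//   deps[0] = { (intptr_t)&x, sizeof(x), DepIn    };
//   deps[1] = { (intptr_t)&y, sizeof(y), DepInOut };
// and (NumOfElements = 2, deps) is what callers forward to runtime entries
// such as __kmpc_omp_task_with_deps.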
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Value *Size;

  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, CGF.Builder.getPtrTy(0));
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  CGF.Builder.getPtrTy(0)),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, {},
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, {},
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(CGF.Builder.getPtrTy(0)),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(CGF.Builder.getPtrTy(0)),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.Builder.getPtrTy(0)),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace

static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
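// Illustrative example (assumption, not from the original source): the action
// above supports code such as
//
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   omp_allocator_handle_t my_alloc;   // hypothetical names
//   #pragma omp target uses_allocators(my_alloc(traits))
//   { /* allocations through 'my_alloc' */ }
//
// where 'my_alloc' is initialized on entry to, and destroyed on exit from,
// the outlined target region.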
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
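// Sketch of the emitted initialization (illustrative; the exact runtime
// prototype is defined with the other entry points in the OpenMP runtime
// definitions): for an allocator variable 'a' with a traits array of N
// entries, the code above produces roughly
//
//   %a = call ptr @__kmpc_init_allocator(i32 %gtid, ptr null /*memspace*/,
//                                        i32 N, ptr %traits)
//
// and stores the converted result into the privatized allocator variable.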
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
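// Symmetric teardown (illustrative note, not from the original source): the
// call above amounts to
//   call void @__kmpc_destroy_allocator(i32 %gtid, ptr %a)
// on region exit, mirroring the __kmpc_init_allocator call emitted by
// emitUsesAllocatorsInit.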
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
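// Illustrative example (assumption, not from the original source): with a
// clause like
//
//   #pragma omp target ompx_attribute(__attribute__((launch_bounds(128, 2))))
//
// handleCUDALaunchBoundsAttr reports AttrMaxThreadsVal = 128 and
// AttrMinBlocksVal = 2, so the folding above clamps MaxThreadsVal to at most
// 128 and raises MinTeamsVal to at least 2.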
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  llvm::Error Err = OMPBuilder.emitTargetRegionFunction(
      EntryInfo, GenerateOutlinedFunction, IsOffloadEntry, OutlinedFn,
      OutlinedFnID);
  assert(!Err && "unexpected error creating target region");
  (void)Err;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
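// Illustrative examples (not from the original source):
//   'x + 1'  - trivial: constant-foldable, or at least free of non-trivial
//              calls and side effects;
//   'foo()'  - not trivial if 'foo' is a non-trivial call;
//   'i++'    - not trivial: it has side effects.
// Trivial expressions are skipped when searching for the single meaningful
// child statement below.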
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
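// Illustrative example (not from the original source): for a body such as
//
//   { ; int Unused; #pragma omp teams ... }
//
// the null statement and the unused declaration are ignorable, so the teams
// directive is returned as the single child; if two non-ignorable statements
// were present, the whole body would be returned instead.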
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams = NestedDir->getSingleClause<OMPNumTeamsClause>()
                                     ->getNumTeams()
                                     .front();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams().front();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
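// Illustrative example (not from the original source): for
//
//   #pragma omp target teams num_teams(4)
//
// the clause expression folds to the constant 4, so MinTeamsVal and
// MaxTeamsVal both become 4 and the expression is returned; a bare
// '#pragma omp target parallel' instead reports exactly one team.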
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
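// Note (illustrative): when no num_teams expression is found, the constant
// path above requires MinNT == MaxNT and materializes that single value, e.g.
// 'i32 1' for target-parallel forms or 'i32 0' to let the runtime choose.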
/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
        }
      }
    }
    // Check the value of the num_threads clause iff the if clause was not
    // specified or did not evaluate to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound = UpperBound
                           ? Constant->getZExtValue()
                           : std::min(UpperBound,
                                      static_cast<int32_t>(
                                          Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
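// Illustrative example (not from the original source): for a captured
//
//   #pragma omp parallel if(cond) num_threads(n)
//
// the helper above folds a constant 'n' into UpperBound when possible and,
// unless UpperBoundOnly is set, materializes the inputs for the value
//   cond ? (n ? n : 0) : 1
// through *CondVal and *E, matching the formula in the comments.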
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF,
              ThreadLimitClause->getThreadLimit().front()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit().front(),
                        ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
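// Illustrative example (not from the original source): for
//
//   #pragma omp target thread_limit(16)
//   { #pragma omp teams thread_limit(8) ... }
//
// the teams clause is preferred per the TODO above, so CheckForConstExpr
// records an upper bound of 8; a nested '#pragma omp simd' instead forces the
// sequential upper bound of 1 via ReturnSequential().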
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expression were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
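// Summary of the selection above (illustrative):
//   NumThreads = CondVal ? (NT ? NT : 0) : 1
//   NumThreads = (ThreadLimit && ThreadLimit < NumThreads) ? ThreadLimit
//                                                          : NumThreads
// where a value of 0 leaves the final choice to the runtime.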
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
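  // Illustrative note (assumption about the current encoding): with
  // OMP_MAP_MEMBER_OF occupying the high 16 bits of the 64-bit flag word
  // (i.e. 0xffff000000000000), the loop above counts the trailing zero bits
  // and getFlagMemberOffset() returns 48.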
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are mapping the whole length of
      // the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
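  // Illustrative example for getExprTypeSize (not from the original source):
  //   int a[100];
  //   map(a[10:20]) -> 20 * sizeof(int)
  //   map(a[10:])   -> (100 - 10) * sizeof(int), clamped at 0 if lb > size
  //   map(a[:])     -> sizeof(a) = 100 * sizeof(int)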
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// clause.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
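  // Illustrative example for getMapTypeBits (not from the original source):
  //   map(always, close, tofrom: x) yields
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, plus
  //   OMP_MAP_TARGET_PARAM when the entry is a kernel argument and
  //   OMP_MAP_PTR_AND_OBJ when requested by the caller.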
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size of more than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
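  // Illustrative examples for isFinalArraySectionExpression (not from the
  // original source): given 'int a[10][20]',
  //   a[1][2:1]  - length folds to 1, not a final array section;
  //   a[1][2:n]  - unknown length, conservatively final;
  //   a[1:]      - dimension length 10 != 1, final.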
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = {},
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }
    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track for the total number of dimension. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    bool IsPartialMapped =
        !PartialStruct.PreliminaryMapData.BasePointers.empty();

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole struct.
    // In that case we need to skip adding an entry for the struct to the
    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
    // list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section, is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;
      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB = Address(
              CGF.EmitScalarExpr(OAShE->getBase()),
              CGF.ConvertTypeForMem(
                  OAShE->getBase()->getType()->getPointeeType()),
              CGF.getContext().getTypeAlignInChars(
                  OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
      if (!OverlappedElements.empty() && Next == CE) {
        // Handle base element with the info for overlapped elements.
        assert(!PartialStruct.Base.isValid() && "The base element is set.");
        assert(!IsPointer &&
               "Unexpected base element with the pointer type.");
        // Mark the whole struct as the struct that requires allocation on the
        // device.
        PartialStruct.LowestElem = {0, LowestElem};
        CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
            I->getAssociatedExpression()->getType());
        Address HB = CGF.Builder.CreateConstGEP(
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
            TypeSize.getQuantity() - 1);
        PartialStruct.HighestElem = {
            std::numeric_limits<decltype(
                PartialStruct.HighestElem.first)>::max(),
            HB};
        PartialStruct.Base = BP;
        PartialStruct.LB = LB;
        assert(PartialStruct.PreliminaryMapData.BasePointers.empty() &&
               "Overlapped elements must be used only once for the variable.");
        std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
        // Emit data for non-overlapped data.
        OpenMPOffloadMappingFlags Flags =
            OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           /*AddPtrFlag=*/false,
                           /*AddIsTargetParamFlag=*/false, IsNonContiguous);
        llvm::Value *Size = nullptr;
        // Do bitcopy of all non-overlapped structure elements.
        for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                 Component : OverlappedElements) {
          Address ComponentLB = Address::invalid();
          for (const OMPClauseMappableExprCommon::MappableComponent &MC :
               Component) {
            if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
              const auto *FD = dyn_cast<FieldDecl>(VD);
              if (FD && FD->getType()->isLValueReferenceType()) {
                const auto *ME =
                    cast<MemberExpr>(MC.getAssociatedExpression());
                LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                ComponentLB =
                    CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                        .getAddress();
              } else {
                ComponentLB =
                    CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                        .getAddress();
              }
              llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
              llvm::Value *LBPtr = LB.emitRawPointer(CGF);
              Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                               LBPtr);
              break;
            }
          }
          assert(Size && "Failed to determine structure size");
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
        }
        CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
        CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
        llvm::Value *LBPtr = LB.emitRawPointer(CGF);
        Size = CGF.Builder.CreatePtrDiff(
            CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
            LBPtr);
        CombinedInfo.Sizes.push_back(
            CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
        CombinedInfo.Types.push_back(Flags);
        CombinedInfo.Mappers.push_back(nullptr);
        CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                  : 1);
        break;
      }
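      // Sketch of the overlap situation handled above (assumed user code):
      //
      //   struct S { int A; int B; int C; };
      //   S V;
      //   #pragma omp target map(tofrom : V) map(from : V.B)
      //
      // `V.B` overlaps the map of the whole struct, so the struct map is
      // split into bitcopies of the non-overlapped pieces (here `V.A` and
      // `V.C`), while the overlapped member keeps its own entry and flags.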
      llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
      // Skip adding an entry in the CurInfo of this combined entry if the
      // whole struct is currently being mapped. The struct needs to be added
      // in the first position before any data internal to the struct is being
      // mapped.
      // Skip adding an entry in the CurInfo of this combined entry if the
      // PartialStruct.PreliminaryMapData.BasePointers has been mapped.
      if ((!IsMemberPointerOrAddr && !IsPartialMapped) ||
          (Next == CE && MapType != OMPC_MAP_unknown)) {
        if (!IsMappingWholeStruct) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
        } else {
          StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          StructBaseCombinedInfo.BasePointers.push_back(
              BP.emitRawPointer(CGF));
          StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
          StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
              Size, CGF.Int64Ty, /*isSigned=*/true));
          StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
              IsNonContiguous ? DimSize : 1);
        }

        // If Mapper is valid, the last component inherits the mapper.
        bool HasMapper = Mapper && Next == CE;
        if (!IsMappingWholeStruct)
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
        else
          StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                             : nullptr);

        // We need to add a pointer flag for each map that comes from the
        // same expression except for the first one. We also need to signal
        // this map is the first one that relates with the current capture
        // (there is a set of entries for each capture).
        OpenMPOffloadMappingFlags Flags =
            getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                           !IsExpressionFirstInfo || RequiresReference ||
                               FirstPointerInComplexData || IsMemberReference,
                           AreBothBasePtrAndPteeMapped ||
                               (IsCaptureFirstInfo && !RequiresReference),
                           IsNonContiguous);

        if (!IsExpressionFirstInfo || IsMemberReference) {
          // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
          // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
          if (IsPointer || (IsMemberReference && Next != CE))
            Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                       OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                       OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                       OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                       OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

          if (ShouldBeMemberOf) {
            // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
            // should be later updated with the correct value of MEMBER_OF.
            Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
            // From now on, all subsequent PTR_AND_OBJ entries should not be
            // marked as MEMBER_OF.
            ShouldBeMemberOf = false;
          }
        }

        if (!IsMappingWholeStruct)
          CombinedInfo.Types.push_back(Flags);
        else
          StructBaseCombinedInfo.Types.push_back(Flags);
      }
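      // A sketch of the MEMBER_OF handling referenced above (the details
      // live in llvm::OpenMPIRBuilder): the placeholder ORs in an all-ones
      // member index, and once the position of the parent struct entry is
      // known it is patched via
      //
      //   OpenMPOffloadMappingFlags MemberOf = OMPBuilder.getMemberOfFlag(Idx);
      //   OMPBuilder.setCorrectMemberOfFlag(Flags, MemberOf);
      //
      // so member entries end up referring to their combined struct entry.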
      // If we have encountered a member expression so far, keep track of the
      // mapped member. If the parent is "*this", then the value declaration
      // is nullptr.
      if (EncounteredME) {
        const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
        unsigned FieldIndex = FD->getFieldIndex();

        // Update info about the lowest and highest elements for this struct
        if (!PartialStruct.Base.isValid()) {
          PartialStruct.LowestElem = {FieldIndex, LowestElem};
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress();
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
          PartialStruct.Base = BP;
          PartialStruct.LB = BP;
        } else if (FieldIndex < PartialStruct.LowestElem.first) {
          PartialStruct.LowestElem = {FieldIndex, LowestElem};
        } else if (FieldIndex > PartialStruct.HighestElem.first) {
          if (IsFinalArraySection) {
            Address HB =
                CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                    .getAddress();
            PartialStruct.HighestElem = {FieldIndex, HB};
          } else {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }
      }

      // Need to emit combined struct for array sections.
      if (IsFinalArraySection || IsNonContiguous)
        PartialStruct.IsArraySection = true;

      // If we have a final array section, we are done with this expression.
      if (IsFinalArraySection)
        break;

      // The pointer becomes the base for the next element.
      if (Next != CE)
        BP = IsMemberReference ? LowestElem : LB;
      if (!IsPartialMapped)
        IsExpressionFirstInfo = false;
      IsCaptureFirstInfo = false;
      FirstPointerInComplexData = false;
      IsPrevMemberReference = IsMemberReference;
    } else if (FirstPointerInComplexData) {
      QualType Ty = Components.rbegin()
                        ->getAssociatedDeclaration()
                        ->getType()
                        .getNonReferenceType();
      BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
      FirstPointerInComplexData = false;
    }
  }
    // If ran into the whole component - allocate the space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension size except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case that having pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimension.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
    // Collect info for non-contiguous. Notice that offset, count, and stride
    // are only meaningful for array-section, so we insert a null for anything
    // other than array-section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, they equal the
    // number of non-contiguous declarations in the target update to/from
    // clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a high dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case
        // like arr[0:2][2], Clang constructs the inner dimension as an array
        // section even though it is not in array-section form according to
        // the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1       4    (int)    <- dummy dimension
      //    D1          0      2       8    (2 * (1) * 4)
      //    D2          1      2       20   (1 * (1 * 5) * 4)
      //    D3          0      2       200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/{}, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
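  // Flag sketch for the branch above (illustrative only): a firstprivate
  // variable captured by reference, e.g.
  //
  //   int *P;
  //   #pragma omp target firstprivate(P)
  //
  // yields TO | PTR_AND_OBJ when the captured declaration has pointer type,
  // and PRIVATE | TO otherwise; lambdas mapped with map(to: ...) keep their
  // user-specified map type.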
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in non-virtual bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;

      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
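  // Layout sketch for the recursion above (hypothetical types): for
  //
  //   struct A { int X; };
  //   struct B : A { int Y; };
  //
  // getPlainLayout(B) first recurses into the base subobject A (AsBase=true)
  // and then appends B's own fields, yielding the flattened field order
  // {A::X, B::Y} that the overlapped-element sorting later relies on.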
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.try_emplace(D, Total).first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };

    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), {},
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, {},
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, {},
                C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
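    // Sketch of the deferral described above (assumed user code): in
    //
    //   struct S { int *Ptr; } V;
    //   #pragma omp target data map(tofrom : V) use_device_ptr(V.Ptr)
    //
    // `V.Ptr` is a struct member, so its RETURN_PARAM entry cannot be
    // finalized until the enclosing struct map for `V` has been emitted; it
    // is parked in DeferredInfo below and flushed after the struct.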
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, {}, {},
                    /*ReturnDevicePointer=*/false, IsImplicit, nullptr, nullptr,
                    IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ {},
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx <
                StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlist())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }
  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
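      // Sketch of the rule just quoted (assumed user code):
      //
      //   struct Base { int B; };
      //   struct Derived : Base {
      //     int D;
      //     void run() {
      //   #pragma omp target
      //       { B = D; }   // B and D behave as if map(tofrom: this[:1])
      //     }
      //   };
      //
      // With a base class present, the combined entry below therefore spans
      // the complete object rather than just the lowest..highest mapped
      // members.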
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
        : !PartialStruct.PreliminaryMapData.BasePointers.empty()
            ? OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ
            : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }
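  // The mapper path above corresponds to user-defined mappers (assumed user
  // code):
  //
  //   struct Vec { int Len; double *Data; };
  //   #pragma omp declare mapper(Vec V) map(V, V.Data[0:V.Len])
  //
  // For such a declaration, generateAllInfoForMapper() walks the mapper's
  // map clauses instead of an executable directive's.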
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
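  // Capture sketch (assumed user code): for
  //
  //   int X = 0;
  //   auto L = [&X]() { X++; };
  //   #pragma omp target map(to : L)
  //   L();
  //
  // the by-reference capture of X becomes an implicit PTR_AND_OBJ | LITERAL
  // | MEMBER_OF entry, so the device copy of the lambda object points at the
  // device copy of X.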
  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section,
          // dereference, etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          OverlappedData[&BaseData].push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    MapCombinedInfoTy StructBaseCombinedInfo;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, {}, Components, CombinedInfo,
          StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
          IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(
            MapType, MapModifiers, {}, Components, CombinedInfo,
            StructBaseCombinedInfo, PartialStruct, IsFirstComponentList,
            IsImplicit, /*GenerateAllInfoForClauses*/ false, Mapper,
            /*ForDeviceAddr=*/false, VD, VarRef,
            /*OverlappedElements*/ {}, HasMapBasePtr && HasMapArraySec);
      IsFirstComponentList = false;
    }
  }
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
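  // E.g. (a sketch, not from the original source): a scalar 'int X' referenced
  // in a '#pragma omp target' region with no map clause is captured by copy
  // and takes the capturesVariableByCopy() branch above; it gets
  // OMP_MAP_LITERAL, then OMP_MAP_TARGET_PARAM and OMP_MAP_IMPLICIT from the
  // common tail, and a null entry in CombinedInfo.Mappers.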
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();

  return nullptr;
}

/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
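// The result is an OpenMP source-location string; e.g. (a sketch, exact
// layout is whatever getOrCreateSrcLocStr produces) a map of 'p[0:N]' at
// foo.c:3:27 would yield something like ";foo.c;p[0:N];3;27;;", which the
// runtime can print in diagnostics and profiling output.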
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArraysAndArgs(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false, bool ForEndCall = false) {
  CodeGenModule &CGM = CGF.CGM;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  OMPBuilder.emitOffloadingArraysAndArgs(
      AllocaIP, CodeGenIP, Info, Info.RTArgs, CombinedInfo, IsNonContiguous,
      ForEndCall, DeviceAddrCB, CustomMapperCB);
}
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const Stmt *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_taskyield:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
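// E.g. (a sketch): for
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) ...
// the OMPD_target case drills through the nested 'teams' region and returns
// the 'distribute parallel for' directive, so callers can reason about the
// distribute loop (e.g. to compute a trip count).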
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
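    // E.g. (a sketch of the rule encoded below): a member declared in the
    // mapper with map(to: ...) that is reached from an invocation using
    // map(from: ...) takes the 'to' row / 'from' column of the table, i.e.
    // it decays to 'alloc' and both the TO and FROM bits end up cleared.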
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF)
    FunctionUDMMap[CGF->CurFn].push_back(D);
}
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}
static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}
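// E.g. (a sketch): 'device(2)' makes DeviceID the sign-extended i64 value 2,
// while a target construct without a device clause passes OMP_DEVICEID_UNDEF
// so the runtime substitutes the default device.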
static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}
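// E.g. (a sketch): 'ompx_dyn_cgroup_mem(n * sizeof(int))' is evaluated here
// and becomes the i32 dynamic group-memory size forwarded in the kernel
// launch arguments; without the clause the size defaults to 0.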
static void genMapInfoForCaptures(
    MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
    const CapturedStmt &CS, llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
    llvm::OpenMPIRBuilder &OMPBuilder,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &MappedVarSet,
    MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {

  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                                  CI->capturesThis(), OMPBuilder, nullptr,
                                  /*NotTargetParams*/ false);
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
}
static void
genMapInfo(MappableExprsHandler &MEHandler, CodeGenFunction &CGF,
           MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
           llvm::OpenMPIRBuilder &OMPBuilder,
           const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkippedVarSet =
               llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) {

  CodeGenModule &CGM = CGF.CGM;
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, SkippedVarSet);

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }
}

static void genMapInfo(const OMPExecutableDirective &D, CodeGenFunction &CGF,
                       const CapturedStmt &CS,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MapCombinedInfoTy &CombinedInfo) {
  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  genMapInfoForCaptures(MEHandler, CGF, CS, CapturedVars, OMPBuilder,
                        MappedVarSet, CombinedInfo);
  genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder, MappedVarSet);
}
template <typename ClauseTy>
static void
emitClauseForBareTargetDirective(CodeGenFunction &CGF,
                                 const OMPExecutableDirective &D,
                                 llvm::SmallVectorImpl<llvm::Value *> &Values) {
  const auto *C = D.getSingleClause<ClauseTy>();
  assert(!C->varlist_empty() &&
         "ompx_bare requires explicit num_teams and thread_limit");
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  for (auto *E : C->varlist()) {
    llvm::Value *V = CGF.EmitScalarExpr(E);
    Values.push_back(
        CGF.Builder.CreateIntCast(V, CGF.Int32Ty, /*isSigned=*/true));
  }
}
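// E.g. (a sketch): for
//   #pragma omp target teams ompx_bare num_teams(GX, GY, GZ) thread_limit(BX)
// each num_teams/thread_limit list item is evaluated and cast to i32, giving
// per-dimension launch bounds for the bare (no runtime wrapper) kernel.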
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  CGOpenMPRuntime::TargetDataInfo Info;
  genMapInfo(D, CGF, CS, CapturedVars, OMPBuilder, CombinedInfo);

  emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                              /*IsNonContiguous=*/true, /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    bool IsBare = D.hasClausesOfKind<OMPXBareClause>();
    SmallVector<llvm::Value *, 3> NumTeams;
    SmallVector<llvm::Value *, 3> NumThreads;
    if (IsBare) {
      emitClauseForBareTargetDirective<OMPNumTeamsClause>(CGF, D, NumTeams);
      emitClauseForBareTargetDirective<OMPThreadLimitClause>(CGF, D,
                                                             NumThreads);
    } else {
      NumTeams.push_back(OMPRuntime->emitNumTeamsForTargetDirective(CGF, D));
      NumThreads.push_back(
          OMPRuntime->emitNumThreadsForTargetDirective(CGF, D));
    }

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
        OMPRuntime->getOMPBuilder().emitKernelLaunch(
            CGF.Builder, OutlinedFnID, EmitTargetCallFallbackCB, Args, DeviceID,
            RTLoc, AllocaIP);
    assert(AfterIP && "unexpected error creating kernel launch");
    CGF.Builder.restoreIP(*AfterIP);
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_taskyield:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
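// E.g. (a sketch): given
//   #pragma omp declare target to(foo) device_type(nohost)
// isAssumedToBeNotEmitted(foo, /*IsDevice=*/false) is true, so the host
// compilation skips foo; device_type(host) mirrors this for device builds.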
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
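// E.g. (a sketch): '#pragma omp requires atomic_default_mem_order(seq_cst)'
// sets RequiresAtomicOrdering to SequentiallyConsistent, and a later
// '#pragma omp atomic' without an explicit memory-order clause will be
// emitted with that ordering via getDefaultMemoryOrdering().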
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target; it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
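
// Example of the call built above (a sketch):
//   #pragma omp teams num_teams(4) thread_limit(8)
// yields
//   call void @__kmpc_push_num_teams(ptr @loc, i32 %gtid, i32 4, i32 8)
// and an absent clause contributes the value 0 instead.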

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}
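
// Similarly, a sketch: `thread_limit(16)` handled here becomes
//   call void @__kmpc_set_thread_limit(ptr @loc, i32 %gtid, i32 16)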

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device) {
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  llvm::OpenMPIRBuilder::InsertPointOrErrorTy AfterIP =
      OMPBuilder.createTargetData(
          OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
          /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc);
  assert(AfterIP && "unexpected error creating target data");
  CGF.Builder.restoreIP(*AfterIP);
}
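
// Rough shape of the result (details depend on the map clauses): for
//   #pragma omp target data map(tofrom: a)
// the OpenMPIRBuilder brackets the region body with
//   __tgt_target_data_begin_mapper(...) / __tgt_target_data_end_mapper(...)
// passing the base-pointer/pointer/size arrays produced by GenMapInfoCB.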

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }
    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
    CGOpenMPRuntime::TargetDataInfo Info;
    MappableExprsHandler MEHandler(D, CGF);
    genMapInfo(MEHandler, CGF, CombinedInfo, OMPBuilder);
    emitOffloadingArraysAndArgs(CGF, CombinedInfo, Info, OMPBuilder,
                                /*IsNonContiguous=*/true, /*ForEndCall=*/false);

    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
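
// Example of the RTLFn selection above (illustrative):
//   #pragma omp target enter data map(to: a) nowait
// selects __tgt_target_data_begin_nowait_mapper, while the same directive
// without `nowait` selects __tgt_target_data_begin_mapper.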

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise, the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is passed by value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
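
// Worked example of the VLEN formula above: for `double f(double x)` and a
// 256-bit vector register, the CDT is `double` (64 bits), so
//   VLEN = 256 / 64 = 4.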

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
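
// Mangling example (following the rules above): parameters classified as
// (Vector, Uniform, Linear with step 2) produce the sequence "vul2"; a
// linear parameter with step 1 contributes just "l" because the unit step
// is omitted.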

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
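
// Putting it together for x86 (a sketch): `#pragma omp declare simd` on
// `double f(double x)` produces, among others, the SSE not-inbranch variant
//   _ZGVbN2v_<mangled name of f>
// i.e. prefix "_ZGV", ISA 'b' (128-bit), mask 'N', VLEN 128/64 = 2, and "v"
// for the single vector parameter.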

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
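
// Example: for `double f(float x)` the lane sizes are {64, 32}, so NDS = 32
// and WDS = 64; OutputBecomesInput remains false because the return value is
// pass-by-value.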

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
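
// E.g. VLEN = 4, LMask = "N", ISA 'n' and parameter sequence "v" give the
// attribute "_ZGVnN4v_<mangled name>"; when OutputBecomesInput is set, an
// extra "v" for the returned value is emitted before the parameter sequence.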

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
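
// E.g. NDS = 32 emits the 2-lane (64-bit) and 4-lane (128-bit) Advanced SIMD
// variants; NDS = 8 emits the 8- and 16-lane variants.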

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // constraints.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
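
// End-to-end sketch (illustrative):
//   #pragma omp declare simd uniform(n) linear(p)
//   void foo(int *p, int n);
// classifies p as Linear and n as Uniform; the implicit step of 1 on the
// pointer p is rescaled by sizeof(int) to 4, giving the parameter sequence
// "l4u" in the emitted vector-variant attributes.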

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info cast to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
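
// Sketch of the emitted initialization for `#pragma omp for ordered(1)` with
// N iterations: a single kmp_dim is filled as {lo = 0, up = N, st = 1} and
// passed to
//   __kmpc_doacross_init(&loc, gtid, /*num_dims=*/1, dims);
// with the matching __kmpc_doacross_fini registered as a cleanup.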

template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
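
// For example, inside such an ordered loop:
//   #pragma omp ordered depend(source)    -> __kmpc_doacross_post
//   #pragma omp ordered depend(sink: i-1) -> __kmpc_doacross_wait
// with the loop-counter vector built by EmitDoacrossOrdered above.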

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
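
// Allocation sketch (illustrative IR): for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// the variable's storage becomes roughly
//   %a.void.addr = call ptr @__kmpc_alloc(i32 %gtid, i64 4, ptr %allocator)
// with a matching __kmpc_free pushed onto the cleanup stack, and
// __kmpc_aligned_alloc used instead when an `align` modifier is present.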

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlist()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlist()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlist()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlist()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlist()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlist()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
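// End-to-end sketch (illustrative source, not emitted verbatim): for
//   #pragma omp for lastprivate(conditional: a)
//   for (iv = 0; iv < n; ++iv)
//     if (cond(iv)) a = f(iv);
// every store to 'a' funnels through the helper above, which executes
//   if (last_iv <= iv) { last_iv = iv; last_a = priv_a; }
// inside critical(<unique name of 'a'>), so the thread observing the highest
// iteration wins; the final update then copies last_a back into 'a'.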
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
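// Protocol sketch for the inner-parallel path above: when the assignment
// happens in a nested function (FoundFn != CurFn), the loop counter of the
// registering region is not available, so the inner region only flags the
// update, 'priv_a.Fired = 1', with a volatile unordered atomic store. The
// flag is examined later in the owning region by
// checkAndEmitSharedLastprivateConditional, which performs the actual
// compare-and-copy there.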
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}
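// Example (sketch): for
//   #pragma omp parallel for lastprivate(conditional: a)
// each assignment to 'a' inside the loop body sets priv_a.Fired. At the end
// of the directive this routine walks the registered declarations, tests
// Fired per captured variable, and only when it is nonzero funnels the
// private value through emitLastprivateConditionalUpdate (blocks lpc.then /
// lpc.done above).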
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
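// Copy-back sketch: after the construct ends, the original variable receives
// the globally winning value, roughly 'a = last_a;', where 'last_a' is the
// internal global created by emitLastprivateConditionalUpdate. If no thread
// ever fired an update, that global was never created (getNamedGlobal
// returns null above) and 'a' keeps its current value.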
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
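// Note: CGOpenMPSIMDRuntime backs -fopenmp-simd compilations, where only
// simd-related semantics are honored and no calls into the OpenMP host
// runtime may be emitted. Constructs that would need the runtime are
// expected to be filtered out before codegen ever reaches these entry
// points, so the overrides below (with the noted exceptions) simply trap.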
llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
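// emitReduction is one of the few overrides here that does real work: with
// Options.SimpleReduction (a single thread of execution, so no runtime
// coordination is needed) the base-class emission lowers the reduction to
// plain loads, combiner expressions, and stores, which is safe without the
// OpenMP host runtime.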
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}
void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}