//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel = false;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
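
// A sketch of the control flow built by UntiedTaskActionTy above for an
// untied task with two switching points (illustrative IR, not produced
// verbatim; block names follow the createBasicBlock() calls):
//
//   %part_id = load i32, ptr %.part_id.addr
//   switch i32 %part_id, label %.untied.done. [ i32 0, label %.untied.jmp.
//                                               i32 1, label %.untied.jmp.1 ]
//
// Each emitUntiedSwitch() call stores the next case number into the part id,
// re-enqueues the task via UntiedCodeGen, and execution resumes at the
// matching .untied.jmp. block when the task is next scheduled.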
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
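
// For instance, the location ident for the implicit barrier at the end of a
// worksharing loop would typically carry
// OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR (a representative combination;
// the exact flags are chosen per directive when barrier and init calls are
// emitted later in this file).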
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /* but currently used for storing
///                               region-specific ITT */
///                            /* contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
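
// As an example, a construct at line 10, column 3 of test.c inside foo()
// yields a psource string of the form ";test.c;foo;10;3;;" (see
// getIdentStringFromSourceLocation() below for the exact composition).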
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
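
// Illustrative mapping: 'schedule(dynamic, 4)' selects OMP_sch_dynamic_chunked
// (35), and 'schedule(nonmonotonic: dynamic)' additionally ORs in
// OMP_sch_modifier_nonmonotonic; the combined value is what gets passed to
// the runtime dispatch/init entry points (a sketch; the selection logic lives
// where schedule clauses are lowered later in this file).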
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
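
// Contract sketch: when a PrePostActionTy is attached, the CleanupTy pushed
// above guarantees that Action->Exit(CGF) runs when the cleanup scope is
// left, including on exceptional paths, so pre/post actions stay balanced
// around the region codegen callback.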
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
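
// Shape of the emitted initialization loop (illustrative IR; block names
// match the createBasicBlock() calls above):
//
//   %isempty = icmp eq ptr %dest.begin, %dest.end
//   br i1 %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
// omp.arrayinit.body:
//   %cur = phi ptr [ %dest.begin, %entry ], [ %next, %omp.arrayinit.body ]
//   ; ...element initialization...
//   %next = getelementptr %ElemTy, ptr %cur, i32 1
//   %done = icmp eq ptr %next, %dest.end
//   br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body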
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(
            OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(
            OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
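
// For example, a user-defined reduction such as:
//
//   #pragma omp declare reduction(mymin : int :
//       omp_out = omp_out < omp_in ? omp_out : omp_in)
//       initializer(omp_priv = 2147483647)
//
// produces two internal helpers via this function (sketched signatures; the
// actual symbol names are platform-mangled by getName()):
//
//   void .omp_combiner.(int *restrict omp_out, int *restrict omp_in);
//   void .omp_initializer.(int *restrict omp_priv, int *restrict omp_orig);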
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call
            ? Init
            : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
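
// The function generated here follows the kmpc_micro convention expected by
// __kmpc_fork_call (see getKmpc_MicroPointerTy() below), roughly (a sketch):
//
//   void <outlined-helper>(i32 *global_tid, i32 *bound_tid,
//                          /* captured variables... */);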
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
*CGOpenMPRuntime::emitTaskOutlinedFunction(
1275 const OMPExecutableDirective
&D
, const VarDecl
*ThreadIDVar
,
1276 const VarDecl
*PartIDVar
, const VarDecl
*TaskTVar
,
1277 OpenMPDirectiveKind InnermostKind
, const RegionCodeGenTy
&CodeGen
,
1278 bool Tied
, unsigned &NumberOfParts
) {
1279 auto &&UntiedCodeGen
= [this, &D
, TaskTVar
](CodeGenFunction
&CGF
,
1280 PrePostActionTy
&) {
1281 llvm::Value
*ThreadID
= getThreadID(CGF
, D
.getBeginLoc());
1282 llvm::Value
*UpLoc
= emitUpdateLocation(CGF
, D
.getBeginLoc());
1283 llvm::Value
*TaskArgs
[] = {
1285 CGF
.EmitLoadOfPointerLValue(CGF
.GetAddrOfLocalVar(TaskTVar
),
1286 TaskTVar
->getType()->castAs
<PointerType
>())
1288 CGF
.EmitRuntimeCall(OMPBuilder
.getOrCreateRuntimeFunction(
1289 CGM
.getModule(), OMPRTL___kmpc_omp_task
),
1292 CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy
Action(Tied
, PartIDVar
,
1294 CodeGen
.setAction(Action
);
1295 assert(!ThreadIDVar
->getType()->isPointerType() &&
1296 "thread id variable must be of type kmp_int32 for tasks");
1297 const OpenMPDirectiveKind Region
=
1298 isOpenMPTaskLoopDirective(D
.getDirectiveKind()) ? OMPD_taskloop
1300 const CapturedStmt
*CS
= D
.getCapturedStmt(Region
);
1301 bool HasCancel
= false;
1302 if (const auto *TD
= dyn_cast
<OMPTaskDirective
>(&D
))
1303 HasCancel
= TD
->hasCancel();
1304 else if (const auto *TD
= dyn_cast
<OMPTaskLoopDirective
>(&D
))
1305 HasCancel
= TD
->hasCancel();
1306 else if (const auto *TD
= dyn_cast
<OMPMasterTaskLoopDirective
>(&D
))
1307 HasCancel
= TD
->hasCancel();
1308 else if (const auto *TD
= dyn_cast
<OMPParallelMasterTaskLoopDirective
>(&D
))
1309 HasCancel
= TD
->hasCancel();
1311 CodeGenFunction
CGF(CGM
, true);
1312 CGOpenMPTaskOutlinedRegionInfo
CGInfo(*CS
, ThreadIDVar
, CodeGen
,
1313 InnermostKind
, HasCancel
, Action
);
1314 CodeGenFunction::CGCapturedStmtRAII
CapInfoRAII(CGF
, &CGInfo
);
1315 llvm::Function
*Res
= CGF
.GenerateCapturedStmtFunction(*CS
);
1317 NumberOfParts
= Action
.getNumberOfParts();
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
getIdentStringFromSourceLocation(CodeGenFunction
&CGF
,
1348 SmallString
<128> &Buffer
) {
1349 llvm::raw_svector_ostream
OS(Buffer
);
1350 // Build debug location
1351 PresumedLoc PLoc
= CGF
.getContext().getSourceManager().getPresumedLoc(Loc
);
1352 OS
<< ";" << PLoc
.getFilename() << ";";
1353 if (const auto *FD
= dyn_cast_or_null
<FunctionDecl
>(CGF
.CurFuncDecl
))
1354 OS
<< FD
->getQualifiedNameAsString();
1355 OS
<< ";" << PLoc
.getLine() << ";" << PLoc
.getColumn() << ";;";
1359 llvm::Value
*CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction
&CGF
,
1361 unsigned Flags
, bool EmitLoc
) {
1362 uint32_t SrcLocStrSize
;
1363 llvm::Constant
*SrcLocStr
;
1364 if ((!EmitLoc
&& CGM
.getCodeGenOpts().getDebugInfo() ==
1365 llvm::codegenoptions::NoDebugInfo
) ||
1367 SrcLocStr
= OMPBuilder
.getOrCreateDefaultSrcLocStr(SrcLocStrSize
);
1369 std::string FunctionName
;
1370 if (const auto *FD
= dyn_cast_or_null
<FunctionDecl
>(CGF
.CurFuncDecl
))
1371 FunctionName
= FD
->getQualifiedNameAsString();
1372 PresumedLoc PLoc
= CGF
.getContext().getSourceManager().getPresumedLoc(Loc
);
1373 const char *FileName
= PLoc
.getFilename();
1374 unsigned Line
= PLoc
.getLine();
1375 unsigned Column
= PLoc
.getColumn();
1376 SrcLocStr
= OMPBuilder
.getOrCreateSrcLocStr(FunctionName
, FileName
, Line
,
1377 Column
, SrcLocStrSize
);
1379 unsigned Reserved2Flags
= getDefaultLocationReserved2Flags();
1380 return OMPBuilder
.getOrCreateIdent(
1381 SrcLocStr
, SrcLocStrSize
, llvm::omp::IdentFlag(Flags
), Reserved2Flags
);
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
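
// When no cached or passed-in thread id is usable, the call materialized at
// the service insertion point looks like (illustrative IR):
//
//   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)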
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}
static llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
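
// Example (illustrative sketch; the ".cache." suffix is an assumption based
// on the getName call above): for
//   int X;
//   #pragma omp threadprivate(X)
// a use of X on a non-TLS target becomes an access through the pointer
// returned by
//   __kmpc_threadprivate_cached(&loc, gtid, &X, sizeof(X), &<X>.cache.);
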
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
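
// Example (illustrative sketch; helper names follow the getName calls above):
// for a threadprivate variable of class type T with a non-trivial constructor
// and destructor, this produces on the host roughly
//   void *__kmpc_global_ctor_(void *P) { new (P) T(Init); return P; }
//   void  __kmpc_global_dtor_(void *P) { ((T *)P)->~T(); }
// plus an __omp_threadprivate_init_ global initializer that calls
// __kmpc_global_thread_num and then
// __kmpc_threadprivate_register(&loc, &Var, Ctor, /*CopyCtor=*/NULL, Dtor).
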
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsTargetDevice;

  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsTargetDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = Fn;
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = Fn;
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsTargetDevice;
}
void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
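
// Example (illustrative sketch): when Cond does not constant-fold, the
// emitted control flow is the usual diamond
//   br i1 %cond, label %omp_if.then, label %omp_if.else
// with both arms falling through to %omp_if.end; when Cond does fold, only
// the live arm's code generator runs and no branch is emitted at all.
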
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
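
// Example (illustrative sketch): '#pragma omp parallel if(C)' yields, in the
// then-arm,
//   __kmpc_fork_call(&loc, <n>, (kmpc_micro)outlined, <captured vars>...);
// and, in the else-arm, the serialized sequence
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined(&gtid, &.bound.zero.addr, <captured vars>...);
//   __kmpc_end_serialized_parallel(&loc, gtid);
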
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region, but in a regular serial code region, get thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}
namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
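
// Example (illustrative sketch; type names are placeholders): for
//   #pragma omp single copyprivate(a, b)
// the generated helper is morally
//   void .omp.copyprivate.copy_func(void *LHS, void *RHS) {
//     *(TyA *)((void **)LHS)[0] = *(TyA *)((void **)RHS)[0];
//     *(TyB *)((void **)LHS)[1] = *(TyB *)((void **)RHS)[1];
//   }
// where each assignment is emitted from the corresponding AssignmentOps[I].
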
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
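
// Example (illustrative sketch, following the comment block above):
//   did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <single region body>;
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, <list>, <copy_func>, did_it);
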
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
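
// Example (illustrative sketch): inside a cancellable worksharing region this
// emits
//   %res = call i32 @__kmpc_cancel_barrier(ptr <loc>, i32 %gtid)
//   br i1 <%res != 0>, label %.cancel.exit, label %.cancel.continue
// where .cancel.exit branches through cleanups to the construct's cancel
// destination; in all other cases a plain __kmpc_barrier call is emitted.
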
void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
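
// Example: 'schedule(dynamic, 4)' maps to OMP_sch_dynamic_chunked (or
// OMP_ord_dynamic_chunked when the loop also carries an 'ordered' clause),
// while a bare 'schedule(static)' maps to OMP_sch_static.
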
/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
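
// Example: with OpenMP >= 5.0 and no explicit modifier, 'schedule(dynamic)'
// produces OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic, whereas
// the static and distribute schedules listed above are left unmodified.
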
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
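
// Example (illustrative sketch): for 'schedule(static)' with a signed 32-bit
// induction variable the emitted call is
//   __kmpc_for_static_init_4(&loc, gtid, OMP_sch_static, &last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// after which each thread runs its [lb, ub] sub-range and eventually calls
// __kmpc_for_static_fini.
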
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
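// Example (sketch): for
//   int x;
//   #pragma omp allocate(x) allocator(omp_large_cap_mem_alloc)
// isAllocatableDecl(x) returns true, so callers such as
// createPrivatesRecordDecl below store a pointer to the runtime-allocated
// storage instead of the value itself. With the default allocator and no
// allocator expression it returns false and x is laid out inline.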
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
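// Putting the two records together, for a task with one firstprivate 'int'
// the effective layout is roughly (illustrative sketch only):
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;        // shareds, routine, part_id, data1, data2
//     struct .kmp_privates.t {
//       int priv;                  // privates sorted by alignment, see
//     } privates;                  // emitTaskInit below
//   };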
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
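// Note on argument order (restating what the code above assembles): the
// outlined TaskFunction always receives the five common arguments first
// (gtid, pointer to part_id, privates pointer, privates-map function, task
// descriptor), then, for taskloops only, lb/ub/st/liter/reductions, and the
// shareds pointer is always passed last.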
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}
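// Example (sketch): for
//   std::string s;
//   #pragma omp task firstprivate(s)
// the private copy of 's' has a non-trivial destructor, so this returns true
// and emitTaskInit below sets the DestructorsFlag when allocating the task,
// making the runtime invoke the generated .omp_task_destructor. function.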
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      llvm::Value *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
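// The control flow generated by the scope above is, schematically:
//   counter0 = 0;
// iter.cont0:
//   if (counter0 < N0) goto iter.body0; else goto iter.exit0;
// iter.body0:
//   iter0 = begin0 + counter0 * step0;
//   ... nested iterators / payload emitted inside the scope ...
//   counter0 = counter0 + 1; goto iter.cont0;  // emitted by the destructor
// iter.exit0:
// This is only a sketch; the real blocks are created per iterator in
// declaration order and unwound in reverse order.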
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
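// Examples (illustrative): for an array-shaping expression ([10][20])p the
// size is sizeof(*p) * 10 * 20, computed by the NUWMul loop above; for an
// array section arr[2:8] the size is the byte distance from &arr[2] to one
// past &arr[9]; for a plain lvalue it is simply sizeof(expr).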
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
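// The record built above corresponds to the runtime-side descriptor
// (sketch; field names follow the runtime's kmp.h, only the shape matters
// for the codegen here):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t   len;
//     uint32_t flags;
//   };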
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
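// End-to-end sketch of the allocation this function emits for
//   #pragma omp task shared(a) firstprivate(b)
// (illustrative only; exact flag values depend on the clauses, 0x1 here
// being TiedFlag since tasks are tied by default):
//   %task = call ptr @__kmpc_omp_task_alloc(ptr @loc, i32 %gtid, i32 1,
//               i64 <sizeof kmp_task_t_with_privates>, i64 <sizeof shareds>,
//               ptr @.omp_task_entry.)
// followed by a copy of the shareds block and initialization of the
// privates, as above.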
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
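// For example, 'depend(inout: x)' and 'depend(out: x)' both map to
// RTLDependenceKindTy::DepInOut here, so the runtime sees a single in/out
// dependence kind for either spelling.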
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
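// The record built above mirrors the runtime's dependence descriptor
// (sketch; the flags field is a boolean-width unsigned integer here):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t   len;
//     <bool-width uint> flags;
//   };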
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
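// Layout note (restating what the GEP above relies on): a depobj handle
// points at element 0 of its kmp_depend_info array, and the element at
// index -1 is a header whose base_addr field holds the number of entries.
// That is why the count is read through deps[-1].base_addr here.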
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
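// The dependence array is populated in three passes: plain dependencies
// first (compile-time counter 'Pos'), then iterator-expanded dependencies
// (runtime counter in 'PosLVal'), and finally depobj payloads copied via
// emitDepobjElements. The returned (NumOfElements, DependenciesArray) pair
// feeds the dependence-aware task runtime calls below.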
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
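// The returned address is advanced one record past the allocation start:
// slot 0 carries the element count (written through the base_addr field
// above) so that later depobj update/destroy operations can recover it.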
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
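// The GEP by -1 steps back over the hidden count record that
// emitDepobjDependClause prepends, handing __kmpc_free the true allocation
// base rather than the user-visible first dependence record.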
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
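// Shape of the emitted IR, sketched as pseudo-C (illustrative only):
//   kmp_depend_info *el = begin;
//   do { el->flags = NewDepKind; ++el; } while (el != end);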
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
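// With an if() clause both arms are emitted: ThenCodeGen enqueues the task
// through __kmpc_omp_task[_with_deps], while ElseCodeGen executes it
// immediately, bracketed by __kmpc_omp_task_begin_if0 and
// __kmpc_omp_task_complete_if0 and preceded by a dependence wait if needed.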
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
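// The 'sched' argument uses the local enum above: 0 = no schedule clause,
// 1 = grainsize, 2 = num_tasks; the following kmp_uint64 argument carries
// the corresponding clause value, or 0 when no schedule clause is present.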
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
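// A minimal sketch of what this emits, as pseudo-C (illustrative only):
//   for (T *l = lhs, *r = rhs; l != lhs + n; ++l, ++r)
//     RedOpGen(*l, *r);
// privatizing LHSVar/RHSVar per element so the combiner sees one pair of
// scalars at a time.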
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
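// RedList layout assumed by this function: slot i is &RHS[i]; a variably
// modified private consumes one extra slot holding its element count as an
// inttoptr'd integer, which is re-materialized above through the
// OpaqueValueMapping before the per-element combiners run.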
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
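// Runtime contract, per the pseudo-code comment at the top of this
// function: __kmpc_reduce{_nowait} returns 1 when this thread must combine
// non-atomically (case 1), 2 when it must combine atomically (case 2), and
// any other value when another thread already did the combining (default).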
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
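// For instance, a local variable 'x' first seen at raw location 12345 would
// produce a name of the form "reduction_size.x_12345" (values hypothetical,
// shown only to illustrate the format assembled above).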
/// Emits reduction initializer function:
/// ```
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// ```
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// ```
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// ```
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction finalizer function:
/// ```
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// ```
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
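// The populated .rd_input. array is handed to __kmpc_taskred_init (or the
// modifier variant when a reduction modifier is present); the runtime then
// lazily instantiates per-thread copies via the stored init/comb/fini
// thunks emitted above.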
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
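
// Editor's illustration (assumed mapping, matching the code above): the
// region named in '#pragma omp cancel for' yields CancelLoop, 'parallel'
// yields CancelParallel, 'sections' CancelSections, and 'taskgroup'
// CancelTaskgroup; that value becomes the cncl_kind argument of the runtime
// entry points below.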
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
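
// Editor's illustration (hypothetical user code): for
//   #pragma omp parallel
//   { #pragma omp cancellation point parallel }
// the generated IR tests the result of __kmpc_cancellationpoint, branches to
// .cancel.exit on a non-zero value, emits the cancel barrier, and exits the
// construct through the cleanup destination computed above.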
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
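
// Editor's illustration (hypothetical user code): for
//   #pragma omp cancel for if (cond)
// ThenGen runs under emitIfClause, so __kmpc_cancel is only invoked when
// 'cond' is true at runtime; with no if clause, the RegionCodeGenTy path
// calls it unconditionally.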
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.getPointer();

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
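
// Editor's illustration (hypothetical user code, assuming the standard
// omp_alloctrait_t API): for
//   omp_alloctrait_t Traits[] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(MyAlloc(Traits))
// the Enter action calls emitUsesAllocatorsInit, which invokes
// __kmpc_init_allocator with the trait array and stores the handle into
// 'MyAlloc'; the Exit action releases it via __kmpc_destroy_allocator.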
void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
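
// Editor's illustration (hypothetical attribute values): a CUDA
// launch_bounds(128, 2) attribute reached through an 'ompx_attribute' clause
// sets AttrMaxThreadsVal = 128 and AttrMinBlocksVal = 2, so MaxThreadsVal is
// clamped to 128 and MinTeamsVal is raised to at least 2 by the logic above.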
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
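
// Editor's illustration (hypothetical captured body): for
//   { int Unused; ; #pragma omp teams ... }
// the unused declaration and the null statement are skipped, so the teams
// directive is returned as the single compound child; two non-ignorable
// statements would make the function return the whole body instead.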
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<int32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We have already handled the thread limit
    // expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
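
// Editor's illustration (assumed inputs): with num_threads(N), thread_limit(L)
// and if(C) all present, the selects above compute roughly
//   T = C ? N : 1;  T = (L < T) ? L : T;
// and a missing num_threads value degrades to the thread limit, or to 0
// (runtime choice) when neither clause is given.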
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
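
  // Editor's illustration (assuming the MEMBER_OF field occupies the high 16
  // bits of the 64-bit flags, i.e. OMP_MAP_MEMBER_OF == 0xffff << 48): the
  // loop counts 48 trailing zero bits, so a member position P is encoded as
  // ((uint64_t)(P + 1)) << getFlagMemberOffset().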
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// base.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release is what the runtime is going
      // to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
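
  // Editor's illustration (hypothetical clause): map(always, tofrom : x) on a
  // target-parameter entry combines to
  //   OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM
  // via the switch and modifier checks above.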
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);
    if (!OASE)
      // It is not an array section and therefore not a unity-size one.
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
7125 // Track if the map information being generated is the first for a capture.
7126 bool IsCaptureFirstInfo
= IsFirstComponentList
;
7127 // When the variable is on a declare target link or in a to clause with
7128 // unified memory, a reference is needed to hold the host/device address
7130 bool RequiresReference
= false;
7132 // Scan the components from the base to the complete expression.
7133 auto CI
= Components
.rbegin();
7134 auto CE
= Components
.rend();
7137 // Track if the map information being generated is the first for a list of
7139 bool IsExpressionFirstInfo
= true;
7140 bool FirstPointerInComplexData
= false;
7141 Address BP
= Address::invalid();
7142 const Expr
*AssocExpr
= I
->getAssociatedExpression();
7143 const auto *AE
= dyn_cast
<ArraySubscriptExpr
>(AssocExpr
);
7144 const auto *OASE
= dyn_cast
<OMPArraySectionExpr
>(AssocExpr
);
7145 const auto *OAShE
= dyn_cast
<OMPArrayShapingExpr
>(AssocExpr
);
7147 if (isa
<MemberExpr
>(AssocExpr
)) {
7148 // The base is the 'this' pointer. The content of the pointer is going
7149 // to be the base of the field being mapped.
7150 BP
= CGF
.LoadCXXThisAddress();
7151 } else if ((AE
&& isa
<CXXThisExpr
>(AE
->getBase()->IgnoreParenImpCasts())) ||
7153 isa
<CXXThisExpr
>(OASE
->getBase()->IgnoreParenImpCasts()))) {
7154 BP
= CGF
.EmitOMPSharedLValue(AssocExpr
).getAddress(CGF
);
7156 isa
<CXXThisExpr
>(OAShE
->getBase()->IgnoreParenCasts())) {
7158 CGF
.EmitScalarExpr(OAShE
->getBase()),
7159 CGF
.ConvertTypeForMem(OAShE
->getBase()->getType()->getPointeeType()),
7160 CGF
.getContext().getTypeAlignInChars(OAShE
->getBase()->getType()));
7162 // The base is the reference to the variable.
7164 BP
= CGF
.EmitOMPSharedLValue(AssocExpr
).getAddress(CGF
);
7165 if (const auto *VD
=
7166 dyn_cast_or_null
<VarDecl
>(I
->getAssociatedDeclaration())) {
7167 if (std::optional
<OMPDeclareTargetDeclAttr::MapTypeTy
> Res
=
7168 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD
)) {
7169 if ((*Res
== OMPDeclareTargetDeclAttr::MT_Link
) ||
7170 ((*Res
== OMPDeclareTargetDeclAttr::MT_To
||
7171 *Res
== OMPDeclareTargetDeclAttr::MT_Enter
) &&
7172 CGF
.CGM
.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7173 RequiresReference
= true;
7174 BP
= CGF
.CGM
.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD
);
7179 // If the variable is a pointer and is being dereferenced (i.e. is not
7180 // the last component), the base has to be the pointer itself, not its
7181 // reference. References are ignored for mapping purposes.
7183 I
->getAssociatedDeclaration()->getType().getNonReferenceType();
7184 if (Ty
->isAnyPointerType() && std::next(I
) != CE
) {
7185 // No need to generate individual map information for the pointer, it
7186 // can be associated with the combined storage if shared memory mode is
7187 // active or the base declaration is not global variable.
7188 const auto *VD
= dyn_cast
<VarDecl
>(I
->getAssociatedDeclaration());
7189 if (CGF
.CGM
.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7190 !VD
|| VD
->hasLocalStorage())
7191 BP
= CGF
.EmitLoadOfPointer(BP
, Ty
->castAs
<PointerType
>());
7193 FirstPointerInComplexData
= true;
7198 // Track whether a component of the list should be marked as MEMBER_OF some
7199 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7200 // in a component list should be marked as MEMBER_OF, all subsequent entries
7201 // do not belong to the base struct. E.g.
7203 // s.ps->ps->ps->f[:]
7205 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7206 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7207 // is the pointee of ps(2) which is not member of struct s, so it should not
7208 // be marked as such (it is still PTR_AND_OBJ).
7209 // The variable is initialized to false so that PTR_AND_OBJ entries which
7210 // are not struct members are not considered (e.g. array of pointers to
7212 bool ShouldBeMemberOf
= false;
7214 // Variable keeping track of whether or not we have encountered a component
7215 // in the component list which is a member expression. Useful when we have a
7216 // pointer or a final array section, in which case it is the previous
7217 // component in the list which tells us whether we have a member expression.
7219 // While processing the final array section "[:]" it is "f" which tells us
7220 // whether we are dealing with a member of a declared struct.
7221 const MemberExpr
*EncounteredME
= nullptr;
7223 // Track for the total number of dimension. Start from one for the dummy
7225 uint64_t DimSize
= 1;
7227 bool IsNonContiguous
= CombinedInfo
.NonContigInfo
.IsNonContiguous
;
7228 bool IsPrevMemberReference
= false;
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }
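
        // Note (illustrative, not in the original): for a reference member,
        // e.g. `struct S { int &R; };` with map(s.R), LowestElem above is the
        // address of the reference field itself while LB is the address of the
        // referenced int after the load, so struct-range bookkeeping and the
        // actual data transfer intentionally use different pointers.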
        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
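
        // Illustrative: given map(s) together with map(s.p[0:N]), the block
        // above emits one MEMBER_OF entry per contiguous run of bytes between
        // overlapped members, so the struct is copied in pieces around the
        // separately mapped data instead of as one blob.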
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }
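
        // Illustrative: the OMP_MAP_MEMBER_OF added above is the all-ones
        // placeholder (MEMBER_OF=FFFF); emitCombinedEntry() and the
        // OMPBuilder MEMBER_OF helpers later rewrite it with the real index
        // of the parent struct entry.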

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran into the whole component, allocate the space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case of having a pointer as the base, we need to remove
          // one level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
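
    // Illustrative: CurOffsets/CurCounts start with a dummy leading element
    // (offset 0, count 1) and DimProd starts at the element size, so the loop
    // below can always refer to the previous dimension through *(DI - 1).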

    // Collect info for non-contiguous maps. Notice that offset, count, and
    // stride are only meaningful for an array section, so we insert a null for
    // anything other than an array section.
    // Also, the size of offset, count, and stride is not the same as
    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
    // count, and stride equals the number of non-contiguous declarations in
    // the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the lower
        // dimensions are constructed as array sections too. However, for a
        // case like arr[0:2][2], Clang constructs the inner dimension as an
        // array section even though it is not in array section form according
        // to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1       4    (int)    <- dummy dimension
      //    D1          0      2       8    (2 * (1) * 4)
      //    D2          1      2       20   (1 * (1 * 5) * 4)
      //    D3          0      2       200  (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
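    // Illustrative: for `#pragma omp target update to(arr[0:2:2][1:2:1][0:2:2])`
    // on `int arr[5][5][5]`, the vectors pushed above hold the
    // offsets/counts/strides of the D0-D3 table in the comment above, one
    // entry per dimension including the dummy one.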
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
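
  // Illustrative summary (not in the original): a firstprivate int* capture
  // yields TO | PTR_AND_OBJ, a firstprivate non-pointer yields PRIVATE | TO,
  // a lambda mapped with map(to: ...) reuses the user-specified map type, and
  // anything else falls back to TO | FROM.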
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();

    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in all the non-virtual bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
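
  // Illustrative sketch: for `struct B { int b; }; struct D : B { int d; };`
  // getPlainLayout(D, Layout, /*AsBase=*/false) is expected to produce
  // {B::b, D::d}, i.e. non-virtual base fields flattened ahead of the derived
  // class's own fields, following the LLVM struct layout order.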
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
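
    // Illustrative: after the loops below run, Info[x] holds up to four
    // buckets indexed by MapKind, e.g. map(present, to: x) lands in
    // Info[x][Present] and map(alloc: x) in Info[x][Allocs], so entries can
    // later be flushed in the required present/alloc-first order.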
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }

    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };

    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };

    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }

    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
            CurInfo.DevicePointers[CurrentBasePointersIdx] =
                L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
            CurInfo.Types[CurrentBasePointersIdx] |=
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}

  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
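
  // Illustrative: for map(s.a, s.z) of `struct S { char a, b, ..., z; }` the
  // combined entry spans [&s.a, &s.z + 1), i.e. the byte range between the
  // lowest and highest mapped members rather than sizeof(S), unless a complete
  // record or `this` with base classes forces the full object size above.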

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }

  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
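
  // Illustrative: for a lambda capturing [&x], the code above maps the
  // capture field inside the closure object to the address of x with
  // PTR_AND_OBJ | LITERAL | MEMBER_OF | IMPLICIT, so the device-side closure
  // receives a translated pointer; adjustMemberOfForLambdaCaptures() below
  // then patches MEMBER_OF to the index of the lambda object's own entry.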

  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }

  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
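
    // Illustrative: the predicate above sorts component lists carrying the
    // 'present' modifier, and plain alloc maps, ahead of the rest, so e.g.
    // map(present, to: s.x) is processed before an overlapping map(s).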

    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlap.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }

    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }

  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType
->isAnyPointerType()) {
8777 Address PtrAddr
= CGF
.EmitLoadOfReference(CGF
.MakeAddrLValue(
8778 CV
, ElementType
, CGF
.getContext().getDeclAlign(VD
),
8779 AlignmentSource::Decl
));
8780 CombinedInfo
.Pointers
.push_back(PtrAddr
.getPointer());
8782 CombinedInfo
.Pointers
.push_back(CV
);
8784 if (I
!= FirstPrivateDecls
.end())
8785 IsImplicit
= I
->getSecond();
8787 // Every default map produces a single argument which is a target parameter.
8788 CombinedInfo
.Types
.back() |=
8789 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
;
8791 // Add flag stating this is an implicit map.
8793 CombinedInfo
.Types
.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
;
8795 // No user-defined mapper for default mapping.
8796 CombinedInfo
.Mappers
.push_back(nullptr);
8799 } // anonymous namespace
// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  if (const auto *ME = dyn_cast<MemberExpr>(E->IgnoreParenImpCasts()))
    return ME->getMemberDecl();
  return nullptr;
}
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
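
// Illustrative note (an assumption, not from the original source): with debug
// info enabled, a clause such as 'map(tofrom: a[0:N])' written in test.c at
// line 3, column 20 would be expected to yield a name string of roughly the
// form
//   ;test.c;a[0:N];3;20;;
// mirroring the layout OpenMPIRBuilder uses for its source-location strings.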
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
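
// Illustrative example (an assumption, not from the original source): for
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < N; ++i) { ... }
// the function above drills through the nested 'teams' region and returns the
// 'distribute parallel for' directive, so the loop trip count can be
// precomputed and handed to the kernel launch.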
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
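///
/// As an illustrative example (an assumption, not part of the original
/// comment), a mapper declared as
/// \code
/// struct S { int len; double *data; };
/// #pragma omp declare mapper(id : S s) map(s.len) map(s.data[0 : s.len])
/// \endcode
/// produces an '.omp_mapper.' function following the pattern above: for each
/// S element of the mapped section it pushes one component for 's.len' and
/// one for the 's.data' array section onto the runtime mapper handle.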
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of
  // them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
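    // For instance (illustrative): a member the mapper declared with 'to'
    // that is reached from a construct mapping the parent with 'from' decays
    // to 'alloc' per the table above, so memory is allocated but no data is
    // transferred for that member.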
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}
static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}
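
// For example (illustrative, an assumption): 'device(2)' on the construct
// yields an i64 constant 2 after the integer cast above, while a missing
// device clause falls back to the OMP_DEVICEID_UNDEF sentinel so the runtime
// selects the default device.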
static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}
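
// Illustrative usage (an assumption, not from the original source): the LLVM
// extension clause
//   #pragma omp target ompx_dyn_cgroup_mem(1024)
// makes the function above emit an i32 1024 that is later passed to the
// kernel launch as the dynamic per-team scratch memory size; without the
// clause the default of 0 is used.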
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}
static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
        CGM, OMPBuilder, E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point? If
    // so just signal we are done with this target region.
    if (!OMPBuilder.OffloadInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_target_teams_loop:
      CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsGenericLoopDirective>(E));
      break;
    case OMPD_target_parallel_loop:
      CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelGenericLoopDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
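
// For example (illustrative): a function declared with
//   #pragma omp declare target device_type(nohost)
// is reported as not-emitted for host compilations, and one declared with
// device_type(host) is reported as not-emitted for device compilations.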
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
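
// Illustrative example (not from the original source): after processing
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// getDefaultMemoryOrdering() returns
// llvm::AtomicOrdering::SequentiallyConsistent, which atomic codegen can use
// when an 'atomic' directive carries no explicit memory-order clause.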
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(
                            CGM.Int64Ty, OMPBuilder.Config.getRequiresFlags()));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
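// Sketch of the resulting runtime call (illustrative names): for
//   #pragma omp teams
// with two captured variables a and b, the code above produces
//   __kmpc_fork_teams(&loc, 2, microtask, &a, &b);
// where microtask is the outlined function emitted for the teams region.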
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
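// Illustrative lowering (example clause values): for
//   #pragma omp teams num_teams(4) thread_limit(8)
// the call emitted above is
//   __kmpc_push_num_teams(&loc, global_tid, 4, 8);
// with absent clauses encoded as 0.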
void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device)
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  else
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
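// Usage sketch (illustrative source construct): for
//   #pragma omp target data map(tofrom: a) if(cond) device(dev)
// GenMapInfoCB collects the map-clause info for 'a', BodyCB emits the region
// body for the privatization/no-privatization phases, and createTargetData
// wires both into the begin/end mapper calls, guarded by 'cond' on device
// 'dev'.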
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_sections:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
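// Illustrative mapping of directives to entry points (per the switch above):
//   #pragma omp target enter data map(to: x)          -> __tgt_target_data_begin_mapper
//   #pragma omp target exit data map(from: x) nowait  -> __tgt_target_data_end_nowait_mapper
//   #pragma omp target update to(x)                   -> __tgt_target_data_update_mapper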
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 "Registers
  // and the Stack Frame" of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
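// Worked example of the formula above (illustrative types): for
//   double foo(double x);
// the CDT is double (64 bits), so with a 128-bit vector register
// VLEN = 128 / 64 = 2, and with a 256-bit register VLEN = 256 / 64 = 4.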
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
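// Illustrative parameter sequence (hypothetical signature): for
//   #pragma omp declare simd uniform(a) linear(b:4)
//   void foo(int *a, int b, float c);
// the parameters mangle as "ul4v": 'u' for the uniform a, 'l4' for the
// linear b with step 4, and 'v' for the vector parameter c.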
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
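// Example of a resulting attribute (hypothetical function 'foo'): with one
// vector parameter, simdlen(4) and notinbranch, the SSE entry ('b', 128-bit)
// yields the vector variant name
//   _ZGVbN4v_foo
// i.e. prefix "_ZGV", ISA 'b', mask 'N', VLEN 4, parameter sequence "v", and
// the scalar name after '_'.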
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.
/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types at most 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
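// Worked example (illustrative signature): for
//   double foo(float x);
// the lane sizes are {64, 32}, so NDS = 32 and WDS = 64, and
// OutputBecomesInput stays false because the return type is pass-by-value.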
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
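// Worked example of the NDS-based rules above: with NDS == 32 (e.g. a float
// signature) the helper emits a 2-lane variant (64-bit vectors) and a 4-lane
// variant (128-bit vectors) for the given mask.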
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // constraints.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
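// End-to-end sketch (hypothetical declaration): for
//   #pragma omp declare simd simdlen(4) uniform(n) linear(i)
//   float foo(float *p, int i, int n);
// the loops above mark n as Uniform and i as Linear with stride 1, evaluate
// simdlen to 4, and then dispatch to the x86 or AArch64 mangler depending on
// the target triple.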
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
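// Illustrative lowering (example loop nest): for
//   #pragma omp for ordered(2)
// NumIterations has two entries, so a two-element kmp_dim array is filled
// with the upper bounds and unit strides, and
//   __kmpc_doacross_init(&loc, gtid, 2, dims)
// is emitted, paired with __kmpc_doacross_fini through the cleanup above.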
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
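// Usage sketch (illustrative directive): for a local declared with
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// the code above sizes 'a', calls __kmpc_alloc (or __kmpc_aligned_alloc when
// an alignment is present), and registers a cleanup that releases the memory
// via __kmpc_free at scope exit.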
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}
CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
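// Illustrative trigger (example clause): a loop such as
//   #pragma omp for lastprivate(conditional: a)
// pushes an entry mapping 'a' to a unique "pl_cond" name, which the codegen
// below uses to track the last iteration that actually assigned to 'a'.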
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
&CGF
,
11751 StringRef UniqueDeclName
,
11753 SourceLocation Loc
) {
11754 // Last updated loop counter for the lastprivate conditional var.
11755 // int<xx> last_iv = 0;
11756 llvm::Type
*LLIVTy
= CGF
.ConvertTypeForMem(IVLVal
.getType());
11757 llvm::Constant
*LastIV
= OMPBuilder
.getOrCreateInternalVariable(
11758 LLIVTy
, getName({UniqueDeclName
, "iv"}));
11759 cast
<llvm::GlobalVariable
>(LastIV
)->setAlignment(
11760 IVLVal
.getAlignment().getAsAlign());
11761 LValue LastIVLVal
= CGF
.MakeNaturalAlignAddrLValue(LastIV
, IVLVal
.getType());
11763 // Last value of the lastprivate conditional.
11764 // decltype(priv_a) last_a;
11765 llvm::GlobalVariable
*Last
= OMPBuilder
.getOrCreateInternalVariable(
11766 CGF
.ConvertTypeForMem(LVal
.getType()), UniqueDeclName
);
11767 Last
->setAlignment(LVal
.getAlignment().getAsAlign());
11768 LValue LastLVal
= CGF
.MakeAddrLValue(
11769 Address(Last
, Last
->getValueType(), LVal
.getAlignment()), LVal
.getType());
11771 // Global loop counter. Required to handle inner parallel-for regions.
11773 llvm::Value
*IVVal
= CGF
.EmitLoadOfScalar(IVLVal
, Loc
);
11775 // #pragma omp critical(a)
11776 // if (last_iv <= iv) {
11778 // last_a = priv_a;
11780 auto &&CodeGen
= [&LastIVLVal
, &IVLVal
, IVVal
, &LVal
, &LastLVal
,
11781 Loc
](CodeGenFunction
&CGF
, PrePostActionTy
&Action
) {
11783 llvm::Value
*LastIVVal
= CGF
.EmitLoadOfScalar(LastIVLVal
, Loc
);
11784 // (last_iv <= iv) ? Check if the variable is updated and store new
11785 // value in global var.
11786 llvm::Value
*CmpRes
;
11787 if (IVLVal
.getType()->isSignedIntegerType()) {
11788 CmpRes
= CGF
.Builder
.CreateICmpSLE(LastIVVal
, IVVal
);
11790 assert(IVLVal
.getType()->isUnsignedIntegerType() &&
11791 "Loop iteration variable must be integer.");
11792 CmpRes
= CGF
.Builder
.CreateICmpULE(LastIVVal
, IVVal
);
11794 llvm::BasicBlock
*ThenBB
= CGF
.createBasicBlock("lp_cond_then");
11795 llvm::BasicBlock
*ExitBB
= CGF
.createBasicBlock("lp_cond_exit");
11796 CGF
.Builder
.CreateCondBr(CmpRes
, ThenBB
, ExitBB
);
11798 CGF
.EmitBlock(ThenBB
);
11801 CGF
.EmitStoreOfScalar(IVVal
, LastIVLVal
);
11803 // last_a = priv_a;
11804 switch (CGF
.getEvaluationKind(LVal
.getType())) {
11806 llvm::Value
*PrivVal
= CGF
.EmitLoadOfScalar(LVal
, Loc
);
11807 CGF
.EmitStoreOfScalar(PrivVal
, LastLVal
);
11810 case TEK_Complex
: {
11811 CodeGenFunction::ComplexPairTy PrivVal
= CGF
.EmitLoadOfComplex(LVal
, Loc
);
11812 CGF
.EmitStoreOfComplex(PrivVal
, LastLVal
, /*isInit=*/false);
11815 case TEK_Aggregate
:
11817 "Aggregates are not supported in lastprivate conditional.");
11820 CGF
.EmitBranch(ExitBB
);
11821 // There is no need to emit line number for unconditional branch.
11822 (void)ApplyDebugLocation::CreateEmpty(CGF
);
11823 CGF
.EmitBlock(ExitBB
, /*IsFinished=*/true);
11826 if (CGM
.getLangOpts().OpenMPSimd
) {
11827 // Do not emit as a critical region as no parallel region could be emitted.
11828 RegionCodeGenTy
ThenRCG(CodeGen
);
11831 emitCriticalRegion(CGF
, UniqueDeclName
, CodeGen
, Loc
);
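
// Illustrative example (not from the original source): the update emitted
// above corresponds to a user-level construct such as
//
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (p[i] > 0)
//       a = p[i];
//
// Every store to 'a' routes through the critical section generated here, so
// 'last_a' keeps the value from the highest iteration number that actually
// performed an assignment, matching OpenMP 5.0 'conditional' semantics.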
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
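
// Illustrative example (hypothetical, not from the original source): the
// FoundFn != CGF.CurFn path above handles stores made in a nested region that
// was outlined into a different function, e.g.
//
//   #pragma omp for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp parallel     // body outlined; CurFn differs from FoundFn
//     { if (cond(i)) a = i; }
//   }
//
// In that case only the Fired flag of the wrapper struct is set (atomically);
// the compare-and-copy into the global copy is deferred to the enclosing
// function, see checkAndEmitSharedLastprivateConditional below.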
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
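
// The CGOpenMPSIMDRuntime entry points below are deliberate stubs: this
// runtime is selected for -fopenmp-simd compilations, where only simd-related
// constructs are code-generated and no libomp calls may be emitted, so
// reaching any of them is expected to indicate a frontend bug.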
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");