//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;

namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
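  // Illustrative sketch (not emitted verbatim; block names are uniquified by
  // LLVM): for an untied task with two switching points, Enter() above yields
  // roughly
  //   switch (*part_id) {
  //   default: goto .untied.done.;  // finished parts exit the entry point
  //   case 0:  goto .untied.jmp.;   // initial task part
  //   case 1:  goto .untied.jmp.;   // resumption after the first switch point
  //   }
  // with each emitUntiedSwitch() call appending one more case.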
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}

/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
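
/// Typical use (a sketch, not a prescribed API): wrapping the body emission of
/// an inlined construct such as 'critical' or 'single' in an
/// InlinedOpenMPRegionRAII swaps in a CGOpenMPInlinedRegionInfo for the
/// duration of the emission; the destructor restores the previous
/// CapturedStmtInfo and, when NoInheritance is set, the saved lambda/block
/// capture state.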
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran; see above */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++  */
///    char const *psource;    /**< String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
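
// For reference, getIdentStringFromSourceLocation() later in this file builds
// psource strings of the illustrative shape ";file;function;line;column;;",
// i.e. the semi-colon separated fields described above.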
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
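
// Illustrative mapping (values mirror kmp.h): 'schedule(dynamic, 4)' selects
// OMP_sch_dynamic_chunked (35), and a nonmonotonic modifier ORs in
// OMP_sch_modifier_nonmonotonic (1 << 30) on top of the base schedule value.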
namespace {
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};

} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
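
// Design note: routing the invocation through the CleanupTy above when a
// PrePostActionTy is attached guarantees Action->Exit() runs on both the
// normal and the exceptional path, which a plain call placed after the
// callback would not.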
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
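
// The shape matched above comes from user-defined reductions, e.g. the
// illustrative source
//   #pragma omp declare reduction(mymin : int :
//       omp_out = omp_out < omp_in ? omp_out : omp_in)
// for which the reduction op is a CallExpr whose callee is an OpaqueValueExpr
// whose underlying expression references the OMPDeclareReductionDecl.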
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
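
// Control-flow sketch of the loop emitted above:
//   entry:              br (DestBegin == DestEnd), done, body
//   omp.arrayinit.body: PHI over the current element, emit per-element init,
//                       advance the pointer(s), br (next == DestEnd) ? done
//                                                                    : body
//   omp.arrayinit.done: continue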
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      CGF.Builder.CreateElementBitCast(BaseLV.getAddress(CGF),
                                       CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
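
// Net effect (a sketch of the arithmetic above): the returned address is the
// private counterpart of the original base, computed as
//   PrivateAddr + (orig_base - shared_begin)
// so array-section reductions address their private storage at the same
// relative offset as the shared item.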
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsDevice, false,
                                     hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  OMPBuilder.setConfig(Config);
  OffloadEntriesInfoManager.setConfig(Config);
  loadOffloadInfoMetadata();
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
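
// The generated helper has the illustrative C shape
//   static void .omp_combiner.(Ty *restrict omp_out_parm,
//                              Ty *restrict omp_in_parm);
// (parameter order follows Args above, where the 'out' parameter is pushed
// first), with omp_out/omp_in privatized to the parameters' pointees.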
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
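
// Note: NumberOfParts is only updated for untied tasks; it reports how many
// task parts (switch cases) the UntiedTaskActionTy recorded while the outlined
// body was emitted.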
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
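
// Name selection example (derived directly from the code above): IVSize == 32,
// IVSigned == true, IsGPUDistribute == false picks "__kmpc_for_static_init_4",
// while IVSize == 64, IVSigned == false with IsGPUDistribute == true picks
// "__kmpc_distribute_static_init_8u".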

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}

llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
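
// All three dispatch helpers follow the same suffix scheme as the static-init
// helper above: _4/_4u/_8/_8u encodes the induction-variable width and
// signedness. E.g. createDispatchNextFunction(64, /*IVSigned=*/false) binds
// "__kmpc_dispatch_next_8u", whose i32 result tells the caller whether another
// chunk of iterations is available.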

/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static llvm::TargetRegionEntryInfo
getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                         StringRef ParentName = "") {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).
  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
                                     PLoc.getLine());
}
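
// Example (illustrative numbers): for a declaration at line 42 of a file whose
// filesystem UniqueID is (device 0x803, file 0x12345), this returns an entry
// equivalent to TargetRegionEntryInfo(ParentName, 0x803, 0x12345, 42). Host
// and device compilations of the same source therefore agree on one key per
// target entry.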

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        auto EntryInfo = getTargetEntryUniqueInfo(
            CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
        OS << llvm::format("_%x", EntryInfo.FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = OMPBuilder.getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
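
// The call built above conceptually matches the runtime entry
//   void *__kmpc_threadprivate_cached(ident_t *loc, kmp_int32 gtid,
//                                     void *data, size_t size, void ***cache);
// so the emitted IR is roughly (illustrative):
//   %p = call ptr @__kmpc_threadprivate_cached(ptr @loc, i32 %tid, ptr @var,
//                                              i64 <size>, ptr @var.cache)
// and the returned pointer addresses the calling thread's private copy.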

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
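
// Putting the two helpers above together: for
//   int gv = 1;
//   #pragma omp threadprivate(gv)
// (with C++ init semantics, and a destructor where applicable) the emitted
// initializer is conceptually:
//   void __omp_threadprivate_init_...() {
//     __kmpc_global_thread_num(&loc);  // ensure the runtime is initialized
//     __kmpc_threadprivate_register(&loc, &gv, ctor, /*cctor=*/0, dtor);
//   }
// This is a sketch of the generated code, not literal output.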

bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  auto EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
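
// Note the two strategies above: when TLS is available the "artificial"
// variable is simply marked thread_local; otherwise it is routed through
// __kmpc_threadprivate_cached exactly like a user threadprivate variable,
// backed by a per-variable cache global owned by the runtime.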

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
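
// Resulting CFG shape when the condition does not fold (block names as
// created above):
//
//           +-- omp_if.then --+
//   entry --+                 +-- omp_if.end
//           +-- omp_if.else --+
//
// ThenGen populates omp_if.then, ElseGen populates omp_if.else.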

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
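
// For `#pragma omp parallel if(cond)` the two arms above produce,
// conceptually:
//   if (cond)
//     __kmpc_fork_call(&loc, n, microtask, var1, ..., varn);
//   else {
//     __kmpc_serialized_parallel(&loc, gtid);
//     microtask(&gtid, &zero_bound, var1, ..., varn);
//     __kmpc_end_serialized_parallel(&loc, gtid);
//   }
// This is a sketch; the actual emission goes through the lambdas above.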

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined function
// as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel region, but
// in a regular serial code region, get thread ID by calling kmp_int32
// kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary and
// return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
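
// Usage pattern (see emitMasterRegion/emitSingleRegion below): constructing
// CommonActionTy with Conditional=true wraps the region body in
//   if (__kmpc_enter(...)) { <body> __kmpc_exit(...); }
// Enter() emits the guard branch, Exit() emits the end call, and the caller
// invokes Done() to emit the continuation block.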

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
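
// For `#pragma omp master` the emitted pseudo-code is therefore:
//   if (__kmpc_master(&loc, gtid)) {
//     <master region body>
//     __kmpc_end_master(&loc, gtid);
//   }
// i.e. only the thread for which __kmpc_master returns nonzero runs the body.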

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
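
// End-to-end, `#pragma omp single copyprivate(x)` lowers to (sketch):
//   i32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <single region body>
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, &list, copy_func, did_it);
// where `list` holds the addresses of the copyprivate variables and copy_func
// is the helper built by emitCopyprivateCopyFunction above.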

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
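
// Mapping examples, read directly off the switch above: schedule(static) with
// no chunk -> OMP_sch_static; schedule(static, c) -> OMP_sch_static_chunked;
// schedule(dynamic) inside an ordered loop -> OMP_ord_dynamic_chunked.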

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
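
// Encoding example: for OpenMP >= 5.0, schedule(dynamic) with no explicit
// modifier yields OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic,
// since dynamic is not in the static/ordered set that defaults to monotonic.
// The modifier constants are OR'ed into the schedule word consumed by the
// dispatch-init runtime entries.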

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
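
// Worked example: for schedule(static) on a 32-bit signed IV, the call built
// above is (sketch):
//   __kmpc_for_static_init_4(&loc, tid, OMP_sch_static, &last, &lb, &ub,
//                            &stride, /*incr=*/1, /*chunk=*/1);
// and on return [lb, ub] bounds this thread's contiguous iteration slice.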

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
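
// Typical driver loop built around emitForNext by the loop codegen (sketch,
// for a unit-stride 32-bit signed IV):
//   while (__kmpc_dispatch_next_4(&loc, tid, &last, &lb, &ub, &st)) {
//     for (i = lb; i <= ub; ++i)
//       <loop body>;
//   }
// The i32 result is converted to bool above to drive that while-condition.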

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
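
// These indexes address the runtime's task descriptor, conceptually:
//   struct kmp_task_t {
//     void *shareds;               // KmpTaskTShareds
//     kmp_routine_entry_t routine; // KmpTaskTRoutine
//     kmp_int32 part_id;           // KmpTaskTPartId
//     ...                          // destructors / priority data
//     // taskloop-only trailing fields: lb, ub, st, liter, reductions
//   };
// This is an illustrative layout only; the authoritative record type is built
// later in this file when tasks are emitted.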
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(OffloadEntriesInfoManager,
                                                 ErrorReportFn);
}
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
}
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
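// Illustrative sketch (assumed example): given
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// the declaration carries OMPAllocateDeclAttr with a non-default allocator,
// so this returns true; a plain '#pragma omp allocate(x)' that resolves to
// the default allocator with no allocator expression returns false, and the
// variable keeps the default allocation.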
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
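// Illustrative sketch (assumed example): a task with a single firstprivate
// 'int x' yields approximately
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;
//     struct .kmp_privates.t. { int x; } privates;
//   };
// so the runtime only ever sees the plain kmp_task_t prefix, and the
// privates record is appended only when Privates is non-empty.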
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
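// Illustrative sketch (assumed example): for 'iterator(i = 0:n)' the scope
// above brackets the user code with a loop of the shape
//   counter = 0;
// cont:
//   if (!(counter < n)) goto exit;
//   i = begin + counter * step;
//   <body emitted between constructor and destructor>
//   counter = counter + 1;
//   goto cont;
// exit:
// with one nested loop level per declared iterator.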
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
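// Illustrative sketch (assumed examples): for an array-shaping expression
// '([10][20])p' the size is sizeof(*p) * 10 * 20; for an array section
// 'a[3:5]' it is ((char *)(&a[7] + 1)) - (char *)&a[3], i.e. 5 * sizeof(a[0]);
// for any other expression it is simply the size of its type.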
/// Builds kmp_task_affinity_info_t type, if it is not built yet, and builds
/// the flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
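// The record built above is meant to mirror the runtime-side layout,
// roughly (sketch; the authoritative definition lives in
// openmp/runtime/src/kmp.h):
//   typedef struct kmp_task_affinity_info {
//     kmp_intptr_t base_addr;
//     size_t len;
//     /* 32-bit flags */
//   } kmp_task_affinity_info_t;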
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
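// The record built above is meant to mirror the runtime's kmp_depend_info,
// roughly (sketch; the exact flag bitfield layout lives in
// openmp/runtime/src/kmp.h):
//   typedef struct kmp_depend_info {
//     kmp_intptr_t base_addr;
//     size_t len;
//     /* flags: in/out/mtx/set bits, built here as a bool-sized uint */
//   } kmp_depend_info_t;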
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
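// Layout assumed here (sketch): a depobj handle points at element 1 of a
// kmp_depend_info array whose element 0 reuses its base_addr field as the
// element count, so the GEP by -1 above makes deps[-1].base_addr == numdeps.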
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
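// Illustrative sketch (assumed example): 'depend(in: a, b)' fills two
// consecutive records through this helper, roughly
//   deps[i]   = { (kmp_intptr_t)&a, sizeof(a), DepIn };
//   deps[i+1] = { (kmp_intptr_t)&b, sizeof(b), DepIn };
// with the position advanced either as a compile-time counter or through
// PosLVal when an iterator modifier makes the count dynamic.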
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
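
// Note (illustrative): for a directive such as (hypothetical user code)
//   #pragma omp task depend(in: x) depend(depobj: d)
// the array built above holds the regular entry for 'x' first, any
// iterator-expanded entries next, and the payload of 'd' memcpy'd last;
// NumOfElements degrades to a runtime value as soon as depobj or iterator
// dependencies are involved.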
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
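
// Note (illustrative): this is the entry point for the depobj construct
// itself, e.g. (hypothetical user code):
//   omp_depend_t d;
//   #pragma omp depobj(d) depend(inout: x)
// The returned address points one element past the allocation, so that
// deps[-1].base_addr always carries the number of stored entries.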
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
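
// Note (illustrative): matches the destroy form of the construct,
//   #pragma omp depobj(d) destroy
// freeing the array allocated in emitDepobjDependClause, including the
// hidden size record that sits one element before the user-visible data.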
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
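
// Note (illustrative): matches the update form,
//   #pragma omp depobj(d) update(in)
// which walks every stored kmp_depend_info entry and rewrites only its
// flags field; base_addr and len are left untouched.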
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
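
// Note (illustrative): with an if clause the generated control flow has
// roughly this shape (pseudo-code, not literal IR):
//   if (<if-cond>) {
//     __kmpc_omp_task{_with_deps}(loc, gtid, new_task, ...);
//   } else {
//     __kmpc_omp_taskwait_deps_51(...);        // only with dependences
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);        // undeferred execution
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }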
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
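
// Note (illustrative): the sched/grainsize operands map as follows for
// hypothetical sources:
//   #pragma omp taskloop grainsize(4) -> sched = Grainsize (1), operand 4
//   #pragma omp taskloop num_tasks(8) -> sched = NumTasks (2), operand 8
//   #pragma omp taskloop              -> sched = NoSchedule (0), operand 0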
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::OMPPrivateScope Scope(CGF);
    Scope.addPrivate(LHSVar, LHSElementCurrent);
    Scope.addPrivate(RHSVar, RHSElementCurrent);
    Scope.Privatize();
    RedOpGen(CGF, XExpr, EExpr, UpExpr);
    Scope.ForceCleanup();
  }

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
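
// Note (illustrative): the IR emitted above is structurally equivalent to
//   for (size_t i = 0; i != NumElements; ++i)
//     RedOpGen(lhs[i], rhs[i]); // LHSVar/RHSVar privatized per element
// realized as a do-while loop over PHI-advanced element pointers.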
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
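
// Note (illustrative): for 'reduction(+ : a, b)' the helper emitted above
// behaves like
//   void omp.reduction.reduction_func(void *lhs[2], void *rhs[2]) {
//     *(T0 *)lhs[0] += *(T0 *)rhs[0];
//     *(T1 *)lhs[1] += *(T1 *)rhs[1];
//   }
// with variably modified types passing their sizes through extra slots.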
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
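
// Note (illustrative): for a hypothetical
//   #pragma omp parallel for reduction(+ : sum)
// the value returned by __kmpc_reduce{_nowait} selects the path at run
// time: 1 runs the plain combiners under the runtime-held lock (case 1),
// 2 falls back to the per-item atomic/critical lowering (case 2).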
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? CGF.EmitCastToVoidPtr(Fini)
             : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
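
// Note (illustrative): the typical trigger is a hypothetical
//   #pragma omp taskgroup task_reduction(+ : x)
// which fills one kmp_taskred_input_t per reduction item and registers the
// whole array through __kmpc_taskred_init or, with a reduction modifier,
// __kmpc_taskred_modifier_init.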
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
  // is_ws, int num, void *data);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
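
// Worked example (editorial sketch, not part of the upstream file): for a
// directive such as
//
//   #pragma omp taskwait depend(in: x)
//
// the dependence branch above produces a runtime call shaped like
//
//   __kmpc_omp_taskwait_deps_51(loc, gtid, /*ndeps=*/1, dep_list,
//                               /*ndeps_noalias=*/0,
//                               /*noalias_dep_list=*/nullptr,
//                               /*has_no_wait=*/0);
//
// while a bare '#pragma omp taskwait' lowers to the simpler
// __kmpc_omp_taskwait(loc, gtid) call.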
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
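
// For example (editorial note): the mapping above means
//   #pragma omp cancel parallel   -> CancelParallel
//   #pragma omp cancel for        -> CancelLoop
//   #pragma omp cancel sections   -> CancelSections
//   #pragma omp cancel taskgroup  -> CancelTaskgroup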
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
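
// Sketch of the emitted control flow (editorial illustration): for
//
//   #pragma omp cancellation point sections
//
// inside a cancellable 'sections' region, the generated IR is conceptually
//
//   %res = call i32 @__kmpc_cancellationpoint(ident, gtid, 3 /*CancelSections*/)
//   %cmp = icmp ne i32 %res, 0
//   br i1 %cmp, label %.cancel.exit, label %.cancel.continue
//
// where %.cancel.exit branches through any pending cleanups to the cancel
// destination of the enclosing region.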
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
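
// Editorial note: for '#pragma omp cancel parallel if(c)', ThenGen above runs
// only on the path where 'c' is true; emitIfClause materializes the branch and
// the empty else-lambda falls through without calling __kmpc_cancel.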
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
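
// Illustrative usage (editorial sketch; 'my_alloc' and 'traits' are
// hypothetical names): given
//
//   omp_alloctrait_t traits[1] = {{omp_atk_alignment, 64}};
//   #pragma omp target uses_allocators(my_alloc(traits))
//
// Enter() runs emitUsesAllocatorsInit for the (my_alloc, traits) pair before
// the target region body is emitted, and Exit() runs emitUsesAllocatorsFini
// for 'my_alloc' afterwards.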
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
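
// Taken together (editorial sketch), the init/fini pair above brackets the
// target region roughly as
//
//   my_alloc = __kmpc_init_allocator(gtid, /*memspace=*/nullptr,
//                                    /*ntraits=*/N, traits);
//   ... target region using my_alloc ...
//   __kmpc_destroy_allocator(gtid, my_alloc);
//
// with 'my_alloc' standing in for the allocator variable named in the clause.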
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm::TargetRegionEntryInfo EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  // Get NumTeams and ThreadLimit attributes
  int32_t DefaultValTeams = -1;
  int32_t DefaultValThreads = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);

  OMPBuilder.emitTargetRegionFunction(OffloadEntriesInfoManager, EntryInfo,
                                      GenerateOutlinedFunction, DefaultValTeams,
                                      DefaultValThreads, IsOffloadEntry,
                                      OutlinedFn, OutlinedFnID);

  if (OutlinedFn != nullptr)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
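
// For example (editorial note): '3 + 4' and a plain variable reference are
// trivial, while a call to an arbitrary user function with possible side
// effects is not, so a statement containing such a call cannot be skipped by
// getSingleCompoundChild below.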
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
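
// For example (editorial illustration): in
//
//   #pragma omp target
//   {
//     ;                      // NullStmt, ignored
//     int Unused;            // unused local declaration, ignored
//     #pragma omp teams
//     { ... }
//   }
//
// the only significant child is the 'teams' directive, which is what this
// helper returns after peeling away the compound statement.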
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
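
// For example (editorial note):
//
//   #pragma omp target teams num_teams(8)
//
// returns the 'num_teams' expression and sets DefaultVal to 8, whereas a bare
// '#pragma omp target' whose body contains no nested directive leaves
// DefaultVal at -1, signalling that no teams region is required.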
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle the if clause. If it is present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of the num_threads clause iff the if clause was not
      // specified or did not evaluate to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal) {
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      }
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
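
// Worked example (editorial sketch): for a nested
//
//   #pragma omp parallel if(c) num_threads(n)
//
// with a non-constant 'c' and a default thread limit L available, the value
// computed above is select(c, umin(L, n), 1), i.e. the unsigned minimum of the
// limit and 'n' when the condition holds, and 1 otherwise, matching the
// '<cond> ? (<numthreads> ? <numthreads> : 0) : 1' scheme from the comment.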
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no clause thread_limit
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
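
// For example (editorial note): on
//
//   #pragma omp target parallel thread_limit(16) num_threads(4)
//
// both clauses are integer constants; 4 < 16, so DefaultVal becomes 4 and the
// num_threads expression is returned as the effective thread limit.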
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
      }
    }
    if (ThreadLimitClause) {
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (llvm::Value *NumThreads =
            getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal))
      return NumThreads;
    return Bld.getInt32(0);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle the if clause. If it is present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal =
          ThreadLimitVal
              ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                   ThreadLimitVal),
                                 NumThreadsVal, ThreadLimitVal)
              : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
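
// Worked example (editorial sketch): for
//
//   #pragma omp target parallel if(c) thread_limit(t) num_threads(n)
//
// with non-constant operands, the code above evaluates to
// select(c, umin(t, n), 1), where the unsigned minimum is realized with the
// CreateICmpULT/CreateSelect pair.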
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
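
  // For example (editorial note): OMP_MAP_MEMBER_OF occupies the top 16 bits
  // of the 64-bit flag word (0xffff000000000000), so its lowest set bit is bit
  // 48 and getFlagMemberOffset() returns 48; member positions are then encoded
  // by shifting a 1-based index into that field.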
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapBaseValuesArrayTy = SmallVector<BasePointerInfo, 4>;
  using MapValuesArrayTy = SmallVector<llvm::Value *, 4>;
  using MapFlagsArrayTy = SmallVector<OpenMPOffloadMappingFlags, 4>;
  using MapMappersArrayTy = SmallVector<const ValueDecl *, 4>;
  using MapDimArrayTy = SmallVector<uint64_t, 4>;
  using MapNonContiguousArrayTy = SmallVector<MapValuesArrayTy, 4>;
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
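
  // Worked example (editorial sketch): for 'double *p' and the section
  // 'p[1:24]', the explicit-length path above yields 24 * sizeof(double); for
  // a lower-bound-only section 'a[lb:]' over 'double a[100]' it yields
  // max(sizeof(a) - lb * sizeof(double), 0), with the CreateICmpUGT/
  // CreateSelect pair guarding against unsigned underflow.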
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library, i.e.
      // if we don't pass any bits, alloc/release is what the runtime is going
      // to do. Therefore, we don't need to signal anything for these two type
      // modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
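
  // For example (editorial note): 'map(always, close, tofrom: x)' yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE, plus
  // OMP_MAP_TARGET_PARAM when the entry is passed as a kernel argument, while
  // 'map(alloc: x)' contributes no type bits of its own.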
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have a size of more than 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
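
  // For example (editorial note): 'a[3:1]' has a provably unit length and is
  // not final; 'a[0:n]' with a runtime 'n' is conservatively treated as final;
  // and 'b[:]' over 'int b[1]' is not final because the constant dimension is
  // 1.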
7143 /// Generate the base pointers, section pointers, sizes, map type bits, and
7144 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7145 /// map type, map or motion modifiers, and expression components.
7146 /// \a IsFirstComponent should be set to true if the provided set of
7147 /// components is the first associated with a capture.
7148 void generateInfoForComponentList(
7149 OpenMPMapClauseKind MapType
, ArrayRef
<OpenMPMapModifierKind
> MapModifiers
,
7150 ArrayRef
<OpenMPMotionModifierKind
> MotionModifiers
,
7151 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
,
7152 MapCombinedInfoTy
&CombinedInfo
, StructRangeInfoTy
&PartialStruct
,
7153 bool IsFirstComponentList
, bool IsImplicit
,
7154 const ValueDecl
*Mapper
= nullptr, bool ForDeviceAddr
= false,
7155 const ValueDecl
*BaseDecl
= nullptr, const Expr
*MapExpr
= nullptr,
7156 ArrayRef
<OMPClauseMappableExprCommon::MappableExprComponentListRef
>
7157 OverlappedElements
= std::nullopt
) const {
7158 // The following summarizes what has to be generated for each map and the
7159 // types below. The generated information is expressed in this order:
7160 // base pointer, section pointer, size, flags
7161 // (to add to the ones that come from the map type and modifier).
7183 // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
7186 // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
7189 // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
7192 // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
7195 // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
7196 // in unified shared memory mode or for local pointers
7197 // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
7200 // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
7203 // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
7206 // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7209 // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
7211 // map(to: s.p[:22])
7212 // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
7213 // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
7214 // &(s.p), &(s.p[0]), 22*sizeof(double),
7215 // MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7216 // (*) alloc space for struct members, only this is a target parameter
7217 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7218 // optimizes this entry out, same in the examples below)
7219 // (***) map the pointee (map: to)
7222 // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
7223 // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
7224 // (*) alloc space for struct members, only this is a target parameter
7225 // (**) map the pointer (nothing to be mapped in this example) (the compiler
7226 // optimizes this entry out, same in the examples below)
7227 // (***) map the pointee (map: to)
7230 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7232 // map(from: s.ps->s.i)
7233 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7234 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7235 // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7237 // map(to: s.ps->ps)
7238 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7239 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7240 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
7242 // map(s.ps->ps->ps)
7243 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7244 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7245 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7246 // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7248 // map(to: s.ps->ps->s.f[:22])
7249 // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
7250 // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
7251 // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7252 // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7255 // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
7258 // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
7261 // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
7264 // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
7266 // map(to: ps->p[:22])
7267 // ps, &(ps->p), sizeof(double*), TARGET_PARAM
7268 // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
7269 // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
7272 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
7274 // map(from: ps->ps->s.i)
7275 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7276 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7277 // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7279 // map(from: ps->ps->ps)
7280 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7281 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7282 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7284 // map(ps->ps->ps->ps)
7285 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7286 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7287 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7288 // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
7290 // map(to: ps->ps->ps->s.f[:22])
7291 // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
7292 // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
7293 // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
7294 // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
7296 // map(to: s.f[:22]) map(from: s.p[:33])
7297 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
7298 // sizeof(double*) (**), TARGET_PARAM
7299 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
7300 // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
7301 // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
7302 // (*) allocate contiguous space needed to fit all mapped members even if
7303 // we allocate space for members not mapped (in this example,
7304 // s.f[22..49] and s.s are not mapped, yet we must allocate space for
7305 // them as well because they fall between &s.f[0] and &s.p)
7307 // map(from: s.f[:22]) map(to: ps->p[:33])
7308 // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
7309 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7310 // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
7311 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
7312 // (*) the struct this entry pertains to is the 2nd element in the list of
7313 // arguments, hence MEMBER_OF(2)
7315 // map(from: s.f[:22], s.s) map(to: ps->p[:33])
7316 // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
7317 // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
7318 // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
7319 // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
7320 // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
7321 // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
7322 // (*) the struct this entry pertains to is the 4th element in the list
7323 // of arguments, hence MEMBER_OF(4)
    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not a global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.

      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }
        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }
        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran into the whole component, allocate the space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;
    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case of having a pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimension.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
    // Collect info for non-contiguous. Notice that offset, count, and stride
    // are only meaningful for array-section, so we insert a null for anything
    // other than array-section.
    // Also, the sizes of offset, count, and stride are not the same as
    // pointers, base_pointers, sizes, or dims. Instead, the sizes of offset,
    // count, and stride are the same as the number of non-contiguous
    // declarations in a target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a high dimension is an array section, we construct
        // all the lower dimensions as array sections as well. However, for a
        // case like arr[0:2][2], Clang constructs the inner dimension as an
        // array section although it actually is not in array section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //            Offset  Count  Stride
      //    D0        0       1      4     (int)  <- dummy dimension
      //    D1        0       2      8     (2 * (1) * 4)
      //    D2        1       2      20    (1 * (1 * 5) * 4)
      //    D3        0       2      200   (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }
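
    // Illustrative note (cf. the stride table above): mapping `arr[0:2:2]` of
    // `int arr[5]` hands the runtime Offsets = [0, 0], Counts = [1, 2] and
    // Strides = [4, 8], where the leading triple is the dummy dimension.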
    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }

  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
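
  // Illustrative consequence of the above: a firstprivate pointer captured
  // by reference is mapped TO | PTR_AND_OBJ, a firstprivate scalar
  // PRIVATE | TO, a captured lambda uses its user-specified map type, and
  // anything else defaults to TO | FROM.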
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
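
  // E.g. getMemberOfFlag(/*Position=*/1) encodes MEMBER_OF(2), i.e. the
  // struct the entry pertains to is the 2nd element in the list of arguments
  // (cf. the MEMBER_OF examples earlier in this file).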
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
            OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();

    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
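
  // Illustrative example: for `struct B { int X; }; struct D : B { int Y; };`
  // getPlainLayout(D, Layout, /*AsBase=*/false) recurses into the base
  // subobject first, so Layout ends up as {X, Y} - bases are flattened in
  // LLVM field order ahead of the record's own fields.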
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
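
    // E.g. with `map(present, to: x) map(alloc: y) map(from: z)` the lists
    // for x, y and z land in the Present, Allocs and Other buckets
    // respectively, so x's entries are generated first, then y's, then z's.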
    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user's fault if
    // that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
          }
        };
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
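
    // E.g. (illustrative) `use_device_ptr(s.p)` with no prior map of s.p
    // lands in DeferredInfo above and is only emitted once the enclosing
    // struct s has been processed.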
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |=
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which the
      // non-static data member function is invoked, the variable is treated as
      // if the this[:1] expression had appeared in a map clause with a map-type
      // of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getThisType()->getPointeeType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if we generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
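
  // E.g. for `map(from: s.f[:22], s.s)` (cf. the examples near the top of
  // this class) the combined entry is &s with a size spanning from the
  // lowest mapped element to one past the highest, and the member entries
  // are rewritten to MEMBER_OF(<position of the combined entry>).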
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
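
  // Illustrative example: for `auto L = [&X]() { ... };` mapped to the
  // device, the entry emitted above for X is PTR_AND_OBJ | LITERAL |
  // MEMBER_OF | IMPLICIT, with the lambda's field for X as base pointer and
  // X's own address as pointer.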
  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
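
  // E.g. if the parent lambda's entry is found at index 2, each matching
  // implicit capture entry is re-tagged with getMemberOfFlag(2), i.e.
  // MEMBER_OF(3).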
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable()
               ? OpenMPOffloadMappingFlags::OMP_MAP_TO
               : OpenMPOffloadMappingFlags::OMP_MAP_LITERAL) |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }

    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
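      // E.g. a list from map(present, to: ...) sorts ahead of one from
      // map(to: ...), which in turn sorts ahead of one from map(alloc: ...);
      // note the comparator reads LHS/RHS crosswise above.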
8743 return (HasPresent
&& !HasPresentR
) || (HasAllocs
&& !HasAllocsR
);
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            break;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }
    // Associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace
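// Illustrative sketch (not from the original source): for a strided update
// such as
//   #pragma omp target update to(a[0:2:4][1:3:2])
// each mapped dimension gets one descriptor_dim record holding its offset,
// count, and stride, and the pointers-array slot of the base declaration is
// redirected to the emitted "dims" temporary below.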
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //  uint64_t offset;
  //  uint64_t count;
  //  uint64_t stride
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD;
  RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components, however, the size of offset, count, and stride is
  // equal to the size of the base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.RTArgs.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
// Try to extract the base declaration from a `this->x` expression if possible.
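// For example (illustrative): for 'this->data[0:n]' it returns the FieldDecl
// of 'data'; for any other expression shape it returns nullptr.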
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
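/// For instance (an illustrative sketch, not a format guarantee), an entry
/// for 'arr[0:4]' mapped at file.c:10:3 is emitted as a source-location
/// string of the form ";file.c;arr[0:4];10;3;;".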
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
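/// The arrays built here are the '.offload_baseptrs', '.offload_ptrs',
/// '.offload_sizes' and '.offload_mappers' temporaries used below; sizes and
/// map types that are compile-time constants become private global constants
/// instead of run-time stores.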
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.RTArgs.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.RTArgs.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.RTArgs.MappersArray = MappersArray.getPointer();

    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          if (IsNonContiguous &&
              static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                  CombinedInfo.Types[I] &
                  OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.RTArgs.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.RTArgs.SizesArray = Buffer.getPointer();
      } else {
        Info.RTArgs.SizesArray = SizesArrayGbl;
      }
    }

    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping;
    for (auto mapFlag : CombinedInfo.Types)
      Mapping.push_back(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              mapFlag));
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.RTArgs.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type &
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PRESENT)) {
          Type &=
              ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                  OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }

    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.RTArgs.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
/// Check for inner distribute directive.
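/// For example (illustrative): for
///   #pragma omp target
///   #pragma omp teams
///   #pragma omp distribute parallel for
/// this returns the nested 'distribute parallel for' directive.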
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
///
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
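///
/// A declaration like the following would be emitted with this pattern (an
/// illustrative sketch, not taken from the source):
///
///   struct S { int len; double *data; };
///   #pragma omp declare mapper(id : struct S s) map(s, s.data[0:s.len])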
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
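    // Illustrative reading of the table above (not from the original source):
    // if the mapper declares map(tofrom: s.data[0:s.len]) but the construct
    // maps the parent with map(to: s), the member entry decays to 'to'; with
    // map(alloc: s) it decays to 'alloc'.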
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
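/// Roughly, the guard emitted below is (a sketch in pseudo code):
///   init: if ((size > 1 || (base != begin && IsPtrAndObj)) && !IsDelete)
///   del:  if (size > 1 && IsDelete)
/// where IsPtrAndObj and IsDelete test the corresponding map-type bits.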
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
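// Note (illustrative): for a loop-based construct such as
//   #pragma omp target teams distribute parallel for
// the helper below forwards the directive's trip count through SizeEmitter;
// for target regions without a nested distribute loop it returns the
// constant 0.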
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get tripcount for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);
    if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
      CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
      llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
          DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
      DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
    }

    llvm::Value *ZeroArray =
        llvm::Constant::getNullValue(llvm::ArrayType::get(CGF.CGM.Int32Ty, 3));

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    llvm::Value *Flags = CGF.Builder.getInt64(HasNoWait);

    llvm::Value *NumTeams3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumTeams, {0});
    llvm::Value *NumThreads3D =
        CGF.Builder.CreateInsertValue(ZeroArray, NumThreads, {0});

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/* Version */ 2),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations,
        Flags,
        NumTeams3D,
        NumThreads3D,
        DynCGroupMem,
    };

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply call the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(OMPBuilder.emitTargetKernel(
        CGF.Builder, Return, RTLoc, DeviceID, NumTeams, NumThreads,
        OutlinedFnID, KernelArgs));

    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };

  // Notify that the host version must be executed.
  auto &&ElseGen = [FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    FallbackGen(CGF);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray, &CapturedVars, RequiresOuterTask,
                          &CS](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the captured variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get mappable expression information.
    MappableExprsHandler MEHandler(D, CGF);
    llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

    auto RI = CS.getCapturedRecordDecl()->field_begin();
    auto *CV = CapturedVars.begin();
    for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                              CE = CS.capture_end();
         CI != CE; ++CI, ++RI, ++CV) {
      MappableExprsHandler::MapCombinedInfoTy CurInfo;
      MappableExprsHandler::StructRangeInfoTy PartialStruct;

      // VLA sizes are passed to the outlined region by copy and do not have map
      // information associated.
      if (CI->capturesVariableArrayType()) {
        CurInfo.Exprs.push_back(nullptr);
        CurInfo.BasePointers.push_back(*CV);
        CurInfo.Pointers.push_back(*CV);
        CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
        // Copy to the device as an argument. No need to retrieve it.
        CurInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
            OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
            OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
        CurInfo.Mappers.push_back(nullptr);
      } else {
        // If we have any information in the map clause, we use it, otherwise we
        // just do a default mapping.
        MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
        if (!CI->capturesThis())
          MappedVarSet.insert(CI->getCapturedVar());
        else
          MappedVarSet.insert(nullptr);
        if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
          MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
        // Generate correct mapping for variables captured by reference in
        // lambdas.
        if (CI->capturesVariable())
          MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                  CurInfo, LambdaPointers);
      }
      // We expect to have at least an element of information for this capture.
      assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
             "Non-existing map pointer for capture!");
      assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
             CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
             CurInfo.BasePointers.size() == CurInfo.Types.size() &&
             CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
             "Inconsistent map information sizes!");

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CombinedInfo.append(PartialStruct.PreliminaryMapData);
        MEHandler.emitCombinedEntry(
            CombinedInfo, CurInfo.Types, PartialStruct, nullptr,
            !PartialStruct.PreliminaryMapData.BasePointers.empty());
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(CurInfo);
    }
    // Adjust MEMBER_OF flags for the lambdas captures.
    MEHandler.adjustMemberOfForLambdaCaptures(
        LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
        CombinedInfo.Types);
    // Map any list items in a map clause that were not captures because they
    // weren't referenced within the construct.
    MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
void CGOpenMPRuntime::scanForTargetRegionsFunctions(const Stmt *S,
                                                    StringRef ParentName) {
  if (!S)
    return;

  // Codegen OMP target directives that offload compute to the device.
  bool RequiresDeviceCodegen =
      isa<OMPExecutableDirective>(S) &&
      isOpenMPTargetExecutionDirective(
          cast<OMPExecutableDirective>(S)->getDirectiveKind());

  if (RequiresDeviceCodegen) {
    const auto &E = *cast<OMPExecutableDirective>(S);

    llvm::TargetRegionEntryInfo EntryInfo = getTargetEntryUniqueInfo(
        CGM.getContext(), E.getBeginLoc(), ParentName);

    // Is this a target region that should not be emitted as an entry point?
    // If so just signal we are done with this target region.
    if (!OffloadEntriesInfoManager.hasTargetRegionEntryInfo(EntryInfo))
      return;

    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_sections:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }

  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
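// Example (illustrative): scanning
//
//   void parent() {
//   #pragma omp target
//     work();
//   }
//
// walks parent()'s body, finds the OMPTargetDirective, and emits the device
// kernel under a unique entry name derived from ParentName and the source
// location (conventionally of the form __omp_offloading_<ids>_parent_l<line>,
// though the exact format is an implementation detail).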
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && *DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && *DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
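// Example (illustrative): given
//
//   #pragma omp begin declare target device_type(nohost)
//   void device_only();
//   #pragma omp end declare target
//
// isAssumedToBeNotEmitted(device_only, /*IsDevice=*/false) returns true, so
// the host compilation skips emitting the function entirely.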
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  // Register declare target variables.
  llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  int64_t VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
       *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize =
          CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
      assert(VarSize != 0 && "Expected non-zero size of the variable");
    } else {
      VarSize = 0;
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            OMPBuilder.getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
              *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    else
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize().getQuantity();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage);
}
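// Example (illustrative): for
//
//   int gvar;
//   #pragma omp declare target link(gvar)
//
// the variable is registered with the OMPTargetGlobalVarEntryLink flag and a
// weak, pointer-sized entry so the runtime can redirect device accesses to
// the host instance; a plain to/enter entry (without unified shared memory)
// instead registers the definition itself with OMPTargetGlobalVarEntryTo.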
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}
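// Example (illustrative): a translation unit containing
//
//   #pragma omp requires unified_shared_memory, \
//                        atomic_default_mem_order(seq_cst)
//
// sets HasRequiresUnifiedSharedMemory and makes SequentiallyConsistent the
// value returned by getDefaultMemoryOrdering() for subsequent atomic codegen.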
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}
CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() && !HasEmittedDeclareTargetRegion &&
       !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least one target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
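// Conceptually (illustrative IR shape, details may differ), the emitted
// registration function is a global-constructor-style function of the form:
//
//   define internal void @.omp_offloading.requires_reg() {
//     call void @__tgt_register_requires(i64 %flags)
//     ret void
//   }
//
// where %flags is OMP_REQ_UNIFIED_SHARED_MEMORY when the requires directive
// asked for unified shared memory and OMP_REQ_NONE otherwise.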
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
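// Example (illustrative): for
//
//   #pragma omp target teams
//   { ... }
//
// the teams region body is outlined into a 'microtask' and launched with
//
//   __kmpc_fork_teams(&loc, <num captured vars>, microtask, var1, ..., varn);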
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
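// Example (illustrative): 'num_teams(4) thread_limit(64)' lowers to
//
//   __kmpc_push_num_teams(&loc, gtid, 4, 64);
//
// with 0 passed for whichever clause is absent, meaning "use the default".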
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all the
  // arguments of the runtime call by reference because they are used in the
  // closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the region
    // here. It will have to be duplicated: with and without privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
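// Illustrative sketch (commentary, not upstream code): for
//
//   #pragma omp target data map(tofrom: a[0:n]) if(cond)
//   { use(a); }
//
// the emitted structure is conceptually
//
//   if (cond) __tgt_target_data_begin_mapper(loc, dev, 1, ...);
//   use(a);   /* body emitted once, between the two calls */
//   if (cond) __tgt_target_data_end_mapper(loc, dev, 1, ...);
//
// except when device pointers must be privatized, in which case the body is
// duplicated into the conditional branches as described above.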
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_sections:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
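// Example (illustrative): the standalone directive
//
//   #pragma omp target enter data map(to: x) nowait
//
// selects __tgt_target_data_begin_nowait_mapper and, because nowait (like
// depend) requires an outer task, routes the call through
// EmitOMPTargetTaskBasedDirective.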
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};

/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of the
  // argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which current vector version is generated. The
  // VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
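// Worked example (illustrative): for
//
//   #pragma omp declare simd uniform(n) linear(p : 2) aligned(p : 32)
//   void foo(int n, float *p);
//
// the parameter sequence mangles as "ul8a32": 'u' for the uniform n, 'l' for
// the linear p with its step rescaled by sizeof(float) (2 * 4 = 8), and
// 'a32' for the requested alignment.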
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
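// Example (illustrative): for a C function
//
//   #pragma omp declare simd notinbranch
//   double foo(double x);
//
// with no simdlen, the CDT is double (64 bits), so the SSE variant gets
// VLEN = 128 / 64 = 2 and the attribute "_ZGVbN2v_foo" is added, alongside
// "_ZGVcN4v_foo", "_ZGVdN4v_foo", and "_ZGVeN8v_foo" for the wider ISAs.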
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// with SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // constraints.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
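// Example (illustrative): on an SVE-enabled target,
//
//   #pragma omp declare simd
//   float bar(float x);
//
// has no user simdlen, so only the scalable masked variant "_ZGVsMxv_bar" is
// advertised; with NEON instead, NDS = 32 yields unmasked and masked names
// at VLEN 2 and 4 ("_ZGVnN2v_bar", "_ZGVnN4v_bar", and the "M" pair).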
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
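// Illustrative example (assumed user code): a doacross loop such as
//
//   #pragma omp for ordered(1)
//   for (int i = 1; i < n; ++i) {
//   #pragma omp ordered depend(sink : i - 1)
//     use(a[i - 1]);
//   #pragma omp ordered depend(source)
//     a[i] = produce(i);
//   }
//
// lowers to __kmpc_doacross_init(loc, gtid, 1, dims) before the loop,
// __kmpc_doacross_wait for each sink clause, __kmpc_doacross_post for the
// source clause, and __kmpc_doacross_fini when the region is destroyed.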
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}
void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}
Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
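
// For a variable with an OMPAllocateDeclAttr, the code above lowers, roughly,
// to the following runtime-call pattern (a simplified sketch of the emitted
// IR, shown for the non-aligned case; not a verbatim dump):
//   %tid  = ... global thread id ...
//   %void = call ptr @__kmpc_alloc(i32 %tid, i64 %size, ptr %allocator)
//   ... uses of the variable through the cast of %void ...
//   call void @__kmpc_free(i32 %tid, ptr %void, ptr %allocator) ; via cleanup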

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
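
// Illustrative source that populates the nontemporal stack (an assumed
// user-level example, not from this file):
//   #pragma omp simd nontemporal(a, b)
//   for (int i = 0; i < N; ++i)
//     a[i] = b[i] + 1;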

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
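
// Untied tasks may resume on a different thread, so their local variables
// cannot live in a single stack frame; the map pushed here pairs each such
// variable with its placeholder address and the real runtime allocation,
// which getAddressOfLocalVariable consults above.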

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
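
// Rationale: every variable collected above already receives an explicit
// data-sharing attribute in the nested construct (or escapes into a
// target/task region), so analyzing it as a lastprivate conditional inside
// this region would produce spurious updates; such variables are instead
// pushed as explicitly disabled entries.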

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
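
// Illustrative directive that takes this push path (an assumed user-level
// example, not from this file):
//   #pragma omp parallel for lastprivate(conditional: x)
//   for (int i = 0; i < N; ++i)
//     if (p[i] > 0) x = i;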

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(
        CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD),
                                     VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
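
// Conceptually, the implicit record built above is:
//   struct lastprivate.conditional { <decl type> Val; char Fired; };
// Inner regions set 'Fired' to signal that the privatized copy was written;
// the enclosing region later checks the flag before performing the final
// iteration-ordered update.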

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace

void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
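
// Design note: the compare-and-update must appear atomic with respect to
// other threads racing on the same conditional lastprivate, hence the
// critical region keyed by the variable's unique name. Under -fopenmp-simd
// no other threads can exist, so the guarded body is emitted inline instead.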

void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
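
// The two paths above form a handshake: an inner parallel region only sets
// the 'Fired' flag atomically, while the function that owns the conditional
// lastprivate later inspects that flag (see
// checkAndEmitSharedLastprivateConditional below) and performs the actual
// iteration-ordered update.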

void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
    // }
  }
}

void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
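
// The CGOpenMPSIMDRuntime overrides below implement -fopenmp-simd mode,
// where only simd-related directives are honored; every entry point that
// would require the host OpenMP runtime library is unreachable by
// construction.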

llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}

llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");