//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <cstdint>
#include <numeric>
#include <optional>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
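// Illustration (not part of the class above): for an untied task, the action
// emits a dispatch switch at function entry, morally equivalent to
//
//   switch (*part_id) {          // loaded through PartIDVar
//   case 0: goto untied_jmp_0;   // first task part
//   case 1: goto untied_jmp_1;   // resume point added by emitUntiedSwitch
//   default: goto untied_done;   // branch through cleanups to the return
//   }
//
// Each emitUntiedSwitch call stores the next case number into *part_id,
// branches back to the runtime, and registers a new case so the task resumes
// where it left off when rescheduled. Labels here are illustrative; the real
// blocks are named ".untied.jmp." and ".untied.done." as created above.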
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                            The string is composed of semi-colon separated
///                            fields which describe the source file,
///                            the function and a pair of line numbers that
///                            delimit the construct.
///                             */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
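// Illustration (assumed layout, not emitted by this enum itself): a typical
// ident_t global produced for a C translation unit looks roughly like
//   @0 = private unnamed_addr constant [N x i8] c";file.c;main;10;5;;\00"
//   @1 = private unnamed_addr constant %struct.ident_t
//            { i32 0, i32 2, i32 0, i32 0, ptr @0 }
// where the flags field (i32 2) is OMP_IDENT_KMPC and the last field is
// psource; exact names and linkage are chosen by the OpenMPIRBuilder.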
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
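// For example, '#pragma omp for schedule(dynamic, 4)' selects
// OMP_sch_dynamic_chunked (35) with a chunk size of 4, and adding the
// 'monotonic' modifier ORs in OMP_sch_modifier_monotonic, so the value
// passed to the runtime's dispatch-init entry point is 35 | (1 << 29).
// (Illustrative mapping; the clause-to-enum translation happens elsewhere
// in this file.)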
namespace {
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
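// The ReductionOp recognized above has the shape Sema produces for a
// user-defined reduction: conceptually (illustrative only) a CallExpr whose
// callee is an OpaqueValueExpr wrapping a DeclRefExpr to the
// OMPDeclareReductionDecl, applied to the LHS/RHS reduction variables.
// Peeling the CallExpr, its OpaqueValueExpr callee, and the implicit casts
// therefore yields the OMPDeclareReductionDecl, or nullptr for built-in
// reduction operators.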
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
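// The emitted control flow is a pointer-bumping while-do loop; roughly
// (illustrative IR only, block names match those created above):
//
//   entry:
//     %isempty = icmp eq ptr %DestBegin, %DestEnd
//     br i1 %isempty, label %omp.arrayinit.done, label %omp.arrayinit.body
//   omp.arrayinit.body:
//     %dest = phi ptr [ %DestBegin, %entry ], [ %dest.next, %omp.arrayinit.body ]
//     ; ...element initialization (UDR initializer or default init)...
//     %dest.next = getelementptr %ElemTy, ptr %dest, i32 1
//     %done = icmp eq ptr %dest.next, %DestEnd
//     br i1 %done, label %omp.arrayinit.done, label %omp.arrayinit.body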
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(CGF), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(CGF), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress(CGF).withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(CGM.getLangOpts().OpenMPIsTargetDevice,
                                     isGPU(), hasRequiresUnifiedSharedMemory(),
                                     CGM.getLangOpts().OpenMPOffloadMandatory);
  OMPBuilder.initialize(CGM.getLangOpts().OpenMPIsTargetDevice
                            ? CGM.getLangOpts().OMPHostIRFile
                            : StringRef{});
  OMPBuilder.setConfig(Config);
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
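// Illustration: for a directive such as
//   #pragma omp declare reduction(plus : T : omp_out += omp_in)
// the combiner built here is morally
//   void .omp_combiner.(T *restrict omp_out, T *restrict omp_in) {
//     *omp_out += *omp_in;
//   }
// with omp_in/omp_out privatized to the two pointer parameters above; the
// exact symbol name is platform-specific and produced by getName().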
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
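// Illustration: for
//   #pragma omp parallel
//   { /* body */ }
// inside main(), this produces an outlined function named roughly
// "main.omp_outlined" (see getOutlinedHelperName below), whose first two
// parameters are the kmp_int32 *global_tid / *bound_tid expected by the
// kmpc_micro convention, followed by the captured variables; the caller
// then hands it to __kmpc_fork_call.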
std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
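// Illustration: for a directive at line 3, column 5 of "test.c" inside a
// function foo(), the string built above is ";test.c;foo;3;5;;" (file name,
// function, line, and column between semicolons, matching the psource
// format documented for ident_t).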
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
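// Illustration: outside any outlined region, the code above emits, once per
// function and hoisted to the service insertion point, roughly
//   %0 = call i32 @__kmpc_global_thread_num(ptr @loc)
// and caches the result in OpenMPLocThreadIDMap, so later thread-id queries
// in the same function reuse %0 instead of calling the runtime again.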
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
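// Illustration: the variadic microtask type built above means an outlined
// parallel body such as (hypothetical name)
//   void main.omp_outlined(kmp_int32 *gtid, kmp_int32 *btid, T *captured);
// can be passed through this pointer type to __kmpc_fork_call, with the
// captured variables riding in the trailing varargs.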
static llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}
llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
    break;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    break;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
    break;
  }
}

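// Derive a unique target-entry identifier (file unique ID, line, parent name)
// from a presumed source location, falling back to the physical location when
// the presumed file (e.g. one introduced by a #line directive) cannot be
// resolved to a unique file ID.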
static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(), PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}

Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return Address::invalid();
  return Address(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

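// When real TLS cannot be used, accesses to a threadprivate variable go
// through __kmpc_threadprivate_cached, which returns the address of the
// current thread's copy using the per-variable cache created above.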
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

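// Emit and register the ctor/copy-ctor/dtor triple for a threadprivate
// variable. When no CodeGenFunction is supplied, a standalone
// __omp_threadprivate_init_ function is built so the registration can run as
// a global initializer.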
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

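// For 'declare target' variables, constructor and destructor calls are
// registered as offloading entries: on the device they are emitted as real
// functions, while on the host only placeholder globals are created to keep
// the entry tables consistent.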
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsTargetDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsTargetDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    Out.clear();
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::get(CGM.getLLVMContext(), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    Out.clear();
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OMPBuilder.OffloadInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor);
  }
  return CGM.getLangOpts().OpenMPIsTargetDevice;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

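// Compiler-generated ("artificial") threadprivates either map directly onto
// TLS when the target supports it, or fall back to the same
// __kmpc_threadprivate_cached mechanism used for user threadprivates.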
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}

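// A parallel region is normally emitted as a __kmpc_fork_call of the outlined
// function; under an if() clause the else arm serializes the region with
// __kmpc_serialized_parallel / __kmpc_end_serialized_parallel instead.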
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the ones
    // passed to __kmpc_fork_call but the ones called in serialized regions
    // could be inlined. This is not perfect, but it is closer to the invariant
    // we want, namely, every data environment starts with a new function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed in a first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region, but in a regular serial code region, get the thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace

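// The directive emitters below use CommonActionTy to bracket the inlined
// region with the matching enter/exit runtime calls; with Conditional=true
// the body only runs when the enter call (e.g. __kmpc_master, __kmpc_single)
// returns a non-zero value.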
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(iden_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all other
    // threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        // exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

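// Fold the monotonic/nonmonotonic/simd schedule modifiers into the runtime
// schedule encoding. Per OpenMP 5.0 the nonmonotonic modifier is implied for
// non-static schedules when no modifier is written in the source.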
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

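// Shared helper for the worksharing-loop and distribute static-init entry
// points below; it validates the schedule kind and defaults the chunk to 1
// for non-chunked static schedules.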
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc,
                         isOpenMPLoopDirective(DKind) ? OMP_IDENT_WORK_LOOP
                                                      : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

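// The offload entry metadata itself is produced by the OpenMPIRBuilder; the
// callback passed in below only maps its error reports back to clang source
// locations and diagnostics.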
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}


void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
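
// Example: given
//   int x;
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc)
// isAllocatableDecl(x) is true and x gets the pointer-indirection treatment
// below, while a plain 'omp allocate(x)' with the default allocator and no
// allocator() clause stays on the regular allocation path.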

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
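
// For '#pragma omp task firstprivate(a) private(b)' the record built here
// is, as a sketch:
//   struct .kmp_privates.t {
//     <type of a> a;  // keeps any aligned attributes of 'a'
//     <type of b> b;
//   };
// with fields pre-sorted by alignment by the caller (emitTaskInit).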

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t     data1;
  //         kmp_cmplrdata_t     data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
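
// The runtime only ever sees the kmp_task_t prefix; the trailing privates
// block is known to the compiler alone. Sketch:
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;      // shared header, indexed by KmpTaskTFields
//     .kmp_privates.t privates;  // compiler-private payload
//   };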

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
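
// The emitted thunk corresponds roughly to (a sketch):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     // run the destructor of every destructible field of tt->privates
//   }
// Its address is stored into kmp_task_t::data1 by emitTaskInit below.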

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
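
// Example: for '#pragma omp taskloop private(s)' where s has a class type
// with a non-trivial default constructor, the private copy's initializer is
// a CXXConstructExpr and this returns true; a plain scalar keeps it false.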

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
/// // setup lastprivate flag
///    task_dst->last = lastpriv;
/// // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
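
// For a clause modifier like 'iterator(i = 0:n)' the generated control flow
// is, as a sketch:
//   counter = 0;
// iter.cont:
//   if (counter < n) goto iter.body; else goto iter.exit;
// iter.body:
//   i = begin + counter * step;
//   ... body using i ...
//   counter = counter + 1;
//   goto iter.cont;
// iter.exit: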

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
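
// Examples of the three cases, as a sketch:
//   plain lvalue 'x'           -> (&x, sizeof(x))
//   array section 'a[lo:len]'  -> (&a[lo], bytes from &a[lo] to one past
//                                  the section's last element)
//   array shaping '([n][m])p'  -> (p, n * m * sizeof(*p))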

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds
/// flags type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
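
// The record mirrors the runtime's descriptor (a sketch, following
// openmp/runtime/src/kmp.h):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };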

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
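
// End-to-end, for a plain '#pragma omp task' the emitted sequence is
// roughly (a sketch):
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, flags,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof(shareds), .omp_task_entry.);
//   memcpy(t->shareds, &captured_shareds, sizeof(shareds));
//   // initialize privates, optional destructor thunk (data1) and
//   // priority (data2), then hand 't' to the tasking runtime entry point.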

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
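
// The record matches the runtime's dependence descriptor (sketch):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     <bool-sized unsigned> flags;  // holds an RTLDependenceKindTy value
//   };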

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF).withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
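
// Layout note: a depobj allocation keeps one extra kmp_depend_info slot in
// front of the visible array and stashes the element count in that slot's
// base_addr field, which is why the count is read at index -1 above.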

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
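
// For 'depend(in: a, b)' with no iterator modifier this fills two
// consecutive records, conceptually:
//   deps[pos++] = { (intptr_t)&a, sizeof(a), DepIn };
//   deps[pos++] = { (intptr_t)&b, sizeof(b), DepIn };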

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.
    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

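// Illustrative sketch for emitDependClause above (layout only, not emitted
// verbatim): for
//   #pragma omp task depend(in: a) depend(out: b)
// the function materializes roughly
//   kmp_depend_info deps[2] = {{&a, sizeof(a), IN}, {&b, sizeof(b), OUT}};
// and returns the element count plus the array address, which callers pass to
// __kmpc_omp_task_with_deps or __kmpc_omp_taskwait_deps_51.
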
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

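// Illustrative note for emitDepobjDependClause above: for
//   #pragma omp depobj(o) depend(inout: x)
// the array is allocated dynamically via __kmpc_alloc, the entry count is
// stashed in the reserved deps[0].base_addr slot, and the returned address
// points at deps[1], the first real entry, which is what 'o' ends up holding.
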
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

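// Illustrative note for emitUpdateClause above: for
//   #pragma omp depobj(o) update(mutexinoutset)
// the loop emitted here walks every kmp_depend_info record behind 'o' and
// rewrites only its flags field; the base addresses and sizes are left
// untouched.
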
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

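// Illustrative note for emitTaskCall above: for '#pragma omp task if(c)' the
// then-branch enqueues the task through __kmpc_omp_task (or
// __kmpc_omp_task_with_deps), while the else-branch executes it immediately
// and undeferred, conceptually:
//   __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//   proxy_task_entry(gtid, new_task);
//   __kmpc_omp_task_complete_if0(loc, gtid, new_task);
// preceded by __kmpc_omp_taskwait_deps_51 when depend clauses are present.
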
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

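// Illustrative note for emitTaskLoopCall above: '#pragma omp taskloop
// grainsize(4)' lowers to a __kmpc_taskloop call with sched == Grainsize and
// grainsize == 4, 'num_tasks(n)' selects sched == NumTasks, and with neither
// clause the runtime chooses the partitioning itself (sched == NoSchedule,
// grainsize == 0).
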
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::OMPPrivateScope Scope(CGF);
    Scope.addPrivate(LHSVar, LHSElementCurrent);
    Scope.addPrivate(RHSVar, RHSElementCurrent);
    (void)Scope.Privatize();
    RedOpGen(CGF, XExpr, EExpr, UpExpr);
    Scope.ForceCleanup();
  }

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

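// Illustrative note for EmitOMPAggregateReduction above: for
// 'reduction(+: a[0:n])' the helper emits the equivalent of
//   for (i = 0; i < n; ++i)
//     lhs[i] = lhs[i] + rhs[i];
// as a pointer-based while-do loop over the section, with RedOpGen producing
// the combiner for each element pair.
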
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

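// Illustrative sketch for emitReductionFunction above: for
// 'reduction(+: x) reduction(*: y)' the generated function is conceptually
//   void reduction_func(void *lhs[2], void *rhs[2]) {
//     *(T0 *)lhs[0] = *(T0 *)lhs[0] + *(T0 *)rhs[0];
//     *(T1 *)lhs[1] = *(T1 *)lhs[1] * *(T1 *)rhs[1];
//   }
// with an extra slot per variably modified item carrying the element count.
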
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  // break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

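// Illustrative note for emitReduction above: for
//   #pragma omp parallel for reduction(+: sum)
// the emitted switch takes case 1 when the runtime grants the tree reduction
// (thread-private copies combined via reduce_func under the lock) and case 2
// when it requests per-item atomic updates, conceptually
// 'atomic { sum += sum_priv; }'.
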
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

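// Illustrative note for generateUniqueName above: for a global 'double g' a
// call with Prefix "reduction_size" yields something like
// "reduction_size.g_<rawloc>", where <rawloc> is the raw encoding of the
// declaration's begin location, keeping same-named declarations distinct.
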
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If the initializer uses the initializer from the declare reduction
  // construct, emit a pointer to the address of the original reduction item
  // (required by the reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

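// Illustrative note for emitTaskReductionInit above: for
//   #pragma omp taskgroup task_reduction(+: x)
// one kmp_taskred_input_t entry is filled in for 'x' (shared address, size,
// and the .red_init/.red_comb/.red_fini thunks emitted above) and the array
// is registered with __kmpc_taskred_init; participating tasks later fetch
// their private copy through __kmpc_task_reduction_get_th_data.
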
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second != nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}

void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
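// Note the two lowering paths above: a plain '#pragma omp taskwait' with the
// OpenMPIRBuilder enabled becomes a single __kmpc_omp_taskwait(loc, gtid)
// call, while 'taskwait' with depend clauses builds a dependence array via
// emitDependClause and calls __kmpc_omp_taskwait_deps_51 instead.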
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}
} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
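// The resulting kind is passed to the runtime as the cncl_kind argument,
// e.g. '#pragma omp cancel sections' maps to CancelSections and
// '#pragma omp cancel taskgroup' to CancelTaskgroup.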
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
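// The if clause is what forces the ThenGen split above: for
// '#pragma omp cancel parallel if(C)' the __kmpc_cancel call is emitted only
// on the branch where C is true, while without an if clause ThenGen runs
// unconditionally through a RegionCodeGenTy.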
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
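// Only allocators that carry a traits expression get an Enter/Exit action
// above; e.g. with 'uses_allocators(omp_default_mem_alloc, my_alloc(tr))'
// init/fini calls are registered for my_alloc only, since the predefined
// allocator is listed without traits.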
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.getPointer();

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
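// The net effect is a call of the form
//   %a = __kmpc_init_allocator(gtid, /*memspace=*/null, ntraits, traits)
// whose result is stored into the omp_allocator_handle_t variable named in
// the uses_allocators clause.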
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  // Get NumTeams and ThreadLimit attributes
  int32_t DefaultValTeams = -1;
  uint32_t DefaultValThreads = UINT32_MAX;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads,
                                      /*UpperBoundOnly=*/true);

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      DefaultValTeams, DefaultValThreads,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(OutlinedFn, Attr);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(OutlinedFn, Attr);
      else if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
      else
        llvm_unreachable("Unexpected attribute kind");
    }
  }
}
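// The loop above forwards ompx_attribute clause payloads (CUDA launch
// bounds, AMDGPU flat work-group size and waves-per-EU) from the target
// directive onto the outlined kernel function.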
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
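// E.g. '3 + 4' is trivial and can be skipped when looking for the single
// meaningful statement of a region, while 'foo() + 1' calling a non-trivial
// foo() is not.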
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
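// For instance, a captured body '{ ; int Unused; Work(); }' collapses to the
// single child 'Work();': the null statement and the unused local
// declaration are both skipped by the filters above.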
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
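// Two representative outcomes of the switch above: for
// '#pragma omp target teams num_teams(8)' the num_teams expression is
// returned and DefaultVal becomes 8 once the expression folds to a constant;
// a bare '#pragma omp target' over a parallel region returns nullptr with
// DefaultVal = 1.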
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, uint32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound =
              UpperBound
                  ? Constant->getZExtValue()
                  : std::min(UpperBound,
                             static_cast<uint32_t>(Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == UINT32_MAX)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
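// E.g. for a target region whose single child is
// '#pragma omp parallel num_threads(4) if(C)', the helper above records
// UpperBound = 4 and, unless C folds to a constant, evaluates C into
// *CondVal for the caller to select on.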
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, uint32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           uint32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == UINT32_MAX)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
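// Putting the pieces together for '#pragma omp target parallel
// num_threads(N) thread_limit(L) if(C)': the num_threads expression is
// returned, L ends up in *ThreadLimitExpr, C (when not a constant) in
// *CondVal, and UpperBound is only tightened when N or L fold to integer
// constants.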
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  uint32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. We already handled the thread limit expression.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were present, take the
  // minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
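// The emitted value therefore follows
//   threads = min(thread_limit, C ? (N ? N : 0) : 1)
// with each piece materialized only when the corresponding clause was
// present; the value 0 asks the runtime to pick the default.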
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
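  // Counting the trailing zero bits of OMP_MAP_MEMBER_OF this way yields the
  // shift amount used to encode a MEMBER_OF(n) parent index into the flag
  // word, i.e. entries store (n << getFlagMemberOffset()).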
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and lower bound is
      // not specified too, that means we are using the whole length of the
      // base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// expression.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
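  // E.g. an explicit 'map(always, tofrom: x)' yields
  // OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS here, with
  // OMP_MAP_TARGET_PARAM added on top when the entry is itself a kernel
  // argument.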
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    if (!OASE)
      // It is not an array section and therefore not a unity-size one.
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
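  // E.g. 'a[i:1]' has a provable length of one and is not final, whereas
  // 'a[i:n]' is treated as final because its length cannot be proved to be
  // one.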
  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponentList should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo, StructRangeInfoTy &PartialStruct,
      bool IsFirstComponentList, bool IsImplicit,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
7119 // Track if the map information being generated is the first for a capture.
7120 bool IsCaptureFirstInfo
= IsFirstComponentList
;
7121 // When the variable is on a declare target link or in a to clause with
7122 // unified memory, a reference is needed to hold the host/device address
7124 bool RequiresReference
= false;
7126 // Scan the components from the base to the complete expression.
7127 auto CI
= Components
.rbegin();
7128 auto CE
= Components
.rend();
7131 // Track if the map information being generated is the first for a list of
7133 bool IsExpressionFirstInfo
= true;
7134 bool FirstPointerInComplexData
= false;
7135 Address BP
= Address::invalid();
7136 const Expr
*AssocExpr
= I
->getAssociatedExpression();
7137 const auto *AE
= dyn_cast
<ArraySubscriptExpr
>(AssocExpr
);
7138 const auto *OASE
= dyn_cast
<OMPArraySectionExpr
>(AssocExpr
);
7139 const auto *OAShE
= dyn_cast
<OMPArrayShapingExpr
>(AssocExpr
);
7141 if (isa
<MemberExpr
>(AssocExpr
)) {
7142 // The base is the 'this' pointer. The content of the pointer is going
7143 // to be the base of the field being mapped.
7144 BP
= CGF
.LoadCXXThisAddress();
7145 } else if ((AE
&& isa
<CXXThisExpr
>(AE
->getBase()->IgnoreParenImpCasts())) ||
7147 isa
<CXXThisExpr
>(OASE
->getBase()->IgnoreParenImpCasts()))) {
7148 BP
= CGF
.EmitOMPSharedLValue(AssocExpr
).getAddress(CGF
);
7150 isa
<CXXThisExpr
>(OAShE
->getBase()->IgnoreParenCasts())) {
7152 CGF
.EmitScalarExpr(OAShE
->getBase()),
7153 CGF
.ConvertTypeForMem(OAShE
->getBase()->getType()->getPointeeType()),
7154 CGF
.getContext().getTypeAlignInChars(OAShE
->getBase()->getType()));
7156 // The base is the reference to the variable.
7158 BP
= CGF
.EmitOMPSharedLValue(AssocExpr
).getAddress(CGF
);
7159 if (const auto *VD
=
7160 dyn_cast_or_null
<VarDecl
>(I
->getAssociatedDeclaration())) {
7161 if (std::optional
<OMPDeclareTargetDeclAttr::MapTypeTy
> Res
=
7162 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD
)) {
7163 if ((*Res
== OMPDeclareTargetDeclAttr::MT_Link
) ||
7164 ((*Res
== OMPDeclareTargetDeclAttr::MT_To
||
7165 *Res
== OMPDeclareTargetDeclAttr::MT_Enter
) &&
7166 CGF
.CGM
.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
7167 RequiresReference
= true;
7168 BP
= CGF
.CGM
.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD
);
7173 // If the variable is a pointer and is being dereferenced (i.e. is not
7174 // the last component), the base has to be the pointer itself, not its
7175 // reference. References are ignored for mapping purposes.
7177 I
->getAssociatedDeclaration()->getType().getNonReferenceType();
7178 if (Ty
->isAnyPointerType() && std::next(I
) != CE
) {
7179 // No need to generate individual map information for the pointer, it
7180 // can be associated with the combined storage if shared memory mode is
7181 // active or the base declaration is not global variable.
7182 const auto *VD
= dyn_cast
<VarDecl
>(I
->getAssociatedDeclaration());
7183 if (CGF
.CGM
.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
7184 !VD
|| VD
->hasLocalStorage())
7185 BP
= CGF
.EmitLoadOfPointer(BP
, Ty
->castAs
<PointerType
>());
7187 FirstPointerInComplexData
= true;
7192 // Track whether a component of the list should be marked as MEMBER_OF some
7193 // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
7194 // in a component list should be marked as MEMBER_OF, all subsequent entries
7195 // do not belong to the base struct. E.g.
7197 // s.ps->ps->ps->f[:]
7199 // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
7200 // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
7201 // is the pointee of ps(2) which is not member of struct s, so it should not
7202 // be marked as such (it is still PTR_AND_OBJ).
7203 // The variable is initialized to false so that PTR_AND_OBJ entries which
7204 // are not struct members are not considered (e.g. array of pointers to
7206 bool ShouldBeMemberOf
= false;
7208 // Variable keeping track of whether or not we have encountered a component
7209 // in the component list which is a member expression. Useful when we have a
7210 // pointer or a final array section, in which case it is the previous
7211 // component in the list which tells us whether we have a member expression.
7213 // While processing the final array section "[:]" it is "f" which tells us
7214 // whether we are dealing with a member of a declared struct.
7215 const MemberExpr
*EncounteredME
= nullptr;
7217 // Track for the total number of dimension. Start from one for the dummy
7219 uint64_t DimSize
= 1;
7221 bool IsNonContiguous
= CombinedInfo
.NonContigInfo
.IsNonContiguous
;
7222 bool IsPrevMemberReference
= false;
7224 for (; I
!= CE
; ++I
) {
7225 // If the current component is member of a struct (parent struct) mark it.
7226 if (!EncounteredME
) {
7227 EncounteredME
= dyn_cast
<MemberExpr
>(I
->getAssociatedExpression());
7228 // If we encounter a PTR_AND_OBJ entry from now on it should be marked
7229 // as MEMBER_OF the parent struct.
7230 if (EncounteredME
) {
7231 ShouldBeMemberOf
= true;
7232 // Do not emit as complex pointer if this is actually not array-like
7234 if (FirstPointerInComplexData
) {
7235 QualType Ty
= std::prev(I
)
7236 ->getAssociatedDeclaration()
7238 .getNonReferenceType();
7239 BP
= CGF
.EmitLoadOfPointer(BP
, Ty
->castAs
<PointerType
>());
7240 FirstPointerInComplexData
= false;
7245 auto Next
= std::next(I
);
7247 // We need to generate the addresses and sizes if this is the last
7248 // component, if the component is a pointer or if it is an array section
7249 // whose length can't be proved to be one. If this is a pointer, it
7250 // becomes the base address for the following components.
7252 // A final array section, is one whose length can't be proved to be one.
7253 // If the map item is non-contiguous then we don't treat any array section
7254 // as final array section.
7255 bool IsFinalArraySection
=
7257 isFinalArraySectionExpression(I
->getAssociatedExpression());
7259 // If we have a declaration for the mapping use that, otherwise use
7260 // the base declaration of the map clause.
7261 const ValueDecl
*MapDecl
= (I
->getAssociatedDeclaration())
7262 ? I
->getAssociatedDeclaration()
7264 MapExpr
= (I
->getAssociatedExpression()) ? I
->getAssociatedExpression()
7267 // Get information on whether the element is a pointer. Have to do a
7268 // special treatment for array sections given that they are built-in
7271 dyn_cast
<OMPArraySectionExpr
>(I
->getAssociatedExpression());
7273 dyn_cast
<OMPArrayShapingExpr
>(I
->getAssociatedExpression());
7274 const auto *UO
= dyn_cast
<UnaryOperator
>(I
->getAssociatedExpression());
7275 const auto *BO
= dyn_cast
<BinaryOperator
>(I
->getAssociatedExpression());
7278 (OASE
&& OMPArraySectionExpr::getBaseOriginalType(OASE
)
7280 ->isAnyPointerType()) ||
7281 I
->getAssociatedExpression()->getType()->isAnyPointerType();
7282 bool IsMemberReference
= isa
<MemberExpr
>(I
->getAssociatedExpression()) &&
7284 MapDecl
->getType()->isLValueReferenceType();
7285 bool IsNonDerefPointer
= IsPointer
&&
7286 !(UO
&& UO
->getOpcode() != UO_Deref
) && !BO
&&
7292 if (Next
== CE
|| IsMemberReference
|| IsNonDerefPointer
||
7293 IsFinalArraySection
) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }
        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on the
          // device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty, ComponentLB.getPointer(), LB.getPointer());
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }

        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
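    // A minimal sketch of what the loop above emits (assumed example, not
    // from the original source): for
    //   struct S { int *p; } s;
    //   #pragma omp target map(tofrom : s.p[0:4])
    // the member pointer "s.p" and its pointee form a single PTR_AND_OBJ
    // entry with base = &s.p, begin = &s.p[0], size = 4 * sizeof(int); the
    // TO/FROM bits stay on that entry, and the MEMBER_OF=FFFF placeholder is
    // patched to the parent struct entry later by setCorrectMemberOfFlag.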
    // If we ran into the whole component - allocate the space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;

    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension sizes except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case of having a pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }
    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimension sizes.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous. Notice that offset, count, and stride
    // are only meaningful for an array section, so we insert a null for
    // anything other than an array section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, the sizes of
    // offset, count, and stride equal the number of non-contiguous
    // declarations in the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections as well.
        // However, for a case like arr[0:2][2], Clang constructs the inner
        // dimension as an array section even though it is not in array-section
        // form according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //        Offset  Count  Stride
      //    D0    0       1      4    (int)  <- dummy dimension
      //    D1    0       2      8    (2 * (1) * 4)
      //    D2    1       2      20   (1 * (1 * 5) * 4)
      //    D3    0       2      200  (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
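  // A worked instance of the table above (assumed example, not from the
  // original source): for
  //   int arr[5][5];
  //   #pragma omp target update to(arr[0:2:2][1:2:1])
  // the vectors pushed here come out as
  //   Offsets = {0, 0, 1}, Counts = {1, 2, 2}, Strides = {4, 8, 20}
  // where the leading entries belong to the dummy dimension, so the runtime
  // walks a strided descriptor instead of copying one contiguous block.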
  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // For map(to: lambda): use the user-specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Shift by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
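  // A worked example (illustrative note, not from the original source):
  // getMemberOfFlag(2) stores the 1-based parent index 3 in the MEMBER_OF
  // bit-field, i.e. the value 3 shifted left by getFlagMemberOffset() bits;
  // the +1 keeps a zero field distinct, so zero can mean "no parent entry".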
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if (static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Flags & OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ) &&
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            (Flags & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
            OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
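  // In other words (illustrative note, not from the original source): only
  // entries still carrying the 0xFFFF placeholder in their MEMBER_OF field
  // get the real parent index patched in; a PTR_AND_OBJ entry without the
  // placeholder is deliberately left alone because it does not belong to the
  // combined struct entry.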
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();

    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in non-virtual bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
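  // Illustrative sketch (assumed example, not from the original source):
  //   struct B { int x; };
  //   struct D : B { int y; };
  // getPlainLayout(D, Layout, /*AsBase=*/false) recurses into the base class
  // first and yields Layout = {B::x, D::y}: field declarations in LLVM
  // struct-layout order with base-class fields flattened in front.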
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
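    // For illustration (assumed example, not from the original source):
    // given
    //   #pragma omp target map(present, to : a) map(alloc : b) map(from : c)
    // "a" lands in the Present bucket, "b" in Allocs, and "c" in Other, so
    // for each declaration the present maps are emitted first and alloc maps
    // before the remaining ones.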
    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user's fault if
    // that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };

    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // items.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
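    // Illustrative sketch (assumed example, not from the original source):
    //   #pragma omp target data map(tofrom : p[0:n]) use_device_ptr(p)
    // finds the existing entry for "p" via IsMapInfoExist and merely tags it
    // RETURN_PARAM, while use_device_ptr on a pointer with no matching map
    // makes MapInfoGen emit a zero-size 'alloc'-style entry (or defer it
    // when the pointer is a struct member).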
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
            CurInfo.DevicePointers[CurrentBasePointersIdx] =
                L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer;
            CurInfo.Types[CurrentBasePointersIdx] |=
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
          }
        }
      }

      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
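  // Taken together (illustrative note, not from the original source): for
  //   struct S { int a; int b; } s;
  //   #pragma omp target map(to : s.a) map(from : s.b)
  // the loop above emits one entry per mapped member, and emitCombinedEntry
  // then prepends the covering struct entry and rewrites the members'
  // MEMBER_OF fields to point at it.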
public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct.
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element.
    llvm::Value *LB = LBAddr.getPointer();
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
      // Emit this[:1]
      CombinedInfo.Pointers.push_back(PartialStruct.Base.getPointer());
      QualType Ty = MD->getThisType()->getPointeeType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element).
      llvm::Value *HB = HBAddr.getPointer();
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM when we generate info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element.
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
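  // A minimal sketch of the size computed above (assumed example, not from
  // the original source): if only s.a and s.c of
  //   struct S { int a, b, c; } s;
  // are mapped, the combined entry spans
  //   size = (char *)(&s.c + 1) - (char *)&s.a
  // which also covers the unmapped s.b, so the struct is allocated on the
  // device as one contiguous region.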
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of a user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
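  // Illustrative sketch (assumed example, not from the original source): for
  //   int x; auto l = [&x]() { return x; };
  //   #pragma omp target map(to : l)
  // the capture field of "x" is emitted as PTR_AND_OBJ | LITERAL |
  // MEMBER_OF | IMPLICIT, and adjustMemberOfForLambdaCaptures later points
  // its MEMBER_OF field at the entry of the enclosing lambda object.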
  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
8473 /// Generate the base pointers, section pointers, sizes, map types, and
8474 /// mappers associated to a given capture (all included in \a CombinedInfo).
8475 void generateInfoForCapture(const CapturedStmt::Capture
*Cap
,
8476 llvm::Value
*Arg
, MapCombinedInfoTy
&CombinedInfo
,
8477 StructRangeInfoTy
&PartialStruct
) const {
8478 assert(!Cap
->capturesVariableArrayType() &&
8479 "Not expecting to generate map info for a variable array type!");
8481 // We need to know when we generating information for the first component
8482 const ValueDecl
*VD
= Cap
->capturesThis()
8484 : Cap
->getCapturedVar()->getCanonicalDecl();
8486 // for map(to: lambda): skip here, processing it in
8487 // generateDefaultMapInfo
8488 if (LambdasMap
.count(VD
))
8491 // If this declaration appears in a is_device_ptr clause we just have to
8492 // pass the pointer by value. If it is a reference to a declaration, we just
8494 if (VD
&& (DevPointersMap
.count(VD
) || HasDevAddrsMap
.count(VD
))) {
8495 CombinedInfo
.Exprs
.push_back(VD
);
8496 CombinedInfo
.BasePointers
.emplace_back(Arg
);
8497 CombinedInfo
.DevicePtrDecls
.emplace_back(VD
);
8498 CombinedInfo
.DevicePointers
.emplace_back(DeviceInfoTy::Pointer
);
8499 CombinedInfo
.Pointers
.push_back(Arg
);
8500 CombinedInfo
.Sizes
.push_back(CGF
.Builder
.CreateIntCast(
8501 CGF
.getTypeSize(CGF
.getContext().VoidPtrTy
), CGF
.Int64Ty
,
8502 /*isSigned=*/true));
8503 CombinedInfo
.Types
.push_back(
8504 OpenMPOffloadMappingFlags::OMP_MAP_LITERAL
|
8505 OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM
);
8506 CombinedInfo
.Mappers
.push_back(nullptr);
8511 std::tuple
<OMPClauseMappableExprCommon::MappableExprComponentListRef
,
8512 OpenMPMapClauseKind
, ArrayRef
<OpenMPMapModifierKind
>, bool,
8513 const ValueDecl
*, const Expr
*>;
8514 SmallVector
<MapData
, 4> DeclComponentLists
;
8515 // For member fields list in is_device_ptr, store it in
8516 // DeclComponentLists for generating components info.
8517 static const OpenMPMapModifierKind Unknown
= OMPC_MAP_MODIFIER_unknown
;
8518 auto It
= DevPointersMap
.find(VD
);
8519 if (It
!= DevPointersMap
.end())
8520 for (const auto &MCL
: It
->second
)
8521 DeclComponentLists
.emplace_back(MCL
, OMPC_MAP_to
, Unknown
,
8522 /*IsImpicit = */ true, nullptr,
8524 auto I
= HasDevAddrsMap
.find(VD
);
8525 if (I
!= HasDevAddrsMap
.end())
8526 for (const auto &MCL
: I
->second
)
8527 DeclComponentLists
.emplace_back(MCL
, OMPC_MAP_tofrom
, Unknown
,
8528 /*IsImpicit = */ true, nullptr,
8530 assert(CurDir
.is
<const OMPExecutableDirective
*>() &&
8531 "Expect a executable directive");
8532 const auto *CurExecDir
= CurDir
.get
<const OMPExecutableDirective
*>();
8533 for (const auto *C
: CurExecDir
->getClausesOfKind
<OMPMapClause
>()) {
8534 const auto *EI
= C
->getVarRefs().begin();
8535 for (const auto L
: C
->decl_component_lists(VD
)) {
8536 const ValueDecl
*VDecl
, *Mapper
;
8537 // The Expression is not correct if the mapping is implicit
8538 const Expr
*E
= (C
->getMapLoc().isValid()) ? *EI
: nullptr;
8539 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
;
8540 std::tie(VDecl
, Components
, Mapper
) = L
;
8541 assert(VDecl
== VD
&& "We got information for the wrong declaration??");
8542 assert(!Components
.empty() &&
8543 "Not expecting declaration with no component lists.");
8544 DeclComponentLists
.emplace_back(Components
, C
->getMapType(),
8545 C
->getMapTypeModifiers(),
8546 C
->isImplicit(), Mapper
, E
);
8550 llvm::stable_sort(DeclComponentLists
, [](const MapData
&LHS
,
8551 const MapData
&RHS
) {
8552 ArrayRef
<OpenMPMapModifierKind
> MapModifiers
= std::get
<2>(LHS
);
8553 OpenMPMapClauseKind MapType
= std::get
<1>(RHS
);
8555 llvm::is_contained(MapModifiers
, clang::OMPC_MAP_MODIFIER_present
);
8556 bool HasAllocs
= MapType
== OMPC_MAP_alloc
;
8557 MapModifiers
= std::get
<2>(RHS
);
8558 MapType
= std::get
<1>(LHS
);
8560 llvm::is_contained(MapModifiers
, clang::OMPC_MAP_MODIFIER_present
);
8561 bool HasAllocsR
= MapType
== OMPC_MAP_alloc
;
8562 return (HasPresent
&& !HasPresentR
) || (HasAllocs
&& !HasAllocsR
);
8565 // Find overlapping elements (including the offset from the base element).
8566 llvm::SmallDenseMap
<
8569 OMPClauseMappableExprCommon::MappableExprComponentListRef
, 4>,
8573 for (const MapData
&L
: DeclComponentLists
) {
8574 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
;
8575 OpenMPMapClauseKind MapType
;
8576 ArrayRef
<OpenMPMapModifierKind
> MapModifiers
;
8578 const ValueDecl
*Mapper
;
8580 std::tie(Components
, MapType
, MapModifiers
, IsImplicit
, Mapper
, VarRef
) =
8583 for (const MapData
&L1
: ArrayRef(DeclComponentLists
).slice(Count
)) {
8584 OMPClauseMappableExprCommon::MappableExprComponentListRef Components1
;
8585 std::tie(Components1
, MapType
, MapModifiers
, IsImplicit
, Mapper
,
8587 auto CI
= Components
.rbegin();
8588 auto CE
= Components
.rend();
8589 auto SI
= Components1
.rbegin();
8590 auto SE
= Components1
.rend();
8591 for (; CI
!= CE
&& SI
!= SE
; ++CI
, ++SI
) {
8592 if (CI
->getAssociatedExpression()->getStmtClass() !=
8593 SI
->getAssociatedExpression()->getStmtClass())
8595 // Are we dealing with different variables/fields?
8596 if (CI
->getAssociatedDeclaration() != SI
->getAssociatedDeclaration())
8599 // Found overlapping if, at least for one component, reached the head
8600 // of the components list.
8601 if (CI
== CE
|| SI
== SE
) {
8602 // Ignore it if it is the same component.
8603 if (CI
== CE
&& SI
== SE
)
8605 const auto It
= (SI
== SE
) ? CI
: SI
;
8606 // If one component is a pointer and another one is a kind of
8607 // dereference of this pointer (array subscript, section, dereference,
8608 // etc.), it is not an overlapping.
8609 // Same, if one component is a base and another component is a
8610 // dereferenced pointer memberexpr with the same base.
8611 if (!isa
<MemberExpr
>(It
->getAssociatedExpression()) ||
8612 (std::prev(It
)->getAssociatedDeclaration() &&
8614 ->getAssociatedDeclaration()
8616 ->isPointerType()) ||
8617 (It
->getAssociatedDeclaration() &&
8618 It
->getAssociatedDeclaration()->getType()->isPointerType() &&
8619 std::next(It
) != CE
&& std::next(It
) != SE
))
8621 const MapData
&BaseData
= CI
== CE
? L
: L1
;
8622 OMPClauseMappableExprCommon::MappableExprComponentListRef SubData
=
8623 SI
== SE
? Components
: Components1
;
8624 auto &OverlappedElements
= OverlappedData
.FindAndConstruct(&BaseData
);
8625 OverlappedElements
.getSecond().push_back(SubData
);
8629 // Sort the overlapped elements for each item.
8630 llvm::SmallVector
<const FieldDecl
*, 4> Layout
;
8631 if (!OverlappedData
.empty()) {
8632 const Type
*BaseType
= VD
->getType().getCanonicalType().getTypePtr();
8633 const Type
*OrigType
= BaseType
->getPointeeOrArrayElementType();
8634 while (BaseType
!= OrigType
) {
8635 BaseType
= OrigType
->getCanonicalTypeInternal().getTypePtr();
8636 OrigType
= BaseType
->getPointeeOrArrayElementType();
8639 if (const auto *CRD
= BaseType
->getAsCXXRecordDecl())
8640 getPlainLayout(CRD
, Layout
, /*AsBase=*/false);
8642 const auto *RD
= BaseType
->getAsRecordDecl();
8643 Layout
.append(RD
->field_begin(), RD
->field_end());
8646 for (auto &Pair
: OverlappedData
) {
8650 OMPClauseMappableExprCommon::MappableExprComponentListRef First
,
8651 OMPClauseMappableExprCommon::MappableExprComponentListRef
8653 auto CI
= First
.rbegin();
8654 auto CE
= First
.rend();
8655 auto SI
= Second
.rbegin();
8656 auto SE
= Second
.rend();
8657 for (; CI
!= CE
&& SI
!= SE
; ++CI
, ++SI
) {
8658 if (CI
->getAssociatedExpression()->getStmtClass() !=
8659 SI
->getAssociatedExpression()->getStmtClass())
8661 // Are we dealing with different variables/fields?
8662 if (CI
->getAssociatedDeclaration() !=
8663 SI
->getAssociatedDeclaration())
8667 // Lists contain the same elements.
8668 if (CI
== CE
&& SI
== SE
)
8671 // List with less elements is less than list with more elements.
8672 if (CI
== CE
|| SI
== SE
)
8675 const auto *FD1
= cast
<FieldDecl
>(CI
->getAssociatedDeclaration());
8676 const auto *FD2
= cast
<FieldDecl
>(SI
->getAssociatedDeclaration());
8677 if (FD1
->getParent() == FD2
->getParent())
8678 return FD1
->getFieldIndex() < FD2
->getFieldIndex();
8680 llvm::find_if(Layout
, [FD1
, FD2
](const FieldDecl
*FD
) {
8681 return FD
== FD1
|| FD
== FD2
;
    // We need to know when we generate information for the first component
    // associated with a capture, because the mapping flags depend on it.
    // Go through all of the elements with the overlapped elements.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, std::nullopt, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, std::nullopt,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
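
  // Illustrative example (not from the original source): with
  //   #pragma omp target map(tofrom : s) map(to : s.x)
  // the component list for 's.x' overlaps the one for 's', so the overlapped
  // member is recorded above and the entry for 's' is emitted around it with
  // its own map flags.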
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
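
// For instance (illustrative): given 'map(this->data[0:n])', the base of the
// array section is a MemberExpr on 'this', so the member 'data' is returned
// as the mapped declaration.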
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
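
// The emitted constant follows the runtime's ';'-separated source-location
// encoding; e.g. (illustrative) an expression 'x[0:4]' at line 12, column 34
// of file.c would be encoded as ";file.c;x[0:4];12;34;;".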

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}
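
// E.g. (illustrative): a region with N mapped items yields N-element arrays
// of base pointers, begin pointers, sizes, map types, map names and mappers;
// these later become the runtime arguments of the kernel-launch code below.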

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, just treat 'target teams loop' as if it's distributed.
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_taskyield:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
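
// E.g. (illustrative): in
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int i = 0; i < n; ++i) ;
// the 'distribute parallel for' directive is found as the nested distribute
// directive of the enclosing 'target' region.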

/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
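///
/// A user-defined mapper such as the following (illustrative user code, not
/// from this file) is lowered into a function of the shape above:
/// \code
/// struct S { int len; double *data; };
/// #pragma omp declare mapper(default : struct S s) \
///     map(s.len) map(s.data[0 : s.len])
/// \endcode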
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
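    //
    // E.g. (illustrative): a component the mapper declares with 'map(to: ...)'
    // that is reached from an enclosing 'from' map decays to 'alloc' per the
    // row/column intersection above, so no data is transferred for it.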
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle, CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle, Base, Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}
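
// E.g. (illustrative): for '#pragma omp target device(2)' the expression '2'
// is emitted and sign-extended to Int64; without a device() clause the
// sentinel OMP_DEVICEID_UNDEF is passed instead.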

static llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                                      CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}
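
// E.g. (illustrative): a target directive carrying an OMPXDynCGroupMemClause,
// such as 'ompx_dyn_cgroup_mem(1024)', requests 1024 bytes of dynamic group
// memory for the kernel launch; without the clause the value defaults to 0.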

static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          nullptr, !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      LambdaPointers, CombinedInfo.BasePointers, CombinedInfo.Pointers,
      CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray = InputInfo.BasePointersArray.getPointer();
    llvm::Value *PointersArray = InputInfo.PointersArray.getPointer();
    llvm::Value *SizesArray = InputInfo.SizesArray.getPointer();
    llvm::Value *MappersArray = InputInfo.MappersArray.getPointer();

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
*S
,
9735 StringRef ParentName
) {
9739 // Codegen OMP target directives that offload compute to the device.
9740 bool RequiresDeviceCodegen
=
9741 isa
<OMPExecutableDirective
>(S
) &&
9742 isOpenMPTargetExecutionDirective(
9743 cast
<OMPExecutableDirective
>(S
)->getDirectiveKind());
9745 if (RequiresDeviceCodegen
) {
9746 const auto &E
= *cast
<OMPExecutableDirective
>(S
);
9748 llvm::TargetRegionEntryInfo EntryInfo
= getEntryInfoFromPresumedLoc(
9749 CGM
, OMPBuilder
, E
.getBeginLoc(), ParentName
);
9751 // Is this a target region that should not be emitted as an entry point? If
9752 // so just signal we are done with this target region.
9753 if (!OMPBuilder
.OffloadInfoManager
.hasTargetRegionEntryInfo(EntryInfo
))
9756 switch (E
.getDirectiveKind()) {
9758 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM
, ParentName
,
9759 cast
<OMPTargetDirective
>(E
));
9761 case OMPD_target_parallel
:
9762 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9763 CGM
, ParentName
, cast
<OMPTargetParallelDirective
>(E
));
9765 case OMPD_target_teams
:
9766 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9767 CGM
, ParentName
, cast
<OMPTargetTeamsDirective
>(E
));
9769 case OMPD_target_teams_distribute
:
9770 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9771 CGM
, ParentName
, cast
<OMPTargetTeamsDistributeDirective
>(E
));
9773 case OMPD_target_teams_distribute_simd
:
9774 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9775 CGM
, ParentName
, cast
<OMPTargetTeamsDistributeSimdDirective
>(E
));
9777 case OMPD_target_parallel_for
:
9778 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9779 CGM
, ParentName
, cast
<OMPTargetParallelForDirective
>(E
));
9781 case OMPD_target_parallel_for_simd
:
9782 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9783 CGM
, ParentName
, cast
<OMPTargetParallelForSimdDirective
>(E
));
9785 case OMPD_target_simd
:
9786 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9787 CGM
, ParentName
, cast
<OMPTargetSimdDirective
>(E
));
9789 case OMPD_target_teams_distribute_parallel_for
:
9790 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9792 cast
<OMPTargetTeamsDistributeParallelForDirective
>(E
));
9794 case OMPD_target_teams_distribute_parallel_for_simd
:
9796 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9798 cast
<OMPTargetTeamsDistributeParallelForSimdDirective
>(E
));
9800 case OMPD_target_teams_loop
:
9801 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9802 CGM
, ParentName
, cast
<OMPTargetTeamsGenericLoopDirective
>(E
));
9804 case OMPD_target_parallel_loop
:
9805 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9806 CGM
, ParentName
, cast
<OMPTargetParallelGenericLoopDirective
>(E
));
9810 case OMPD_parallel_for
:
9811 case OMPD_parallel_master
:
9812 case OMPD_parallel_sections
:
9814 case OMPD_parallel_for_simd
:
9816 case OMPD_cancellation_point
:
9818 case OMPD_threadprivate
:
9829 case OMPD_taskyield
:
9832 case OMPD_taskgroup
:
9838 case OMPD_target_data
:
9839 case OMPD_target_exit_data
:
9840 case OMPD_target_enter_data
:
9841 case OMPD_distribute
:
9842 case OMPD_distribute_simd
:
9843 case OMPD_distribute_parallel_for
:
9844 case OMPD_distribute_parallel_for_simd
:
9845 case OMPD_teams_distribute
:
9846 case OMPD_teams_distribute_simd
:
9847 case OMPD_teams_distribute_parallel_for
:
9848 case OMPD_teams_distribute_parallel_for_simd
:
9849 case OMPD_target_update
:
9850 case OMPD_declare_simd
:
9851 case OMPD_declare_variant
:
9852 case OMPD_begin_declare_variant
:
9853 case OMPD_end_declare_variant
:
9854 case OMPD_declare_target
:
9855 case OMPD_end_declare_target
:
9856 case OMPD_declare_reduction
:
9857 case OMPD_declare_mapper
:
9859 case OMPD_taskloop_simd
:
9860 case OMPD_master_taskloop
:
9861 case OMPD_master_taskloop_simd
:
9862 case OMPD_parallel_master_taskloop
:
9863 case OMPD_parallel_master_taskloop_simd
:
9865 case OMPD_metadirective
:
9868 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9873 if (const auto *E
= dyn_cast
<OMPExecutableDirective
>(S
)) {
9874 if (!E
->hasAssociatedStmt() || !E
->getAssociatedStmt())
9877 scanForTargetRegionsFunctions(E
->getRawStmt(), ParentName
);
9881 // If this is a lambda function, look into its body.
9882 if (const auto *L
= dyn_cast
<LambdaExpr
>(S
))
9885 // Keep looking for target regions recursively.
9886 for (const Stmt
*II
: S
->children())
9887 scanForTargetRegionsFunctions(II
, ParentName
);

static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
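
// E.g. (illustrative): a function enclosed in
//   #pragma omp declare target device_type(nohost)
//   ...
//   #pragma omp end declare target
// is assumed not to be emitted when compiling for the host, and likewise for
// device_type(host) when compiling for the device.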

bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsTargetDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsTargetDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsTargetDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}

bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsTargetDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsTargetDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
        *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}

void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         " Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}
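
// E.g. (illustrative): '#pragma omp requires atomic_default_mem_order(seq_cst)'
// records SequentiallyConsistent here, which later atomic codegen consults
// through getDefaultMemoryOrdering() below.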

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}

bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsTargetDevice ||
      (OMPBuilder.OffloadInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an error
    // for mismatching requires clauses across compilation units that don't
    // contain at least 1 target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OMPBuilder.OffloadInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
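
// For example (sketch), for
//   #pragma omp teams
// with two captured variables, the code above emits a call equivalent to
//   __kmpc_fork_teams(&loc, 2, (kmpc_micro)outlined_fn, var1, var2);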
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /*isSigned=*/true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
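
// For example (sketch), for
//   #pragma omp teams num_teams(4) thread_limit(8)
// this emits __kmpc_push_num_teams(&loc, gtid, 4, 8); an absent clause is
// encoded as 0 above.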
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device)
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  else
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}
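
// For illustration, a 'target data' region such as
//   #pragma omp target data map(tofrom: a[0:n]) use_device_ptr(p)
// reaches this point with CombinedInfo describing the map entries; the
// OpenMPIRBuilder then brackets the region body with the
// __tgt_target_data_begin_mapper / __tgt_target_data_end_mapper runtime calls.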
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
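
// For example (sketch),
//   #pragma omp target enter data map(to: v) nowait
// selects __tgt_target_data_begin_nowait_mapper above, and the nowait/depend
// clauses force the call to be wrapped in an outer task via
// EmitOMPTargetTaskBasedDirective.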
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is pass-by-value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
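
// Worked example: for
//   #pragma omp declare simd
//   double dot(double x, double y);
// the CDT is the return type 'double' (64 bits), so on a 256-bit vector ISA
// the implied VLEN is 256 / 64 = 4.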
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
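
// For example (sketch), a parameter sequence classified as (Vector, Uniform,
// Linear with step 2) mangles to "vul2"; a step equal to 1 is omitted, so a
// plain 'linear(p)' parameter mangles to just "l".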
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
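
// For example (sketch), for
//   #pragma omp declare simd notinbranch
//   double f(double x);
// the loop above adds attributes such as "_ZGVbN2v_f", "_ZGVcN4v_f",
// "_ZGVdN4v_f" and "_ZGVeN8v_f", with VLEN = VecRegSize / sizeof(double)
// computed per ISA.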
// These are the functions needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex types within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
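
// Worked example: for 'double f(float x)' the lane sizes are {64, 32}, so
// NDS = 32 and WDS = 64; OutputBecomesInput stays false because 'double' is
// pass-by-value.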
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
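
// For example (sketch), with NDS == 32 (narrowest type is 'float') the
// switch above emits the 2-lane (64-bit) and 4-lane (128-bit) Advanced SIMD
// variants, per section 3.3.1 of the AAVFABI.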
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // limits.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
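
// For example (sketch), compiling
//   #pragma omp declare simd
//   double f(double x);
// with SVE available adds "_ZGVsMxv_f" (scalable, masked), while plain
// Advanced SIMD adds fixed-width variants such as "_ZGVnN2v_f".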
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    dyn_cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
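
// For example (sketch), for
//   #pragma omp declare simd linear(p) uniform(n)
//   void f(double *p, int n);
// 'p' is classified Linear and its implicit step 1 is rescaled by
// sizeof(double) to 8, so the parameter mangling becomes "l8u".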
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim {  // loop bounds info casted to kmp_int64.
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
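
// For example (sketch), for
//   #pragma omp for ordered(2)
// this allocates a two-element kmp_dim array, fills each 'up' and 'st'
// field, and calls __kmpc_doacross_init(&loc, gtid, 2, dims);
// __kmpc_doacross_fini is pushed as a cleanup for region exit.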
template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID, CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
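
// For example (sketch), inside the ordered(2) loop above,
//   #pragma omp ordered depend(source)        -> __kmpc_doacross_post(.., cnt)
//   #pragma omp ordered depend(sink: i-1, j)  -> __kmpc_doacross_wait(.., cnt)
// where 'cnt' is the per-dimension iteration vector built in the loop above.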
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}
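
// For example (sketch),
//   #pragma omp allocate(x) allocator(omp_high_bw_mem_alloc) align(64)
// yields a non-null allocator value converted to void* and an alignment
// constant of 64 for the __kmpc_aligned_alloc call emitted below.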
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
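
// For example (sketch), a VLA-sized local declared under
//   #pragma omp allocate(buf) allocator(omp_low_lat_mem_alloc)
// is sized and aligned above, allocated with __kmpc_alloc (or
// __kmpc_aligned_alloc when an 'align' clause is present), and freed via the
// __kmpc_free cleanup pushed on the EH stack.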
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled) {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv;
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    //   last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    //   last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
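
// Worked example (hedged): for a conditional variable with unique name U the
// code above creates two internal globals, U.iv (last updating iteration) and
// U (last stored value), and performs the guarded compare-and-copy inside an
// omp critical region named U; under -fopenmp-simd no parallel region can
// exist, so the same code is emitted without the critical wrapper.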
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a;
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
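
// Design note (hedged): when the assignment happens in an inner outlined
// function (FoundFn != CGF.CurFn), the loop counter of the enclosing region
// is not available, so only the volatile Fired flag is raised atomically
// here; the enclosing region later performs the real compare-and-copy in
// checkAndEmitSharedLastprivateConditional below.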
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}
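
// Note (hedged): Fired is read with a plain load here because this check is
// emitted after the captured region has completed, so no concurrent writers
// of the flag remain at this point.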
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
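
// Usage sketch (hedged): conceptually this is the copy-out step
//   a = last_a; // only if the internal global for 'a' was ever created
// emitted once after the construct, which completes the protocol started by
// emitLastprivateConditionalUpdate.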
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
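
// Note (hedged): emitReduction is the one entry point CGOpenMPSIMDRuntime
// implements, because a simple reduction (e.g. '#pragma omp simd
// reduction(+:sum)') needs no parallel runtime support and can be delegated
// to the base class; the remaining overrides are unreachable in SIMD-only
// mode.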
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");