//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//

#include "CGOpenMPRuntime.h"
#include "ABIInfoImpl.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/SmallVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"

using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel' directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
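
// Note: the subclasses below differ in how the construct body is emitted.
// Outlined regions ('parallel', 'task', 'target') build a separate helper
// function, while inlined regions emit the body straight into the current
// function and forward most queries to the enclosing region's info.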

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
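
// For untied tasks the runtime may re-enter the task entry point several
// times. UntiedTaskActionTy (below) materializes a switch over a persistent
// part id: Enter() dispatches to the recorded ".untied.jmp." resume block,
// and each emitUntiedSwitch() stores the next case number and adds a new
// resume block, so execution continues where the task last yielded.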

/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};

/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, no need to lookup in a list of
    // captured variables, we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};

/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};

static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress());
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    return nullptr;
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};

/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
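
// Note: the RAII above swaps the lambda capture maps out on entry and back in
// on exit (rather than clearing them), so that when NoInheritance is set,
// variable references emitted inside the region do not resolve through the
// enclosing lambda's captures.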

/// Values for bit flags used in the ident_t to describe the fields.
/// All enum elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
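
// Note: OMP_IDENT_BARRIER_IMPL_FOR shares the value 0x40 with
// OMP_IDENT_BARRIER_IMPL, matching the flag definitions in kmp.h.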

/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**< might be used in Fortran;
///                                 see above */
///    kmp_int32 flags;        /**< also f.flags; KMP_IDENT_xxx flags;
///                                 KMP_IDENT_KMPC identifies this union
///                                 member */
///    kmp_int32 reserved_2;   /**< not really used in Fortran any more;
///                                 see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**< source[4] in Fortran, do not use for
///                                 C++ */
///    char const *psource;    /**< String describing the source location.
///                                 The string is composed of semi-colon
///                                 separated fields which describe the source
///                                 file, the function and a pair of line
///                                 numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};

/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
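
// The two modifier bits above are OR'ed into the base schedule value when a
// 'monotonic'/'nonmonotonic' modifier is present, so a single kmp_int32
// encodes both the schedule kind and its modifiers.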

/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// region.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
} // anonymous namespace

void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
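
// Registering the pre/post action as a NormalAndEHCleanup (rather than
// invoking its exit hook directly after the callback) ensures the hook also
// runs when the region is left early, e.g. on an exception path.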

/// Check if the combiner is a call to UDR combiner and if it is so return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD = dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}

static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignRawAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr = SrcAddr.withElementType(DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.emitRawPointer(CGF);
  llvm::Value *DestBegin = DestAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
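
// Note: the initialization loop above is built directly out of PHI nodes
// (the "omp.arrayinit."/"omp.arraycpy." blocks) so that, for declare
// reduction initializers, the source and destination element pointers can
// advance in lock-step within a single loop.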

LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E))
    return CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}

void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}

ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}

void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<ArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress().getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}

void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}

bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr =
        PrivateAddr.withElementType(CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}

static LValue loadToBegin(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          LValue BaseLV) {
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    if (const auto *PtrTy = BaseTy->getAs<PointerType>()) {
      BaseLV = CGF.EmitLoadOfPointerLValue(BaseLV.getAddress(), PtrTy);
    } else {
      LValue RefLVal = CGF.MakeAddrLValue(BaseLV.getAddress(), BaseTy);
      BaseLV = CGF.EmitLoadOfReferenceLValue(RefLVal);
    }
    BaseTy = BaseTy->getPointeeType();
  }
  return CGF.MakeAddrLValue(
      BaseLV.getAddress().withElementType(CGF.ConvertTypeForMem(ElTy)),
      BaseLV.getType(), BaseLV.getBaseInfo(),
      CGF.CGM.getTBAAInfoForSubobject(BaseLV, BaseLV.getType()));
}

static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  RawAddress Tmp = RawAddress::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr, NotKnownNonNull);
}

static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<ArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<ArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}

Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress();
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.emitRawPointer(CGF));
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.emitRawPointer(CGF), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
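
// adjustPrivateAddress (above) mirrors the offset of the reduced subobject
// within its base variable: it computes the element distance between the
// section base and the shared address and applies the same displacement to
// the private copy, so array-section reductions address the matching slot in
// private storage.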

bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}

LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}

void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}

LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}

static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}

CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM)
    : CGM(CGM), OMPBuilder(CGM.getModule()) {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);
  llvm::OpenMPIRBuilderConfig Config(
      CGM.getLangOpts().OpenMPIsTargetDevice, isGPU(),
      CGM.getLangOpts().OpenMPOffloadMandatory,
      /*HasRequiresReverseOffload*/ false, /*HasRequiresUnifiedAddress*/ false,
      hasRequiresUnifiedSharedMemory(), /*HasRequiresDynamicAllocators*/ false);
  OMPBuilder.initialize();
  OMPBuilder.loadOffloadInfoMetadata(CGM.getLangOpts().OpenMPIsTargetDevice
                                         ? CGM.getLangOpts().OMPHostIRFile
                                         : StringRef{});
  OMPBuilder.setConfig(Config);

  // The user forces the compiler to behave as if omp requires
  // unified_shared_memory was given.
  if (CGM.getLangOpts().OpenMPForceUSM) {
    HasRequiresUnifiedSharedMemory = true;
    OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
  }
}

void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}

std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  return OMPBuilder.createPlatformSpecificName(Parts);
}

static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamKind::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress());
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress());
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
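
// For reference, such a combiner/initializer pair is emitted for a
// user-defined reduction along the lines of (illustrative example):
//   #pragma omp declare reduction(merge : T : omp_out = f(omp_out, omp_in)) \
//       initializer(omp_priv = T())
// with 'omp_in'/'omp_out' mapped onto the two restrict pointer parameters.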

void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionInitKind::Call ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}

namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    //       OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace

static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  //       parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
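
// The long dyn_cast chain above is needed because hasCancel() is declared on
// each concrete directive class rather than on a common base class.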

std::string CGOpenMPRuntime::getOutlinedHelperName(StringRef Name) const {
  std::string Suffix = getName({"omp_outlined"});
  return (Name + Suffix).str();
}

std::string CGOpenMPRuntime::getOutlinedHelperName(CodeGenFunction &CGF) const {
  return getOutlinedHelperName(CGF.CurFn->getName());
}

std::string CGOpenMPRuntime::getReductionFuncName(StringRef Name) const {
  std::string Suffix = getName({"omp", "reduction", "reduction_func"});
  return (Name + Suffix).str();
}

llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(CGF),
      CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}

void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
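
// The "svcpt" bitcast of undef is a dummy instruction: it merely provides a
// stable insertion point for service calls such as __kmpc_global_thread_num
// and is erased again by clearLocThreadIdInsertPt().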

void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}

static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
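
// The resulting ident string follows the kmp.h psource convention,
// ";file;function;line;column;;", i.e. semicolon-separated fields.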

llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags, bool EmitLoc) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if ((!EmitLoc && CGM.getCodeGenOpts().getDebugInfo() ==
                       llvm::codegenoptions::NoDebugInfo) ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}

llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this an outlined function with thread id passed as argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
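
// In summary, getThreadID prefers, in order: the OpenMPIRBuilder's own thread
// id handling, a value cached for this function, the gtid parameter of the
// enclosing outlined region, and finally an explicit
// __kmpc_global_thread_num call emitted at the service insertion point.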

void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}

llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}

llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseKind
convertDeviceClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;

  switch ((int)*DevTy) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::DT_Host:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseHost;
  case OMPDeclareTargetDeclAttr::DT_NoHost:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNoHost;
  case OMPDeclareTargetDeclAttr::DT_Any:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseAny;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetDeviceClauseNone;
  }
}

llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind
convertCaptureClause(const VarDecl *VD) {
  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> MapType =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!MapType)
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;

  switch ((int)*MapType) { // Avoid -Wcovered-switch-default
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_To:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Enter:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryEnter;
  case OMPDeclareTargetDeclAttr::MapTypeTy::MT_Link:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
  default:
    return llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryNone;
  }
}

static llvm::TargetRegionEntryInfo getEntryInfoFromPresumedLoc(
    CodeGenModule &CGM, llvm::OpenMPIRBuilder &OMPBuilder,
    SourceLocation BeginLoc, llvm::StringRef ParentName = "") {

  auto FileInfoCallBack = [&]() {
    SourceManager &SM = CGM.getContext().getSourceManager();
    PresumedLoc PLoc = SM.getPresumedLoc(BeginLoc);

    llvm::sys::fs::UniqueID ID;
    if (llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
      PLoc = SM.getPresumedLoc(BeginLoc, /*UseLineDirectives=*/false);
    }

    return std::pair<std::string, uint64_t>(PLoc.getFilename(),
                                            PLoc.getLine());
  };

  return OMPBuilder.getTargetEntryUniqueInfo(FileInfoCallBack, ParentName);
}
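
// Naming note (an assumption about the builder's convention, recorded here
// only as a reader aid): the unique entry info produced above is what gives
// offload entries names of the general shape
//   __omp_offloading_<device-id>_<file-id>_<parent-name>_l<line>
// so two target regions in the same file and function are disambiguated by
// their presumed line numbers.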

ConstantAddress CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };

  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;

  llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(
      CGM.getContext().getPointerType(VD->getType()));
  llvm::Constant *addr = OMPBuilder.getAddrOfDeclareTargetVar(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, LlvmPtrTy, AddrOfGlobal,
      LinkageForVariable);

  if (!addr)
    return ConstantAddress::invalid();
  return ConstantAddress(addr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
}

llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return OMPBuilder.getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix).str());
}

Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
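
// Illustration (assumed shape of the emitted call): for a variable declared
// with `#pragma omp threadprivate(x)` on a target without TLS support, an
// access to x lowers to roughly
//   void *p = __kmpc_threadprivate_cached(&loc, gtid, &x, sizeof(x), &x$cache);
// and the returned pointer addresses the calling thread's private copy.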

void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc,
      CGF.Builder.CreatePointerCast(VDAddr.emitRawPointer(CGF), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}

llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate function that re-emits the declaration's initializer into the
      // threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.ConvertTypeForMem(ASTTy),
                  VDAddr.getAlignment());
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate function that emits destructor call for the threadprivate
      // copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamKind::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}

void CGOpenMPRuntime::emitDeclareTargetFunction(const FunctionDecl *FD,
                                                llvm::GlobalValue *GV) {
  std::optional<OMPDeclareTargetDeclAttr *> ActiveAttr =
      OMPDeclareTargetDeclAttr::getActiveAttr(FD);

  // We only need to handle active 'indirect' declare target functions.
  if (!ActiveAttr || !(*ActiveAttr)->getIndirect())
    return;

  // Get a mangled name to store the new device global in.
  llvm::TargetRegionEntryInfo EntryInfo = getEntryInfoFromPresumedLoc(
      CGM, OMPBuilder, FD->getCanonicalDecl()->getBeginLoc(), FD->getName());
  SmallString<128> Name;
  OMPBuilder.OffloadInfoManager.getTargetRegionEntryFnName(Name, EntryInfo);

  // We need to generate a new global to hold the address of the indirectly
  // called device function. Doing this allows us to keep the visibility and
  // linkage of the associated function unchanged while allowing the runtime to
  // access its value.
  llvm::GlobalValue *Addr = GV;
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    Addr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.VoidPtrTy,
        /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage, GV, Name,
        nullptr, llvm::GlobalValue::NotThreadLocal,
        CGM.getModule().getDataLayout().getDefaultGlobalsAddressSpace());
    Addr->setVisibility(llvm::GlobalValue::ProtectedVisibility);
  }

  OMPBuilder.OffloadInfoManager.registerDeviceGlobalVarEntryInfo(
      Name, Addr, CGM.GetTargetTypeStoreSize(CGM.VoidPtrTy).getQuantity(),
      llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryIndirect,
      llvm::GlobalValue::WeakODRLinkage);
}

Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr = OMPBuilder.getOrCreateInternalVariable(
      VarLVType, Twine(Name).concat(Suffix).str());
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      OMPBuilder.getOrCreateInternalVariable(
          CGM.Int8PtrPtrTy,
          Twine(Name).concat(Suffix).concat(CacheSuffix).str())};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}

void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
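
// Illustration (hypothetical directive, shown only as a reader aid): for
//   #pragma omp parallel if(0)
// ConstantFoldsToSimpleInteger() succeeds with CondConstant == false, so only
// the 'else' (serialized) arm is emitted and no branch is generated; a
// non-constant condition instead produces the omp_if.then / omp_if.else /
// omp_if.end diamond built above.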

void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    RawAddress ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.emitRawPointer(CGF));
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call, but the ones called in serialized
    // regions could be inlined. This is not perfect, but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do the
    // handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
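
// Summary of the two arms above (shapes taken from the comments in the code):
// the 'then' arm emits
//   __kmpc_fork_call(&loc, <n captured>, (kmpc_micro)outlined_fn, var1, ..., varn);
// while the 'else' (serialized) arm emits
//   __kmpc_serialized_parallel(&loc, gtid);
//   outlined_fn(&gtid, &.bound.zero.addr, var1, ..., varn);
//   __kmpc_end_serialized_parallel(&loc, gtid);
// which is why the outlined function must not be inlined on that path.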

// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function as "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in regular serial code, get the thread ID by calling kmp_int32
// __kmpc_global_thread_num(ident_t *loc), stash this thread ID in a temporary
// and return the address of that temp.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress();

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}

llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return OMPBuilder.getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
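
// Usage note: CommonActionTy brackets a region with matching runtime
// enter/exit calls. With Conditional=true (master, masked, single) the enter
// call's result guards the region:
//   if (__kmpc_master(&loc, gtid)) { <region> __kmpc_end_master(&loc, gtid); }
// and the caller is expected to invoke Done() afterwards to emit the
// continuation block.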

void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}

void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}

void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
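
// Illustration (assumed shape of the emitted calls): `#pragma omp masked
// filter(tid)` lowers to
//   if (__kmpc_masked(&loc, gtid, tid)) {
//     <masked region>
//     __kmpc_end_masked(&loc, gtid);
//   }
// with a zero filter constant when no filter clause is present, matching the
// OMPD_master lowering above except for the extra filter argument.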

void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}

void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}

/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}

static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
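
// Shape of the generated helper (restating the comments above): for n
// copyprivate variables the emitted function is roughly
//   void copy_func(void *LHSArg, void *RHSArg) {
//     *(Type0 *)((void **)LHSArg)[0] = *(Type0 *)((void **)RHSArg)[0];
//     ...
//     *(Typen *)((void **)LHSArg)[n] = *(Typen *)((void **)RHSArg)[n];
//   }
// where each assignment goes through EmitOMPCopy so user-defined copy
// assignment operators are honored.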

void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.emitRawPointer(CGF),       // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
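
// Net effect (restating the pseudo-code comment above): the single construct
// becomes
//   int32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <single region>
//     __kmpc_end_single(&loc, gtid);
//     did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, buf_size, cpr_list, copy_func, did_it);
// where the copyprivate call is emitted only when the clause is present.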

void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc),
                           getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}

unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}

void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}

void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
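
// Cancellation note (restating the logic above): inside a cancellable region
// the barrier becomes
//   if (__kmpc_cancel_barrier(&loc, gtid)) goto <cancel exit>;
// whereas outside such regions, or when ForceSimpleCall is set, a plain
//   __kmpc_barrier(&loc, gtid);
// is emitted with flags chosen by getDefaultFlagsForBarriers().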

void CGOpenMPRuntime::emitErrorCall(CodeGenFunction &CGF, SourceLocation Loc,
                                    Expr *ME, bool IsFatal) {
  llvm::Value *MVL =
      ME ? CGF.EmitStringLiteralLValue(cast<StringLiteral>(ME)).getPointer(CGF)
         : llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  // Build call void __kmpc_error(ident_t *loc, int severity, const char
  // *message)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc, /*Flags=*/0, /*GenLoc=*/true),
      llvm::ConstantInt::get(CGM.Int32Ty, IsFatal ? 2 : 1),
      CGF.Builder.CreatePointerCast(MVL, CGM.Int8PtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_error),
                      Args);
}

/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}

/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}

static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
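
// Worked example (values symbolic; the OR above is the actual combination
// rule): under OpenMP >= 5.0, `schedule(dynamic)` with no modifier yields
//   OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic
// because dynamic is not in the static/ordered list above, while
// `schedule(monotonic: dynamic)` keeps OMP_sch_modifier_monotonic from the
// first switch.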

void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchInitFunction(IVSize, IVSigned),
                      Args);
}

void CGOpenMPRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_dispatch_deinit(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchDeinitFunction(), Args);
}

static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.emitRawPointer(CGF),                    // &isLastIter
      Values.LB.emitRawPointer(CGF),                    // &LB
      Values.UB.emitRawPointer(CGF),                    // &UB
      Values.ST.emitRawPointer(CGF),                    // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
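
// Assumed shape of the resulting call for a 32-bit signed IV (the argument
// roles are documented in the comment above):
//   __kmpc_for_static_init_4(&loc, gtid, schedtype, &lastiter,
//                            &lb, &ub, &stride, /*incr=*/1, chunk);
// The runtime rewrites lb/ub in place to this thread's chunk bounds, and the
// caller iterates over [lb, ub] before calling __kmpc_for_static_fini.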

void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert((isOpenMPWorksharingDirective(DKind) || (DKind == OMPD_loop)) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      OMPBuilder.createForStaticInitFunction(Values.IVSize, Values.IVSigned,
                                             false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}

void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = OMPBuilder.createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}

void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  assert((DKind == OMPD_distribute || DKind == OMPD_for ||
          DKind == OMPD_sections) &&
         "Expected distribute, for, or sections directive kind");
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind) ||
                                 (DKind == OMPD_target_teams_loop)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) &&
      CGM.getLangOpts().OpenMPIsTargetDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}

void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(OMPBuilder.createDispatchFiniFunction(IVSize, IVSigned),
                      Args);
}

llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      IL.emitRawPointer(CGF), // &isLastIter
      LB.emitRawPointer(CGF), // &Lower
      UB.emitRawPointer(CGF), // &Upper
      ST.emitRawPointer(CGF)  // &Stride
  };
  llvm::Value *Call = CGF.EmitRuntimeCall(
      OMPBuilder.createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}

void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}

void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}

void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}

namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace

void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OMPBuilder.OffloadInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst().getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst().getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(ErrorReportFn);
}

void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}

namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace

static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}

static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /* private vars */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
             E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}

static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TagTypeKind::Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
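
// Layout note: the addFieldToRecordDecl() calls above must stay in sync with
// the KmpTaskTFields enum earlier in this file (shareds, routine, part_id,
// data1, data2, then the taskloop-only lb/ub/st/liter/reductions fields),
// since field accesses such as std::next(field_begin(), KmpTaskTPartId) index
// the record positionally.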

static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
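
// Illustrative sketch of the resulting layout (names are made up): for a task
// with firstprivate(x, y), x an int and y a double, the records built above
// correspond roughly to
//   struct kmp_task_t_with_privates {
//     kmp_task_t task_data;
//     struct { double y; int x; } privates; // privates sorted by alignment
//   };
// The sorting by alignment happens later, in emitTaskInit().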

/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .emitRawPointer(CGF)};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}

static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamKind::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);

  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
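
// Rough sketch of the emitted destructor (assuming a single private `s` of a
// class type S with a non-trivial destructor):
//   kmp_int32 .omp_task_destructor.(kmp_int32 gtid,
//                                   kmp_task_t_with_privates *tt) {
//     tt->privates.s.~S();
//     return 0;
//   }
// The actual cleanups are scheduled through CGF.pushDestroy above.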

/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamKind::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamKind::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamKind::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}

/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress().withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(), SharedRefLValue.getAddress(), Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress());
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
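
// Note on the array case above: a firstprivate array with a trivial
// initializer is copied with one aggregate assignment (effectively a memcpy),
// while a non-trivial CXXConstructExpr forces the element-by-element path,
// e.g. (illustrative):
//   struct S { S(const S &); };
//   S arr[4];
//   #pragma omp task firstprivate(arr) // each arr[i] is copy-constructed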

/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}

/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamKind::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}

/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}

namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    (void)Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress().getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      auto *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
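
// For reference, an iterator modifier such as
//   #pragma omp task depend(iterator(i = 0 : n), in : a[i])
// makes this scope wrap the emitted depend-filling code in the equivalent of
//   for (counter = 0; counter < n; ++counter) { i = begin + counter * step; ... }
// (illustrative sketch of the control flow built above).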

static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<ArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal = CGF.EmitArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress();
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.emitRawPointer(CGF),
        /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
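
// Illustrative results: for an array-shaping expression ([n][m])p the size is
// n * m * sizeof(*p); for an array section a[lo:len] the size is the byte
// distance from the section start to one past its upper bound; otherwise
// sizeof of the expression's type is used.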

/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
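
// C rendering of the record built above (roughly; the runtime's flags field
// is a bitfield struct in kmp.h):
//   struct kmp_task_affinity_info_t {
//     intptr_t base_addr;
//     size_t len;
//     uint32_t flags;
//   };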

CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);

  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          TaskEntry, KmpRoutineEntryPtrTy)};
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy = C.getVariableArrayType(
          KmpTaskAffinityInfoTy, OVE, ArraySizeModifier::Normal,
          /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamKind::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateGEP(CGF, AffinitiesArray, Idx),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    // kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    // naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.emitRawPointer(CGF), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignRawAddrLValue(NewTaskNewTaskTTy,
                                                  KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
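
// For a simple tied task with no device clause, the allocation emitted above
// reduces to roughly (illustrative):
//   kmp_task_t *t = __kmpc_omp_task_alloc(&loc, gtid, /*flags=*/1,
//                                         sizeof(kmp_task_t_with_privates),
//                                         sizeof_shareds, .omp_task_entry.);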

/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}

/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
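
// C rendering of the record built above (for reference):
//   struct kmp_depend_info {
//     intptr_t base_addr;
//     size_t len;
//     unsigned char flags; // boolean-width field holding RTLDependenceKindTy
//   };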

std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress().withElementType(
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      CGF, Base.getAddress(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
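
// Layout note: a depobj handle points at element 1 of its kmp_depend_info
// array; element 0 (reached here via the GEP at index -1) stores the number
// of dependencies in its base_addr field, which is what this helper loads.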

static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(CGF, DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}

SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress());
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}

void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(CGF, DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(), Size);

      // Increase pos.
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      llvm::Value *ClauseIteratorSpace =
          llvm::ConstantInt::get(CGF.IntPtrTy, 1);
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        ClauseIteratorSpace = CGF.Builder.CreateNUWMul(Sz, ClauseIteratorSpace);
      }
      llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
          ClauseIteratorSpace,
          llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
      NumOfRegularWithIterators =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArraySizeModifier::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamKind::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}
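
// Sizing example (illustrative): for
//   depend(in : a, b) depend(iterator(i = 0 : n), out : c[i]) depend(depobj : d)
// the array holds 2 regular entries, n iterator-generated entries, and d's
// element count, so it is emitted as a VLA and filled in the three passes
// above: plain deps, then iterator deps, then depobj contents.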
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}
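
// Note the depobj array layout produced above: element 0 stores the number
// of dependencies in its base_addr field and the payload starts at element
// 1. The returned address points past the counter element, which is why
// emitDestroyClause below steps back by one element before freeing the
// allocation.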
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(DepobjLVal.getAddress(),
                                            C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.emitRawPointer(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(Begin.getElementType(),
                                           Begin.emitRawPointer(CGF), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.emitRawPointer(CGF), EntryBB);
  Begin = Begin.withPointer(ElementPHI, KnownNonNull);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  llvm::Value *ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext")
          .emitRawPointer(CGF);
  ElementPHI->addIncoming(ElementNext, CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext, End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
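
// This loop implements 'depobj(x) update(<kind>)': it walks every
// kmp_depend_info entry stored in the depobj array and rewrites only the
// flags field with the new dependence kind, leaving base addresses and
// sizes untouched.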
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = {UpLoc, ThreadID, NewTask};
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.emitRawPointer(CGF);
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
    DepWaitTaskArgs[6] =
        llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}
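
// Schematically, for '#pragma omp task if(cond)' the emitted code is:
//   if (cond) {
//     __kmpc_omp_task(loc, gtid, new_task);              // deferred task
//   } else {
//     __kmpc_omp_task_begin_if0(loc, gtid, new_task);
//     proxy_task_entry(gtid, new_task);                  // undeferred task
//     __kmpc_omp_task_complete_if0(loc, gtid, new_task);
//   }
// with the *_with_deps/taskwait_deps variants used when a depend clause is
// present.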
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(), LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(), UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(), StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}
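
// The 'sched' argument above encodes the schedule clause: 0 (NoSchedule)
// when neither grainsize nor num_tasks is given, 1 for grainsize and 2 for
// num_tasks; e.g. '#pragma omp taskloop num_tasks(8)' passes sched=2 with
// the value 8 in the grainsize parameter slot of __kmpc_taskloop.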
/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.emitRawPointer(CGF);
  llvm::Value *LHSBegin = LHSAddr.emitRawPointer(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Scope.addPrivate(LHSVar, LHSElementCurrent);
  Scope.addPrivate(RHSVar, RHSElementCurrent);
  Scope.Privatize();
  RedOpGen(CGF, XExpr, EExpr, UpExpr);
  Scope.ForceCleanup();

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
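
// The emitted control flow is a classic pointer-bumping loop:
//   entry:             br (lhs.begin == lhs.end), done, body
//   omp.arraycpy.body: phis over the current lhs/rhs elements; RedOpGen is
//                      applied with both variables privatized to the
//                      current elements; pointers advance; br done/body
//   omp.arraycpy.done: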
/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}
llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    StringRef ReducerName, SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamKind::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getReductionFuncName(ReducerName);
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;
  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();
  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }
  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy = C.getConstantArrayType(
      C.VoidPtrTy, ArraySize, nullptr, ArraySizeModifier::Normal,
      /*IndexTypeQuals=*/0);
  RawAddress ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn = emitReductionFunction(
      CGF.CurFn->getName(), Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
      Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);
  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, std::nullopt,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);
  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, std::nullopt,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}
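
// For example, '#pragma omp parallel for reduction(+ : sum)' reaches this
// point with a single reduction op of the form 'sum = sum + sum.priv':
// case 1 runs it under the __kmpc_reduce lock protocol, while case 2
// lowers it to an atomic update via EmitOMPAtomicSimpleUpdateExpr where
// the operation permits, falling back to a named critical region
// otherwise.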
/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}
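
// These names key the artificial threadprivate variables that carry
// non-constant reduction item sizes (stored by emitTaskReductionFixups
// below), so the matching "reduction_size" entry can be reloaded from the
// .red_init/.red_comb/.red_fini helpers emitted next.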
/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamKind::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param).withElementType(
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamInOut)
              .withElementType(
                  CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.GetAddrOfLocalVar(&ParamIn).withElementType(
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}
/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamKind::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}
llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType =
      C.getConstantArrayType(RDType, ArraySize, nullptr,
                             ArraySizeModifier::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  RawAddress TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignRawAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *Shared = RCG.getSharedLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Shared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *Orig = RCG.getOrigLValue(Cnt).getPointer(CGF);
    CGF.EmitStoreOfScalar(Orig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr = emitReduceInitFunction(CGM, Loc, RCG, Cnt);
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr =
        Fini ? Fini : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]);
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(), FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}
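
// Both runtime entry points return an opaque taskgroup handle (void *);
// callers stash it (e.g. in the task's 'reductions' field) and later pass
// it back as the 'tg' argument of __kmpc_task_reduction_get_th_data in
// getTaskReductionItem below.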
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int gtid,
  // int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}
void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg, void
  // *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    if (!Data.Dependences.empty()) {
      llvm::Value *DepWaitTaskArgs[7];
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.emitRawPointer(CGF);
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
      DepWaitTaskArgs[6] =
          llvm::ConstantInt::get(CGF.Int32Ty, Data.HasNowaitClause);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_taskwait_deps_51(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list,
      // kmp_int32 has_no_wait); if dependence info is specified.
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_taskwait_deps_51),
                          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
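
// Schematically: a bare '#pragma omp taskwait' becomes a single
// __kmpc_omp_taskwait(loc, gtid) call, while
// '#pragma omp taskwait depend(in : x)' instead builds a dependence array
// and calls __kmpc_omp_taskwait_deps_51 so that only the matching
// predecessor tasks are waited on.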
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}

void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
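
// Illustrative sketch (not part of the original source): for
// '#pragma omp cancel sections if(c)', ThenGen is wrapped by emitIfClause, so
// the runtime call is emitted only on the 'then' path:
//
//   if (c) {
//     %res = call i32 @__kmpc_cancel(ptr @loc, i32 %gtid, i32 3 /*sections*/)
//     ; %res != 0 branches to .cancel.exit and leaves the construct
//   }
//   ; when 'c' is false no runtime call is made and execution continues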

/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};

void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}

void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits = Addr.emitRawPointer(CGF);

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitAutoVarAlloca(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}

void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
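
// Illustrative sketch (not part of the original source): for a directive like
//
//   omp_allocator_handle_t a;          // 'a' and 'traits' are hypothetical
//   #pragma omp target uses_allocators(a(traits))
//
// the Enter/Exit actions above bracket the region with, roughly:
//
//   %h = call ptr @__kmpc_init_allocator(i32 %tid, ptr null /*memspace*/,
//                                        i32 <number of traits>, ptr %traits)
//   ... target region using %h ...
//   call void @__kmpc_destroy_allocator(i32 %tid, ptr %h)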

void CGOpenMPRuntime::computeMinAndMaxThreadsAndTeams(
    const OMPExecutableDirective &D, CodeGenFunction &CGF,
    int32_t &MinThreadsVal, int32_t &MaxThreadsVal, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  getNumTeamsExprForTargetDirective(CGF, D, MinTeamsVal, MaxTeamsVal);
  getNumThreadsExprForTargetDirective(CGF, D, MaxThreadsVal,
                                      /*UpperBoundOnly=*/true);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      int32_t AttrMinThreadsVal = 1, AttrMaxThreadsVal = -1;
      int32_t AttrMinBlocksVal = 1, AttrMaxBlocksVal = -1;
      if (auto *Attr = dyn_cast<CUDALaunchBoundsAttr>(A))
        CGM.handleCUDALaunchBoundsAttr(nullptr, Attr, &AttrMaxThreadsVal,
                                       &AttrMinBlocksVal, &AttrMaxBlocksVal);
      else if (auto *Attr = dyn_cast<AMDGPUFlatWorkGroupSizeAttr>(A))
        CGM.handleAMDGPUFlatWorkGroupSizeAttr(
            nullptr, Attr, /*ReqdWGS=*/nullptr, &AttrMinThreadsVal,
            &AttrMaxThreadsVal);
      else
        continue;

      MinThreadsVal = std::max(MinThreadsVal, AttrMinThreadsVal);
      if (AttrMaxThreadsVal > 0)
        MaxThreadsVal = MaxThreadsVal > 0
                            ? std::min(MaxThreadsVal, AttrMaxThreadsVal)
                            : AttrMaxThreadsVal;
      MinTeamsVal = std::max(MinTeamsVal, AttrMinBlocksVal);
      if (AttrMaxBlocksVal > 0)
        MaxTeamsVal = MaxTeamsVal > 0 ? std::min(MaxTeamsVal, AttrMaxBlocksVal)
                                      : AttrMaxBlocksVal;
    }
  }
}
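
// Illustrative sketch (not part of the original source): assuming a clause
// such as
//
//   #pragma omp target ompx_attribute(__attribute__((launch_bounds(128, 2))))
//
// handleCUDALaunchBoundsAttr reports AttrMaxThreadsVal = 128 and
// AttrMinBlocksVal = 2, so the loop above tightens the result to
// MaxThreadsVal = min(MaxThreadsVal, 128) and MinTeamsVal = max(MinTeamsVal, 2).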

void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {

  llvm::TargetRegionEntryInfo EntryInfo =
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder, D.getBeginLoc(), ParentName);

  CodeGenFunction CGF(CGM, true);
  llvm::OpenMPIRBuilder::FunctionGenCallback &&GenerateOutlinedFunction =
      [&CGF, &D, &CodeGen](StringRef EntryFnName) {
        const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

        CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        return CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());
      };

  OMPBuilder.emitTargetRegionFunction(EntryInfo, GenerateOutlinedFunction,
                                      IsOffloadEntry, OutlinedFn, OutlinedFnID);

  if (!OutlinedFn)
    return;

  CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);

  for (auto *C : D.getClausesOfKind<OMPXAttributeClause>()) {
    for (auto *A : C->getAttrs()) {
      if (auto *Attr = dyn_cast<AMDGPUWavesPerEUAttr>(A))
        CGM.handleAMDGPUWavesPerEUAttr(OutlinedFn, Attr);
    }
  }
}

/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}

const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
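
// Illustrative sketch (not part of the original source): for a captured body
//
//   {
//     ;                      // NullStmt - ignored
//     int Unused;            // unused local - ignored
//     #pragma omp teams ...  // the only "real" child
//   }
//
// the walk above returns the 'teams' directive, letting callers such as
// getNumTeamsExprForTargetDirective below look through trivial wrappers.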

const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &MinTeamsVal,
    int32_t &MaxTeamsVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
          return NumTeams;
        }
        MinTeamsVal = MaxTeamsVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        MinTeamsVal = MaxTeamsVal = 1;
        return nullptr;
      }
      MinTeamsVal = MaxTeamsVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region.
    MinTeamsVal = MaxTeamsVal = -1;
    return nullptr;
  }
  case OMPD_target_teams_loop:
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          MinTeamsVal = MaxTeamsVal = Constant->getExtValue();
      return NumTeams;
    }
    MinTeamsVal = MaxTeamsVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_parallel_loop:
  case OMPD_target_simd:
    MinTeamsVal = MaxTeamsVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_loop:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  case OMPD_unknown:
    break;
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
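
// Illustrative sketch (not part of the original source): for
//
//   #pragma omp target teams num_teams(8)
//
// the target-teams case above folds the constant, so MinTeamsVal ==
// MaxTeamsVal == 8 and the num_teams expression is returned; a plain
// '#pragma omp target' whose only child is a 'parallel' region instead
// reports exactly one team (MinTeamsVal = MaxTeamsVal = 1).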

llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsTargetDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t MinNT = -1, MaxNT = -1;
  const Expr *NumTeams =
      getNumTeamsExprForTargetDirective(CGF, D, MinNT, MaxNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  assert(MinNT == MaxNT && "Num threads ranges require handling here.");
  return llvm::ConstantInt::get(CGF.Int32Ty, MinNT);
}
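
// Illustrative sketch (not part of the original source): for a non-constant
// clause such as 'num_teams(n)' the code above emits the scalar expression
// and casts it, roughly:
//
//   %n.val = <emitted expression for n>
//   %teams = <integer cast of %n.val to i32>  ; Bld.CreateIntCast(..., Int32Ty)
//
// whereas the folded constant case simply yields 'i32 <MinNT>'.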

/// Check for a num threads constant value (stored in \p DefaultVal), or
/// expression (stored in \p E). If the value is conditional (via an if-clause),
/// store the condition in \p CondVal. If \p E, and \p CondVal respectively, are
/// nullptr, no expression evaluation is performed.
static void getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                          const Expr **E, int32_t &UpperBound,
                          bool UpperBoundOnly, llvm::Value **CondVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
  if (!Dir)
    return;

  if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (CondVal && Dir->hasClausesOfKind<OMPIfClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *CondExpr = IfClause->getCondition();
        bool Result;
        if (CondExpr->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result) {
            UpperBound = 1;
            return;
          }
        } else {
          CodeGenFunction::LexicalScope Scope(CGF, CondExpr->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
          *CondVal = CGF.EvaluateExprAsBool(CondExpr);
        }
      }
    }
    // Check the value of num_threads clause iff if clause was not specified
    // or is not evaluated to false.
    if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      const auto *NumThreadsClause =
          Dir->getSingleClause<OMPNumThreadsClause>();
      const Expr *NTExpr = NumThreadsClause->getNumThreads();
      if (NTExpr->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NTExpr->getIntegerConstantExpr(CGF.getContext()))
          UpperBound = UpperBound
                           ? Constant->getZExtValue()
                           : std::min(UpperBound,
                                      static_cast<int32_t>(
                                          Constant->getZExtValue()));
      // If we haven't found an upper bound, remember we saw a thread limiting
      // clause.
      if (UpperBound == -1)
        UpperBound = 0;
      if (!E)
        return;
      CodeGenFunction::LexicalScope Scope(CGF, NTExpr->getSourceRange());
      if (const auto *PreInit =
              cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
        for (const auto *I : PreInit->decls()) {
          if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
            CGF.EmitVarDecl(cast<VarDecl>(*I));
          } else {
            CodeGenFunction::AutoVarEmission Emission =
                CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
            CGF.EmitAutoVarCleanups(Emission);
          }
        }
      }
      *E = NTExpr;
    }
    return;
  }
  if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
    UpperBound = 1;
}
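
// Illustrative sketch (not part of the original source): if the captured
// statement's single child is
//
//   #pragma omp parallel if(c) num_threads(4)
//
// the helper above sets UpperBound = 4, stores the num_threads expression in
// *E, and materializes the if-condition into *CondVal so that the caller can
// emit 'c ? 4 : 1' (see emitNumThreadsForTargetDirective below).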

const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, int32_t &UpperBound,
    bool UpperBoundOnly, llvm::Value **CondVal, const Expr **ThreadLimitExpr) {
  assert((!CGF.getLangOpts().OpenMPIsTargetDevice || UpperBoundOnly) &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  const Expr *NT = nullptr;
  const Expr **NTPtr = UpperBoundOnly ? nullptr : &NT;

  auto CheckForConstExpr = [&](const Expr *E, const Expr **EPtr) {
    if (E->isIntegerConstantExpr(CGF.getContext())) {
      if (auto Constant = E->getIntegerConstantExpr(CGF.getContext()))
        UpperBound = UpperBound ? Constant->getZExtValue()
                                : std::min(UpperBound,
                                           int32_t(Constant->getZExtValue()));
    }
    // If we haven't found an upper bound, remember we saw a thread limiting
    // clause.
    if (UpperBound == -1)
      UpperBound = 0;
    if (EPtr)
      *EPtr = E;
  };

  auto ReturnSequential = [&]() {
    UpperBound = 1;
    return NT;
  };

  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    // TODO: The standard is not clear how to resolve two thread limit clauses,
    //       let's pick the teams one if it's present, otherwise the target one.
    const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (const auto *TLC = Dir->getSingleClause<OMPThreadLimitClause>()) {
        ThreadLimitClause = TLC;
        if (ThreadLimitExpr) {
          CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
          CodeGenFunction::LexicalScope Scope(
              CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
          if (const auto *PreInit =
                  cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
            for (const auto *I : PreInit->decls()) {
              if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                CGF.EmitVarDecl(cast<VarDecl>(*I));
              } else {
                CodeGenFunction::AutoVarEmission Emission =
                    CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                CGF.EmitAutoVarCleanups(Emission);
              }
            }
          }
        }
      }
    }
    if (ThreadLimitClause)
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPParallelDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      } else if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return ReturnSequential();
    }
    return NT;
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        getNumThreads(CGF, CS, NTPtr, UpperBound, UpperBoundOnly, CondVal);
      }
    }
    return NT;
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    getNumThreads(CGF, D.getInnermostCapturedStmt(), NTPtr, UpperBound,
                  UpperBoundOnly, CondVal);
    return NT;
  case OMPD_target_teams_loop:
  case OMPD_target_parallel_loop:
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (CondVal && D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return ReturnSequential();
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          *CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      CheckForConstExpr(ThreadLimitClause->getThreadLimit(), ThreadLimitExpr);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      CheckForConstExpr(NumThreadsClause->getNumThreads(), nullptr);
      return NumThreadsClause->getNumThreads();
    }
    return NT;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return ReturnSequential();
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}

llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  llvm::Value *NumThreadsVal = nullptr;
  llvm::Value *CondVal = nullptr;
  llvm::Value *ThreadLimitVal = nullptr;
  const Expr *ThreadLimitExpr = nullptr;
  int32_t UpperBound = -1;

  const Expr *NT = getNumThreadsExprForTargetDirective(
      CGF, D, UpperBound, /* UpperBoundOnly */ false, &CondVal,
      &ThreadLimitExpr);

  // Thread limit expressions are used below, emit them.
  if (ThreadLimitExpr) {
    ThreadLimitVal =
        CGF.EmitScalarExpr(ThreadLimitExpr, /*IgnoreResultAssign=*/true);
    ThreadLimitVal = CGF.Builder.CreateIntCast(ThreadLimitVal, CGF.Int32Ty,
                                               /*isSigned=*/false);
  }

  // Generate the num threads expression.
  if (UpperBound == 1) {
    NumThreadsVal = CGF.Builder.getInt32(UpperBound);
  } else if (NT) {
    NumThreadsVal = CGF.EmitScalarExpr(NT, /*IgnoreResultAssign=*/true);
    NumThreadsVal = CGF.Builder.CreateIntCast(NumThreadsVal, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else if (ThreadLimitVal) {
    // If we do not have a num threads value but a thread limit, replace the
    // former with the latter. The thread limit expression was already handled.
    NumThreadsVal = ThreadLimitVal;
    ThreadLimitVal = nullptr;
  } else {
    // Default to "0" which means runtime choice.
    assert(!ThreadLimitVal && "Default not applicable with thread limit value");
    NumThreadsVal = CGF.Builder.getInt32(0);
  }

  // Handle if clause. If if clause present, the number of threads is
  // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
  if (CondVal) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    NumThreadsVal = CGF.Builder.CreateSelect(CondVal, NumThreadsVal,
                                             CGF.Builder.getInt32(1));
  }

  // If the thread limit and num threads expressions were both present, take
  // the minimum.
  if (ThreadLimitVal) {
    NumThreadsVal = CGF.Builder.CreateSelect(
        CGF.Builder.CreateICmpULT(ThreadLimitVal, NumThreadsVal),
        ThreadLimitVal, NumThreadsVal);
  }

  return NumThreadsVal;
}
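
// Illustrative sketch (not part of the original source): combining
// 'thread_limit(tl)' with 'num_threads(nt) if(c)' produces, roughly:
//
//   %nt.sel  = select i1 %c, i32 %nt, i32 1               ; if-clause handling
//   %is.less = icmp ult i32 %tl, %nt.sel
//   %threads = select i1 %is.less, i32 %tl, i32 %nt.sel   ; min(tl, nt)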

LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain =
             static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                 OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
         !(Remain & 1); Remain = Remain >> 1)
      Offset++;
    return Offset;
  }
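
  // Illustrative note (not part of the original source): OMP_MAP_MEMBER_OF
  // occupies the high bits of the 64-bit flag word, so the loop above simply
  // counts the trailing zero bits of its mask; a MEMBER_OF(n) annotation can
  // then be encoded as (uint64_t)n << getFlagMemberOffset().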

  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };

  using DeviceInfoTy = llvm::OpenMPIRBuilder::DeviceInfoTy;
  using MapBaseValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapValuesArrayTy = llvm::OpenMPIRBuilder::MapValuesArrayTy;
  using MapFlagsArrayTy = llvm::OpenMPIRBuilder::MapFlagsArrayTy;
  using MapDimArrayTy = llvm::OpenMPIRBuilder::MapDimArrayTy;
  using MapNonContiguousArrayTy =
      llvm::OpenMPIRBuilder::MapNonContiguousArrayTy;
  using MapExprsArrayTy = SmallVector<MappingExprInfo, 4>;
  using MapValueDeclsArrayTy = SmallVector<const ValueDecl *, 4>;

  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy : llvm::OpenMPIRBuilder::MapInfosTy {
    MapExprsArrayTy Exprs;
    MapValueDeclsArrayTy Mappers;
    MapValueDeclsArrayTy DevicePtrDecls;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      DevicePtrDecls.append(CurInfo.DevicePtrDecls.begin(),
                            CurInfo.DevicePtrDecls.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      llvm::OpenMPIRBuilder::MapInfosTy::append(CurInfo);
    }
  };

  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };

  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };

  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;

  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<ArraySectionExpr>(E)) {
      QualType BaseTy = ArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
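
  // Illustrative sketch (not part of the original source): for
  //   int a[100];  ... map(a[lb:])     // lower bound, no length
  // the tail of the function above computes
  //   size = (sizeof(a) > lb * sizeof(int)) ? sizeof(a) - lb * sizeof(int) : 0
  // via NUW mul/sub plus a select, while map(a[1:24]) takes the LenExpr path
  // and yields 24 * sizeof(int).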

  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// type.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT
                   : OpenMPOffloadMappingFlags::OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release are the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // map types.
      break;
    case OMPC_MAP_to:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OpenMPOffloadMappingFlags::OMP_MAP_NON_CONTIG;
    return Bits;
  }
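
  // Illustrative sketch (not part of the original source):
  //   map(always, close, tofrom: x)   // x passed as a target parameter
  // yields OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS | OMP_MAP_CLOSE |
  // OMP_MAP_TARGET_PARAM; OMP_MAP_IMPLICIT is added only for implicit maps and
  // OMP_MAP_PTR_AND_OBJ only when AddPtrFlag is set.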

  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<ArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = ArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSExtSize() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's the user's fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }

  /// Generate the base pointers, section pointers, sizes, map type bits, and
  /// user-defined mappers (all included in \a CombinedInfo) for the provided
  /// map type, map or motion modifiers, and expression components.
  /// \a IsFirstComponent should be set to true if the provided set of
  /// components is the first associated with a capture.
  void generateInfoForComponentList(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
      MapCombinedInfoTy &CombinedInfo,
      MapCombinedInfoTy &StructBaseCombinedInfo,
      StructRangeInfoTy &PartialStruct, bool IsFirstComponentList,
      bool IsImplicit, bool GenerateAllInfoForClauses,
      const ValueDecl *Mapper = nullptr, bool ForDeviceAddr = false,
      const ValueDecl *BaseDecl = nullptr, const Expr *MapExpr = nullptr,
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedElements = std::nullopt,
      bool AreBothBasePtrAndPteeMapped = false) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    // int **a = &i;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map((*a)[0:3])
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(*a)[0], 3*sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(**a)
    // &(*a), &(*a), sizeof(pointer), TARGET_PARAM | TO | FROM
    // &(*a), &(**a), sizeof(int), PTR_AND_OBJ | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (**), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
    //
    // map(p, p[:100])
    // ===> map(p[:100])
    // &p, &p[0], 100*sizeof(float), TARGET_PARAM | PTR_AND_OBJ | TO | FROM

    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;

    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (AreBothBasePtrAndPteeMapped && std::next(I) == CE)
      return;
    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      // BP = &Var.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress();
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (!AreBothBasePtrAndPteeMapped &&
            (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
             !VD || VD->hasLocalStorage()))
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
        ++I;
      }
    }

    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //   (1)  (2)  (3)  (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;

    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;

    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;

    // We need to check if we will be encountering any MEs. If we do not
    // encounter any ME expression it means we will be mapping the whole struct.
    // In that case we need to skip adding an entry for the struct to the
    // CombinedInfo list and instead add an entry to the StructBaseCombinedInfo
    // list only when generating all info for clauses.
    bool IsMappingWholeStruct = true;
    if (!GenerateAllInfoForClauses) {
      IsMappingWholeStruct = false;
    } else {
      for (auto TempI = I; TempI != CE; ++TempI) {
        const MemberExpr *PossibleME =
            dyn_cast<MemberExpr>(TempI->getAssociatedExpression());
        if (PossibleME) {
          IsMappingWholeStruct = false;
          break;
        }
      }
    }

    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);

      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.
      //
      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());

      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;

      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<ArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && ArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer &&
                               !(UO && UO->getOpcode() != UO_Deref) && !BO &&
                               !IsNonContiguous;

      if (OASE)
        ++DimSize;

      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");

        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress();
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress();
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress();
        }

        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
7307 std::swap(PartialStruct
.PreliminaryMapData
, CombinedInfo
);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress();
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress();
                }
                llvm::Value *ComponentLBPtr = ComponentLB.emitRawPointer(CGF);
                llvm::Value *LBPtr = LB.emitRawPointer(CGF);
                Size = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, ComponentLBPtr,
                                                 LBPtr);
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
          CombinedInfo.DevicePtrDecls.push_back(nullptr);
          CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
          CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
          llvm::Value *LBPtr = LB.emitRawPointer(CGF);
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).emitRawPointer(CGF),
              LBPtr);
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        // Skip adding an entry in the CurInfo of this combined entry if the
        // whole struct is currently being mapped. The struct needs to be added
        // in the first position before any data internal to the struct is being
        // mapped.
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          if (!IsMappingWholeStruct) {
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.emitRawPointer(CGF));
            CombinedInfo.DevicePtrDecls.push_back(nullptr);
            CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            CombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
          } else {
            StructBaseCombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            StructBaseCombinedInfo.BasePointers.push_back(
                BP.emitRawPointer(CGF));
            StructBaseCombinedInfo.DevicePtrDecls.push_back(nullptr);
            StructBaseCombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
            StructBaseCombinedInfo.Pointers.push_back(LB.emitRawPointer(CGF));
            StructBaseCombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            StructBaseCombinedInfo.NonContigInfo.Dims.push_back(
                IsNonContiguous ? DimSize : 1);
          }

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          if (!IsMappingWholeStruct)
            CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          else
            StructBaseCombinedInfo.Mappers.push_back(HasMapper ? Mapper
                                                               : nullptr);

          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags =
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             !IsExpressionFirstInfo || RequiresReference ||
                                 FirstPointerInComplexData || IsMemberReference,
                             AreBothBasePtrAndPteeMapped ||
                                 (IsCaptureFirstInfo && !RequiresReference),
                             IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as well,
            // then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                         OpenMPOffloadMappingFlags::OMP_MAP_FROM |
                         OpenMPOffloadMappingFlags::OMP_MAP_ALWAYS |
                         OpenMPOffloadMappingFlags::OMP_MAP_DELETE |
                         OpenMPOffloadMappingFlags::OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          if (!IsMappingWholeStruct)
            CombinedInfo.Types.push_back(Flags);
          else
            StructBaseCombinedInfo.Types.push_back(Flags);
        }
        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress();
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
          }
        }

        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran into the whole component, allocate space for the whole
    // variable.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;
    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array section, we need to initialize the first
    // dimension size as 1, first offset as 0, and first count as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;

    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
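    // Illustrative note (assumed example, not from the original comment): a
    // strided update such as
    //   #pragma omp target update to(arr[0:5:2])
    // takes this path; the dummy leading dimension initialized above keeps
    // the offset, count, and stride vectors aligned with the real dimensions
    // collected below.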
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = ArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension size except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case of having a pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, CAT->getZExtSize()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }

    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimensions.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);

    // Collect info for non-contiguous. Notice that offset, count, and stride
    // are only meaningful for array-section, so we insert a null for anything
    // other than array-section.
    // Also, the sizes of offset, count, and stride are not the same as those
    // of pointers, base_pointers, sizes, or dims. Instead, they equal the
    // number of non-contiguous declarations in the target update to/from
    // clause.
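    // For instance (assumed example, not from the original comment): a single
    // non-contiguous declaration in
    //   #pragma omp target update to(arr[0:2:2][1:2])
    // contributes exactly one offsets/counts/strides triple here, independent
    // of how many base pointers or dims it produces elsewhere.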
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<ArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a high dimension is an array section, we construct
        // all the lower dimensions as array sections. However, for a case like
        // arr[0:2][2], Clang constructs the inner dimension as an array
        // section even though it is actually not in array-section form
        // according to the spec.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to
          // ⌈(size − lower-bound)/stride⌉, where size is the size of the
          // array dimension.
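          // Worked example (assumed, not from the original comment): for
          // 'int a[10]' and the section 'a[2::4]', the defaulted length is
          // (10 - 2) / 4 = 2, i.e. elements a[2] and a[6].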
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //              Offset Count     Stride
      //    D0          0      1          4    (int)    <- dummy dimension
      //    D1          0      2          8    (2 * (1) * 4)
      //    D2          1      2         20    (1 * (1 * 5) * 4)
      //    D3          0      2        200    (2 * (1 * 5 * 5) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }

    CombinedInfo.NonContigInfo.Offsets.push_back(CurOffsets);
    CombinedInfo.NonContigInfo.Counts.push_back(CurCounts);
    CombinedInfo.NonContigInfo.Strides.push_back(CurStrides);
  }
  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return OpenMPOffloadMappingFlags::OMP_MAP_TO |
               OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ;
      return OpenMPOffloadMappingFlags::OMP_MAP_PRIVATE |
             OpenMPOffloadMappingFlags::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/std::nullopt, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return OpenMPOffloadMappingFlags::OMP_MAP_TO |
           OpenMPOffloadMappingFlags::OMP_MAP_FROM;
  }
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();
    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in the records from the non-virtual bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      QualType BaseTy = I.getType();
      const auto *Base = BaseTy->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy) ||
          CGF.getContext()
              .getASTRecordLayout(Base)
              .getNonVirtualSize()
              .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      QualType BaseTy = I.getType();
      // Ignore empty bases.
      if (isEmptyRecordForLayout(CGF.getContext(), BaseTy))
        continue;

      const auto *Base = BaseTy->getAsCXXRecordDecl();
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() &&
          !isEmptyFieldForLayout(CGF.getContext(), Field)) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
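    // For instance (assumed example): 'map(to: s.x) map(from: s.y)' groups
    // both component lists under the declaration of 's', so the struct gets a
    // single combined entry with consistent MEMBER_OF flags for its members.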
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;

    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), std::nullopt,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, std::nullopt,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from,
                std::nullopt, C->getMotionModifiers(),
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                *EI);
        ++EI;
      }
    }
    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information
    // for an entry in the use_device_ptr and use_device_addr list, we create
    // one with map type 'alloc' and zero size section. It is the user's fault
    // if that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
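    // Example (assumed, not from the original comment):
    //   #pragma omp target data map(tofrom: s) use_device_ptr(s.p)
    // marks the existing entry for 's.p' as RETURN_PARAM, while
    // 'use_device_ptr(q)' with no prior map of 'q' gets a zero-size
    // 'alloc'-style entry instead.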
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF, bool IsDevAddr) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr);
          UseDeviceDataCombinedInfo.DevicePtrDecls.emplace_back(VD);
          UseDeviceDataCombinedInfo.DevicePointers.emplace_back(
              IsDevAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(
              OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };
    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, std::nullopt,
                    std::nullopt, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF, IsDevAddr);
          }
        };
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ForDeviceAddr = IsDevAddr;
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ForDeviceAddr = IsDevAddr;
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }

    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Current struct information:
      MapCombinedInfoTy CurInfo;
      // Current struct base information:
      MapCombinedInfoTy StructBaseCurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      bool HasMapBasePtr = false;
      bool HasMapArraySec = false;
      if (VD && VD->getType()->isAnyPointerType()) {
        for (const auto &M : Data.second) {
          HasMapBasePtr = any_of(M, [](const MapInfo &L) {
            return isa_and_present<DeclRefExpr>(L.VarRef);
          });
          HasMapArraySec = any_of(M, [](const MapInfo &L) {
            return isa_and_present<ArraySectionExpr, ArraySubscriptExpr>(
                L.VarRef);
          });
          if (HasMapBasePtr && HasMapArraySec)
            break;
        }
      }
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          unsigned StructBasePointersIdx =
              StructBaseCurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, StructBaseCurInfo, PartialStruct,
              /*IsFirstComponentList=*/false, L.IsImplicit,
              /*GenerateAllInfoForClauses*/ true, L.Mapper, L.ForDeviceAddr, VD,
              L.VarRef, /*OverlappedElements*/ std::nullopt,
              HasMapBasePtr && HasMapArraySec);

          // If this entry relates to a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            // Check whether a value was added to either CurInfo or
            // StructBaseCurInfo and error if no value was added to either of
            // them:
            assert((CurrentBasePointersIdx < CurInfo.BasePointers.size() ||
                    StructBasePointersIdx <
                        StructBaseCurInfo.BasePointers.size()) &&
                   "Unexpected number of mapped base pointers.");

            // Choose a base pointer index which is always valid:
            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            // If StructBaseCurInfo has been updated this iteration then work
            // on the first new entry added to it i.e. make sure that when
            // multiple values are added to any of the lists, the first value
            // added is being modified by the assignments below (not the last
            // value added).
            if (StructBasePointersIdx <
                StructBaseCurInfo.BasePointers.size()) {
              StructBaseCurInfo.DevicePtrDecls[StructBasePointersIdx] =
                  RelevantVD;
              StructBaseCurInfo.DevicePointers[StructBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              StructBaseCurInfo.Types[StructBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            } else {
              CurInfo.DevicePtrDecls[CurrentBasePointersIdx] = RelevantVD;
              CurInfo.DevicePointers[CurrentBasePointersIdx] =
                  L.ForDeviceAddr ? DeviceInfoTy::Address
                                  : DeviceInfoTy::Pointer;
              CurInfo.Types[CurrentBasePointersIdx] |=
                  OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM;
            }
          }
        }
      }
      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                OpenMPOffloadMappingFlags::OMP_MAP_RETURN_PARAM |
                OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr);
          CurInfo.DevicePtrDecls.emplace_back(L.VD);
          CurInfo.DevicePointers.emplace_back(
              L.ForDeviceAddr ? DeviceInfoTy::Address : DeviceInfoTy::Pointer);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }

      // Unify entries in one list making sure the struct mapping precedes the
      // individual fields:
      MapCombinedInfoTy UnionCurInfo;
      UnionCurInfo.append(StructBaseCurInfo);
      UnionCurInfo.append(CurInfo);

      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        UnionCurInfo.NonContigInfo.Dims.push_back(0);
        // Emit a combined entry:
        emitCombinedEntry(CombinedInfo, UnionCurInfo.Types, PartialStruct,
                          /*IsMapThis*/ !VD, OMPBuilder, VD);
      }

      // We need to append the results of this capture to what we already have.
      CombinedInfo.append(UnionCurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
    // Extract implicit firstprivates from uses_allocators clauses.
    for (const auto *C : Dir.getClausesOfKind<OMPUsesAllocatorsClause>()) {
      for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
        OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
        if (const auto *DRE = dyn_cast_or_null<DeclRefExpr>(D.AllocatorTraits))
          FirstPrivateDecls.try_emplace(cast<VarDecl>(DRE->getDecl()),
                                        /*Implicit=*/true);
        else if (const auto *VD = dyn_cast<VarDecl>(
                     cast<DeclRefExpr>(D.Allocator->IgnoreParenImpCasts())
                         ->getDecl()))
          FirstPrivateDecls.try_emplace(VD, /*Implicit=*/true);
      }
    }
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }

  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct, bool IsMapThis,
                         llvm::OpenMPIRBuilder &OMPBuilder,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) !=
         OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
    CombinedInfo.DevicePtrDecls.push_back(nullptr);
    CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.emitRawPointer(CGF);
    const CXXMethodDecl *MD =
        CGF.CurFuncDecl ? dyn_cast<CXXMethodDecl>(CGF.CurFuncDecl) : nullptr;
    const CXXRecordDecl *RD = MD ? MD->getParent() : nullptr;
    bool HasBaseClass = RD && IsMapThis ? RD->getNumBases() > 0 : false;
    // There should not be a mapper for a combined entry.
    if (HasBaseClass) {
      // OpenMP 5.2 148:21:
      // If the target construct is within a class non-static member function,
      // and a variable is an accessible data member of the object for which
      // the non-static data member function is invoked, the variable is
      // treated as if the this[:1] expression had appeared in a map clause
      // with a map-type of tofrom.
      // Emit this[:1]
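      // Assumed example (not from the original comment): inside
      //   void S::work() { ... #pragma omp target map(tofrom: x) ... }
      // where 'S' has base classes, the whole object is mapped as if
      // 'map(tofrom: this[:1])' had been written, sized by the function
      // object parameter type below.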
      CombinedInfo.Pointers.push_back(PartialStruct.Base.emitRawPointer(CGF));
      QualType Ty = MD->getFunctionObjectParameterType();
      llvm::Value *Size =
          CGF.Builder.CreateIntCast(CGF.getTypeSize(Ty), CGF.Int64Ty,
                                    /*isSigned=*/true);
      CombinedInfo.Sizes.push_back(Size);
    } else {
      CombinedInfo.Pointers.push_back(LB);
      // Size is (addr of {highest+1} element) - (addr of lowest element)
      llvm::Value *HB = HBAddr.emitRawPointer(CGF);
      llvm::Value *HAddr = CGF.Builder.CreateConstGEP1_32(
          HBAddr.getElementType(), HB, /*Idx0=*/1);
      llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
      llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
      llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
      llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                    /*isSigned=*/false);
      CombinedInfo.Sizes.push_back(Size);
    }
    CombinedInfo.Mappers.push_back(nullptr);
    // Map type is always TARGET_PARAM, if generating info for captures.
    CombinedInfo.Types.push_back(
        NotTargetParams ? OpenMPOffloadMappingFlags::OMP_MAP_NONE
                        : OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_PRESENT);
        }))
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
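    // For example (assumed, not from the original comment): with
    //   #pragma omp target enter data map(ompx_hold, tofrom: s.x)
    // both the combined entry for 's' and every member entry carry
    // OMP_MAP_OMPX_HOLD, so the runtime tracks the struct under the hold
    // reference count.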
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              Type & OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD);
        })) {
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OpenMPOffloadMappingFlags::OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        OMPBuilder.getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      OMPBuilder.setCorrectMemberOfFlag(M, MemberOfFlag);
  }
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo, llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, OMPBuilder,
                              SkipVarSet);
  }

  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all
  /// included in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo,
                                llvm::OpenMPIRBuilder &OMPBuilder) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo,
                              OMPBuilder);
  }
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.DevicePtrDecls.push_back(nullptr);
        CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
          OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      llvm::OpenMPIRBuilder &OMPBuilder,
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ |
                       OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                       OpenMPOffloadMappingFlags::OMP_MAP_MEMBER_OF |
                       OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag =
          OMPBuilder.getMemberOfFlag(TgtIdx);
      OMPBuilder.setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in an is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we
    // just pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg);
      CombinedInfo.DevicePtrDecls.emplace_back(VD);
      CombinedInfo.DevicePointers.emplace_back(DeviceInfoTy::Pointer);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
          OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields list in is_device_ptr, store it in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    bool HasMapBasePtr = false;
    bool HasMapArraySec = false;
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        if (VD && E && VD->getType()->isAnyPointerType() && isa<DeclRefExpr>(E))
          HasMapBasePtr = true;
        if (VD && E && VD->getType()->isAnyPointerType() &&
            (isa<ArraySectionExpr>(E) || isa<ArraySubscriptExpr>(E)))
          HasMapArraySec = true;
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : ArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // List with less elements is less than list with more elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return *It == FD1;
          });
    }
8698 // Associated with a capture, because the mapping flags depend on it.
8699 // Go through all of the elements with the overlapped elements.
8700 bool IsFirstComponentList
= true;
8701 MapCombinedInfoTy StructBaseCombinedInfo
;
8702 for (const auto &Pair
: OverlappedData
) {
8703 const MapData
&L
= *Pair
.getFirst();
8704 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
;
8705 OpenMPMapClauseKind MapType
;
8706 ArrayRef
<OpenMPMapModifierKind
> MapModifiers
;
8708 const ValueDecl
*Mapper
;
8710 std::tie(Components
, MapType
, MapModifiers
, IsImplicit
, Mapper
, VarRef
) =
8712 ArrayRef
<OMPClauseMappableExprCommon::MappableExprComponentListRef
>
8713 OverlappedComponents
= Pair
.getSecond();
8714 generateInfoForComponentList(
8715 MapType
, MapModifiers
, std::nullopt
, Components
, CombinedInfo
,
8716 StructBaseCombinedInfo
, PartialStruct
, IsFirstComponentList
,
8717 IsImplicit
, /*GenerateAllInfoForClauses*/ false, Mapper
,
8718 /*ForDeviceAddr=*/false, VD
, VarRef
, OverlappedComponents
);
8719 IsFirstComponentList
= false;
8721 // Go through other elements without overlapped elements.
8722 for (const MapData
&L
: DeclComponentLists
) {
8723 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
;
8724 OpenMPMapClauseKind MapType
;
8725 ArrayRef
<OpenMPMapModifierKind
> MapModifiers
;
8727 const ValueDecl
*Mapper
;
8729 std::tie(Components
, MapType
, MapModifiers
, IsImplicit
, Mapper
, VarRef
) =
8731 auto It
= OverlappedData
.find(&L
);
8732 if (It
== OverlappedData
.end())
8733 generateInfoForComponentList(
8734 MapType
, MapModifiers
, std::nullopt
, Components
, CombinedInfo
,
8735 StructBaseCombinedInfo
, PartialStruct
, IsFirstComponentList
,
8736 IsImplicit
, /*GenerateAllInfoForClauses*/ false, Mapper
,
8737 /*ForDeviceAddr=*/false, VD
, VarRef
,
8738 /*OverlappedElements*/ std::nullopt
,
8739 HasMapBasePtr
&& HasMapArraySec
);
8740 IsFirstComponentList
= false;
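  // Illustrative overlap handled above (a sketch; names are arbitrary): for
  //
  //   struct T { int A; double B; } V;
  //   #pragma omp target map(V) map(V.B)
  //
  // the component list for 'V.B' is recorded in OverlappedData against the
  // base list for 'V', so the two clauses are emitted as one combined struct
  // entry instead of two independent mappings.
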
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_TO |
                                   OpenMPOffloadMappingFlags::OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(
            OpenMPOffloadMappingFlags::OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.DevicePtrDecls.push_back(nullptr);
      CombinedInfo.DevicePointers.push_back(DeviceInfoTy::None);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.emitRawPointer(CGF));
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |=
        OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace

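// Illustrative default-mapping behavior of the handler above (a sketch, not
// normative): with no explicit map clause, as in
//
//   int X = 0;
//   #pragma omp target
//   { X += 1; }
//
// a scalar captured by copy is passed as OMP_MAP_LITERAL, while 'this' and
// by-reference aggregates default to 'tofrom'; every default entry also gets
// OMP_MAP_TARGET_PARAM and, when implicit, OMP_MAP_IMPLICIT.
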
// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<ArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();

  return nullptr;
}

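// Illustrative input (hypothetical): for 'map(this->Data[0:N])' the map
// expression is an array section whose base is a member access on 'this';
// the helper above returns the FieldDecl for 'Data' so the mapping
// information can carry a meaningful name and location.
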
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {

  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}

/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;
  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());

  auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
    return emitMappingInformation(CGF, OMPBuilder, MapExpr);
  };
  if (CGM.getCodeGenOpts().getDebugInfo() !=
      llvm::codegenoptions::NoDebugInfo) {
    CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
    llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                    FillInfoMap);
  }

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };
  OMPBuilder.emitOffloadingArrays(AllocaIP, CodeGenIP, CombinedInfo, Info,
                                  /*IsNonContiguous=*/true, DeviceAddrCB,
                                  CustomMapperCB);
}

/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      // For now, treat 'target' with nested 'teams loop' as if it's
      // distributed (target teams distribute).
      if (isOpenMPDistributeDirective(DKind) || DKind == OMPD_teams_loop)
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}

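// Illustrative nesting detected by the routine above (a sketch):
//
//   #pragma omp target
//   #pragma omp teams
//   #pragma omp distribute parallel for
//   for (int I = 0; I < N; ++I) ...
//
// The 'distribute parallel for' directive nested under 'teams' is returned,
// letting callers compute the loop trip count on the host.
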
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamKind::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamKind::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamKind::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleCanonicalTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();

  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info, OMPBuilder);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() ==
         llvm::codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(
        static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
            Info.Types[I]));
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc |  to   | alloc |   to   | release | delete
    // from   | alloc | alloc | from  |  from  | release | delete
    // tofrom | alloc |  to   | from  | tofrom | release | delete
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO |
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(
            ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_TO)));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);

    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the runtime
      // data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}

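// Illustrative source-level trigger for the generated mapper (names are
// arbitrary):
//
//   struct S { int Len; double *Data; };
//   #pragma omp declare mapper(id : S s) map(s, s.Data[0 : s.Len])
//
// Each list item of the map clause becomes either a nested mapper invocation
// or a __tgt_push_mapper_component call in the emitted function.
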
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_DELETE)));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(
            static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
                OpenMPOffloadMappingFlags::OMP_MAP_PTR_AND_OBJ)));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e., \p
  // Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(
          ~static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_TO |
              OpenMPOffloadMappingFlags::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(
          static_cast<std::underlying_type_t<OpenMPOffloadMappingFlags>>(
              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT)));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}

llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}

llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any. For now, treat
  // 'target_teams_loop' as if it's really a target_teams_distribute.
  if ((!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind)) &&
      Kind != OMPD_target_teams_loop)
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}

static void
emitTargetCallFallback(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                       const OMPExecutableDirective &D,
                       llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                       bool RequiresOuterTask, const CapturedStmt &CS,
                       bool OffloadingMandatory, CodeGenFunction &CGF) {
  if (OffloadingMandatory) {
    CGF.Builder.CreateUnreachable();
  } else {
    if (RequiresOuterTask) {
      CapturedVars.clear();
      CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
    }
    OMPRuntime->emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn,
                                         CapturedVars);
  }
}

static llvm::Value *emitDeviceID(
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    CodeGenFunction &CGF) {
  // Emit device ID if any.
  llvm::Value *DeviceID;
  if (Device.getPointer()) {
    assert((Device.getInt() == OMPC_DEVICE_unknown ||
            Device.getInt() == OMPC_DEVICE_device_num) &&
           "Expected device_num modifier.");
    llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
    DeviceID =
        CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
  } else {
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
  }
  return DeviceID;
}

llvm::Value *emitDynCGGroupMem(const OMPExecutableDirective &D,
                               CodeGenFunction &CGF) {
  llvm::Value *DynCGroupMem = CGF.Builder.getInt32(0);

  if (auto *DynMemClause = D.getSingleClause<OMPXDynCGroupMemClause>()) {
    CodeGenFunction::RunCleanupsScope DynCGroupMemScope(CGF);
    llvm::Value *DynCGroupMemVal = CGF.EmitScalarExpr(
        DynMemClause->getSize(), /*IgnoreResultAssign=*/true);
    DynCGroupMem = CGF.Builder.CreateIntCast(DynCGroupMemVal, CGF.Int32Ty,
                                             /*isSigned=*/false);
  }
  return DynCGroupMem;
}

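// Illustrative use of the extension clause lowered above (a sketch):
//
//   #pragma omp target teams ompx_dyn_cgroup_mem(1024)
//
// The clause expression is evaluated, cast to i32, and forwarded to the
// kernel launch as the dynamic per-team memory size; it defaults to zero.
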
static void emitTargetCallKernelLaunch(
    CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
    const OMPExecutableDirective &D,
    llvm::SmallVectorImpl<llvm::Value *> &CapturedVars, bool RequiresOuterTask,
    const CapturedStmt &CS, bool OffloadingMandatory,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::Value *OutlinedFnID, CodeGenFunction::OMPTargetDataInfo &InputInfo,
    llvm::Value *&MapTypesArray, llvm::Value *&MapNamesArray,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter,
    CodeGenFunction &CGF, CodeGenModule &CGM) {
  llvm::OpenMPIRBuilder &OMPBuilder = OMPRuntime->getOMPBuilder();

  // Fill up the arrays with all the captured variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

  // Get mappable expression information.
  MappableExprsHandler MEHandler(D, CGF);
  llvm::DenseMap<llvm::Value *, llvm::Value *> LambdaPointers;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> MappedVarSet;

  auto RI = CS.getCapturedRecordDecl()->field_begin();
  auto *CV = CapturedVars.begin();
  for (CapturedStmt::const_capture_iterator CI = CS.capture_begin(),
                                            CE = CS.capture_end();
       CI != CE; ++CI, ++RI, ++CV) {
    MappableExprsHandler::MapCombinedInfoTy CurInfo;
    MappableExprsHandler::StructRangeInfoTy PartialStruct;

    // VLA sizes are passed to the outlined region by copy and do not have map
    // information associated.
    if (CI->capturesVariableArrayType()) {
      CurInfo.Exprs.push_back(nullptr);
      CurInfo.BasePointers.push_back(*CV);
      CurInfo.DevicePtrDecls.push_back(nullptr);
      CurInfo.DevicePointers.push_back(
          MappableExprsHandler::DeviceInfoTy::None);
      CurInfo.Pointers.push_back(*CV);
      CurInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(RI->getType()), CGF.Int64Ty, /*isSigned=*/true));
      // Copy to the device as an argument. No need to retrieve it.
      CurInfo.Types.push_back(OpenMPOffloadMappingFlags::OMP_MAP_LITERAL |
                              OpenMPOffloadMappingFlags::OMP_MAP_TARGET_PARAM |
                              OpenMPOffloadMappingFlags::OMP_MAP_IMPLICIT);
      CurInfo.Mappers.push_back(nullptr);
    } else {
      // If we have any information in the map clause, we use it, otherwise we
      // just do a default mapping.
      MEHandler.generateInfoForCapture(CI, *CV, CurInfo, PartialStruct);
      if (!CI->capturesThis())
        MappedVarSet.insert(CI->getCapturedVar());
      else
        MappedVarSet.insert(nullptr);
      if (CurInfo.BasePointers.empty() && !PartialStruct.Base.isValid())
        MEHandler.generateDefaultMapInfo(*CI, **RI, *CV, CurInfo);
      // Generate correct mapping for variables captured by reference in
      // lambdas.
      if (CI->capturesVariable())
        MEHandler.generateInfoForLambdaCaptures(CI->getCapturedVar(), *CV,
                                                CurInfo, LambdaPointers);
    }
    // We expect to have at least an element of information for this capture.
    assert((!CurInfo.BasePointers.empty() || PartialStruct.Base.isValid()) &&
           "Non-existing map pointer for capture!");
    assert(CurInfo.BasePointers.size() == CurInfo.Pointers.size() &&
           CurInfo.BasePointers.size() == CurInfo.Sizes.size() &&
           CurInfo.BasePointers.size() == CurInfo.Types.size() &&
           CurInfo.BasePointers.size() == CurInfo.Mappers.size() &&
           "Inconsistent map information sizes!");

    // If there is an entry in PartialStruct it means we have a struct with
    // individual members mapped. Emit an extra combined entry.
    if (PartialStruct.Base.isValid()) {
      CombinedInfo.append(PartialStruct.PreliminaryMapData);
      MEHandler.emitCombinedEntry(
          CombinedInfo, CurInfo.Types, PartialStruct, CI->capturesThis(),
          OMPBuilder, nullptr,
          !PartialStruct.PreliminaryMapData.BasePointers.empty());
    }

    // We need to append the results of this capture to what we already have.
    CombinedInfo.append(CurInfo);
  }
  // Adjust MEMBER_OF flags for the lambdas captures.
  MEHandler.adjustMemberOfForLambdaCaptures(
      OMPBuilder, LambdaPointers, CombinedInfo.BasePointers,
      CombinedInfo.Pointers, CombinedInfo.Types);
  // Map any list items in a map clause that were not captures because they
  // weren't referenced within the construct.
  MEHandler.generateAllInfo(CombinedInfo, OMPBuilder, MappedVarSet);

  CGOpenMPRuntime::TargetDataInfo Info;
  // Fill up the arrays and create the arguments.
  emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
  bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                   llvm::codegenoptions::NoDebugInfo;
  OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                          EmitDebug,
                                          /*ForEndCall=*/false);

  InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
  InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                        CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.PointersArray =
      Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  InputInfo.SizesArray =
      Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
  InputInfo.MappersArray =
      Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
  MapTypesArray = Info.RTArgs.MapTypesArray;
  MapNamesArray = Info.RTArgs.MapNamesArray;

  auto &&ThenGen = [&OMPRuntime, OutlinedFn, &D, &CapturedVars,
                    RequiresOuterTask, &CS, OffloadingMandatory, Device,
                    OutlinedFnID, &InputInfo, &MapTypesArray, &MapNamesArray,
                    SizeEmitter](CodeGenFunction &CGF, PrePostActionTy &) {
    bool IsReverseOffloading = Device.getInt() == OMPC_DEVICE_ancestor;

    if (IsReverseOffloading) {
      // Reverse offloading is not supported, so just execute on the host.
      // FIXME: This fallback solution is incorrect since it ignores the
      // OMP_TARGET_OFFLOAD environment variable. Instead it would be better to
      // assert here and ensure SEMA emits an error.
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return;
    }

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();
    unsigned NumTargetItems = InputInfo.NumberOfTargetItems;

    llvm::Value *BasePointersArray =
        InputInfo.BasePointersArray.emitRawPointer(CGF);
    llvm::Value *PointersArray = InputInfo.PointersArray.emitRawPointer(CGF);
    llvm::Value *SizesArray = InputInfo.SizesArray.emitRawPointer(CGF);
    llvm::Value *MappersArray = InputInfo.MappersArray.emitRawPointer(CGF);

    auto &&EmitTargetCallFallbackCB =
        [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
         OffloadingMandatory, &CGF](llvm::OpenMPIRBuilder::InsertPointTy IP)
        -> llvm::OpenMPIRBuilder::InsertPointTy {
      CGF.Builder.restoreIP(IP);
      emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                             RequiresOuterTask, CS, OffloadingMandatory, CGF);
      return CGF.Builder.saveIP();
    };

    llvm::Value *DeviceID = emitDeviceID(Device, CGF);
    llvm::Value *NumTeams = OMPRuntime->emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads =
        OMPRuntime->emitNumThreadsForTargetDirective(CGF, D);
    llvm::Value *RTLoc = OMPRuntime->emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *NumIterations =
        OMPRuntime->emitTargetNumIterationsCall(CGF, D, SizeEmitter);
    llvm::Value *DynCGGroupMem = emitDynCGGroupMem(D, CGF);
    llvm::OpenMPIRBuilder::InsertPointTy AllocaIP(
        CGF.AllocaInsertPt->getParent(), CGF.AllocaInsertPt->getIterator());

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs(
        BasePointersArray, PointersArray, SizesArray, MapTypesArray,
        nullptr /* MapTypesArrayEnd */, MappersArray, MapNamesArray);

    llvm::OpenMPIRBuilder::TargetKernelArgs Args(
        NumTargetItems, RTArgs, NumIterations, NumTeams, NumThreads,
        DynCGGroupMem, HasNoWait);

    CGF.Builder.restoreIP(OMPRuntime->getOMPBuilder().emitKernelLaunch(
        CGF.Builder, OutlinedFn, OutlinedFnID, EmitTargetCallFallbackCB, Args,
        DeviceID, RTLoc, AllocaIP));
  };

  if (RequiresOuterTask)
    CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
  else
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
}

static void
emitTargetCallElse(CGOpenMPRuntime *OMPRuntime, llvm::Function *OutlinedFn,
                   const OMPExecutableDirective &D,
                   llvm::SmallVectorImpl<llvm::Value *> &CapturedVars,
                   bool RequiresOuterTask, const CapturedStmt &CS,
                   bool OffloadingMandatory, CodeGenFunction &CGF) {

  // Notify that the host version must be executed.
  auto &&ElseGen =
      [&OMPRuntime, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallFallback(OMPRuntime, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory, CGF);
      };

  if (RequiresOuterTask) {
    CodeGenFunction::OMPTargetDataInfo InputInfo;
    CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
  } else {
    OMPRuntime->emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
  }
}

void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsTargetDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask =
      D.hasClausesOfKind<OMPDependClause>() ||
      D.hasClausesOfKind<OMPNowaitClause>() ||
      D.hasClausesOfKind<OMPInReductionClause>() ||
      (CGM.getLangOpts().OpenMP >= 51 &&
       needsTaskBasedThreadLimit(D.getDirectiveKind()) &&
       D.hasClausesOfKind<OMPThreadLimitClause>());
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;

  auto &&TargetThenGen = [this, OutlinedFn, &D, &CapturedVars,
                          RequiresOuterTask, &CS, OffloadingMandatory, Device,
                          OutlinedFnID, &InputInfo, &MapTypesArray,
                          &MapNamesArray, SizeEmitter](CodeGenFunction &CGF,
                                                       PrePostActionTy &) {
    emitTargetCallKernelLaunch(this, OutlinedFn, D, CapturedVars,
                               RequiresOuterTask, CS, OffloadingMandatory,
                               Device, OutlinedFnID, InputInfo, MapTypesArray,
                               MapNamesArray, SizeEmitter, CGF, CGM);
  };

  auto &&TargetElseGen =
      [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask, &CS,
       OffloadingMandatory](CodeGenFunction &CGF, PrePostActionTy &) {
        emitTargetCallElse(this, OutlinedFn, D, CapturedVars, RequiresOuterTask,
                           CS, OffloadingMandatory, CGF);
      };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on host
  // regardless of the conditional in the if clause if, e.g., the user does not
  // specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}

*S
,
9759 StringRef ParentName
) {
9763 // Codegen OMP target directives that offload compute to the device.
9764 bool RequiresDeviceCodegen
=
9765 isa
<OMPExecutableDirective
>(S
) &&
9766 isOpenMPTargetExecutionDirective(
9767 cast
<OMPExecutableDirective
>(S
)->getDirectiveKind());
9769 if (RequiresDeviceCodegen
) {
9770 const auto &E
= *cast
<OMPExecutableDirective
>(S
);
9772 llvm::TargetRegionEntryInfo EntryInfo
= getEntryInfoFromPresumedLoc(
9773 CGM
, OMPBuilder
, E
.getBeginLoc(), ParentName
);
9775 // Is this a target region that should not be emitted as an entry point? If
9776 // so just signal we are done with this target region.
9777 if (!OMPBuilder
.OffloadInfoManager
.hasTargetRegionEntryInfo(EntryInfo
))
9780 switch (E
.getDirectiveKind()) {
9782 CodeGenFunction::EmitOMPTargetDeviceFunction(CGM
, ParentName
,
9783 cast
<OMPTargetDirective
>(E
));
9785 case OMPD_target_parallel
:
9786 CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
9787 CGM
, ParentName
, cast
<OMPTargetParallelDirective
>(E
));
9789 case OMPD_target_teams
:
9790 CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
9791 CGM
, ParentName
, cast
<OMPTargetTeamsDirective
>(E
));
9793 case OMPD_target_teams_distribute
:
9794 CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
9795 CGM
, ParentName
, cast
<OMPTargetTeamsDistributeDirective
>(E
));
9797 case OMPD_target_teams_distribute_simd
:
9798 CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
9799 CGM
, ParentName
, cast
<OMPTargetTeamsDistributeSimdDirective
>(E
));
9801 case OMPD_target_parallel_for
:
9802 CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
9803 CGM
, ParentName
, cast
<OMPTargetParallelForDirective
>(E
));
9805 case OMPD_target_parallel_for_simd
:
9806 CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
9807 CGM
, ParentName
, cast
<OMPTargetParallelForSimdDirective
>(E
));
9809 case OMPD_target_simd
:
9810 CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
9811 CGM
, ParentName
, cast
<OMPTargetSimdDirective
>(E
));
9813 case OMPD_target_teams_distribute_parallel_for
:
9814 CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
9816 cast
<OMPTargetTeamsDistributeParallelForDirective
>(E
));
9818 case OMPD_target_teams_distribute_parallel_for_simd
:
9820 EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
9822 cast
<OMPTargetTeamsDistributeParallelForSimdDirective
>(E
));
9824 case OMPD_target_teams_loop
:
9825 CodeGenFunction::EmitOMPTargetTeamsGenericLoopDeviceFunction(
9826 CGM
, ParentName
, cast
<OMPTargetTeamsGenericLoopDirective
>(E
));
9828 case OMPD_target_parallel_loop
:
9829 CodeGenFunction::EmitOMPTargetParallelGenericLoopDeviceFunction(
9830 CGM
, ParentName
, cast
<OMPTargetParallelGenericLoopDirective
>(E
));
9834 case OMPD_parallel_for
:
9835 case OMPD_parallel_master
:
9836 case OMPD_parallel_sections
:
9838 case OMPD_parallel_for_simd
:
9840 case OMPD_cancellation_point
:
9842 case OMPD_threadprivate
:
9853 case OMPD_taskyield
:
9856 case OMPD_taskgroup
:
9862 case OMPD_target_data
:
9863 case OMPD_target_exit_data
:
9864 case OMPD_target_enter_data
:
9865 case OMPD_distribute
:
9866 case OMPD_distribute_simd
:
9867 case OMPD_distribute_parallel_for
:
9868 case OMPD_distribute_parallel_for_simd
:
9869 case OMPD_teams_distribute
:
9870 case OMPD_teams_distribute_simd
:
9871 case OMPD_teams_distribute_parallel_for
:
9872 case OMPD_teams_distribute_parallel_for_simd
:
9873 case OMPD_target_update
:
9874 case OMPD_declare_simd
:
9875 case OMPD_declare_variant
:
9876 case OMPD_begin_declare_variant
:
9877 case OMPD_end_declare_variant
:
9878 case OMPD_declare_target
:
9879 case OMPD_end_declare_target
:
9880 case OMPD_declare_reduction
:
9881 case OMPD_declare_mapper
:
9883 case OMPD_taskloop_simd
:
9884 case OMPD_master_taskloop
:
9885 case OMPD_master_taskloop_simd
:
9886 case OMPD_parallel_master_taskloop
:
9887 case OMPD_parallel_master_taskloop_simd
:
9889 case OMPD_metadirective
:
9892 llvm_unreachable("Unknown target directive for OpenMP device codegen.");
9897 if (const auto *E
= dyn_cast
<OMPExecutableDirective
>(S
)) {
9898 if (!E
->hasAssociatedStmt() || !E
->getAssociatedStmt())
9901 scanForTargetRegionsFunctions(E
->getRawStmt(), ParentName
);
9905 // If this is a lambda function, look into its body.
9906 if (const auto *L
= dyn_cast
<LambdaExpr
>(S
))
9909 // Keep looking for target regions recursively.
9910 for (const Stmt
*II
: S
->children())
9911 scanForTargetRegionsFunctions(II
, ParentName
);
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  std::optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}

) {
9929 // If emitting code for the host, we do not process FD here. Instead we do
9930 // the normal code generation.
9931 if (!CGM
.getLangOpts().OpenMPIsTargetDevice
) {
9932 if (const auto *FD
= dyn_cast
<FunctionDecl
>(GD
.getDecl()))
9933 if (isAssumedToBeNotEmitted(cast
<ValueDecl
>(FD
),
9934 CGM
.getLangOpts().OpenMPIsTargetDevice
))
9939 const ValueDecl
*VD
= cast
<ValueDecl
>(GD
.getDecl());
9940 // Try to detect target regions in the function.
9941 if (const auto *FD
= dyn_cast
<FunctionDecl
>(VD
)) {
9942 StringRef Name
= CGM
.getMangledName(GD
);
9943 scanForTargetRegionsFunctions(FD
->getBody(), Name
);
9944 if (isAssumedToBeNotEmitted(cast
<ValueDecl
>(FD
),
9945 CGM
.getLangOpts().OpenMPIsTargetDevice
))
9949 // Do not to emit function if it is not marked as declare target.
9950 return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD
) &&
9951 AlreadyEmittedTargetDecls
.count(VD
) == 0;
9954 bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD
) {
9955 if (isAssumedToBeNotEmitted(cast
<ValueDecl
>(GD
.getDecl()),
9956 CGM
.getLangOpts().OpenMPIsTargetDevice
))
9959 if (!CGM
.getLangOpts().OpenMPIsTargetDevice
)
9962 // Check if there are Ctors/Dtors in this declaration and look for target
9963 // regions in it. We use the complete variant to produce the kernel name
9965 QualType RDTy
= cast
<VarDecl
>(GD
.getDecl())->getType();
9966 if (const auto *RD
= RDTy
->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
9967 for (const CXXConstructorDecl
*Ctor
: RD
->ctors()) {
9968 StringRef ParentName
=
9969 CGM
.getMangledName(GlobalDecl(Ctor
, Ctor_Complete
));
9970 scanForTargetRegionsFunctions(Ctor
->getBody(), ParentName
);
9972 if (const CXXDestructorDecl
*Dtor
= RD
->getDestructor()) {
9973 StringRef ParentName
=
9974 CGM
.getMangledName(GlobalDecl(Dtor
, Dtor_Complete
));
9975 scanForTargetRegionsFunctions(Dtor
->getBody(), ParentName
);
9979 // Do not to emit variable if it is not marked as declare target.
9980 std::optional
<OMPDeclareTargetDeclAttr::MapTypeTy
> Res
=
9981 OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
9982 cast
<VarDecl
>(GD
.getDecl()));
9983 if (!Res
|| *Res
== OMPDeclareTargetDeclAttr::MT_Link
||
9984 ((*Res
== OMPDeclareTargetDeclAttr::MT_To
||
9985 *Res
== OMPDeclareTargetDeclAttr::MT_Enter
) &&
9986 HasRequiresUnifiedSharedMemory
)) {
9987 DeferredGlobalVariables
.insert(cast
<VarDecl
>(GD
.getDecl()));
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsTargetDevice)
    return;

  std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);

  // If this is an 'extern' declaration we defer to the canonical definition and
  // do not emit an offloading entry.
  if (Res && *Res != OMPDeclareTargetDeclAttr::MT_Link &&
      VD->hasExternalStorage())
    return;

  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsTargetDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }

  auto AddrOfGlobal = [&VD, this]() { return CGM.GetAddrOfGlobal(VD); };
  auto LinkageForVariable = [&VD, this]() {
    return CGM.getLLVMLinkageVarDefinition(VD);
  };

  std::vector<llvm::GlobalVariable *> GeneratedRefs;
  OMPBuilder.registerTargetGlobalVariable(
      convertCaptureClause(VD), convertDeviceClause(VD),
      VD->hasDefinition(CGM.getContext()) == VarDecl::DeclarationOnly,
      VD->isExternallyVisible(),
      getEntryInfoFromPresumedLoc(CGM, OMPBuilder,
                                  VD->getCanonicalDecl()->getBeginLoc()),
      CGM.getMangledName(VD), GeneratedRefs, CGM.getLangOpts().OpenMPSimd,
      CGM.getLangOpts().OMPTargetTriples, AddrOfGlobal, LinkageForVariable,
      CGM.getTypes().ConvertTypeForMem(
          CGM.getContext().getPointerType(VD->getType())),
      Addr);

  for (auto *ref : GeneratedRefs)
    CGM.addCompilerUsedGlobal(ref);
}

bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}

void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    std::optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
         *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              ((*Res == OMPDeclareTargetDeclAttr::MT_To ||
                *Res == OMPDeclareTargetDeclAttr::MT_Enter) &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}

void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}

void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
      OMPBuilder.Config.setHasRequiresUnifiedSharedMemory(true);
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

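// Illustrative directive handled above:
//
//   #pragma omp requires unified_shared_memory \
//                        atomic_default_mem_order(seq_cst)
//
// which records unified shared memory support and makes sequentially
// consistent ordering the default for later atomic codegen.
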
llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}

bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
    // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}

bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsTargetDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsTargetDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}


bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsTargetDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}

void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
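
// Illustrative shape of the emitted call (a sketch, not verbatim IR): for
//   #pragma omp teams
// with two captured variables, the runtime call looks roughly like
//   call void @__kmpc_fork_teams(ptr @loc, i32 2, ptr @outlined_fn,
//                                ptr %var1, ptr %var2)
// where the trailing operands come from CapturedVars.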

void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams, thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
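
// Hedged example: for '#pragma omp target teams num_teams(4) thread_limit(8)'
// the expressions fold to constants and the call above becomes, roughly,
//   call void @__kmpc_push_num_teams(ptr @loc, i32 %gtid, i32 4, i32 8)
// A missing clause is encoded as 0 (see the getInt32(0) defaults above).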

void CGOpenMPRuntime::emitThreadLimitClause(CodeGenFunction &CGF,
                                            const Expr *ThreadLimit,
                                            SourceLocation Loc) {
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_set_thread_limit(&loc, global_tid, thread_limit)
  llvm::Value *ThreadLimitArgs[] = {RTLoc, getThreadID(CGF, Loc),
                                    ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_set_thread_limit),
                      ThreadLimitArgs);
}

void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  using InsertPointTy = llvm::OpenMPIRBuilder::InsertPointTy;

  llvm::Value *IfCondVal = nullptr;
  if (IfCond)
    IfCondVal = CGF.EvaluateExprAsBool(IfCond);

  // Emit device ID if any.
  llvm::Value *DeviceID = nullptr;
  if (Device)
    DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                         CGF.Int64Ty, /*isSigned=*/true);
  else
    DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

  // Fill up the arrays with all the mapped variables.
  MappableExprsHandler::MapCombinedInfoTy CombinedInfo;
  auto GenMapInfoCB =
      [&](InsertPointTy CodeGenIP) -> llvm::OpenMPIRBuilder::MapInfosTy & {
    CGF.Builder.restoreIP(CodeGenIP);
    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    auto FillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
      return emitMappingInformation(CGF, OMPBuilder, MapExpr);
    };
    if (CGM.getCodeGenOpts().getDebugInfo() !=
        llvm::codegenoptions::NoDebugInfo) {
      CombinedInfo.Names.resize(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, CombinedInfo.Names.begin(),
                      FillInfoMap);
    }

    return CombinedInfo;
  };
  using BodyGenTy = llvm::OpenMPIRBuilder::BodyGenTy;
  auto BodyCB = [&](InsertPointTy CodeGenIP, BodyGenTy BodyGenType) {
    CGF.Builder.restoreIP(CodeGenIP);
    switch (BodyGenType) {
    case BodyGenTy::Priv:
      if (!Info.CaptureDeviceAddrMap.empty())
        CodeGen(CGF);
      break;
    case BodyGenTy::DupNoPriv:
      if (!Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    case BodyGenTy::NoPriv:
      if (Info.CaptureDeviceAddrMap.empty()) {
        CodeGen.setAction(NoPrivAction);
        CodeGen(CGF);
      }
      break;
    }
    return InsertPointTy(CGF.Builder.GetInsertBlock(),
                         CGF.Builder.GetInsertPoint());
  };

  auto DeviceAddrCB = [&](unsigned int I, llvm::Value *NewDecl) {
    if (const ValueDecl *DevVD = CombinedInfo.DevicePtrDecls[I]) {
      Info.CaptureDeviceAddrMap.try_emplace(DevVD, NewDecl);
    }
  };

  auto CustomMapperCB = [&](unsigned int I) {
    llvm::Value *MFunc = nullptr;
    if (CombinedInfo.Mappers[I]) {
      Info.HasMapper = true;
      MFunc = CGF.CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
    }
    return MFunc;
  };

  // Source location for the ident struct
  llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

  InsertPointTy AllocaIP(CGF.AllocaInsertPt->getParent(),
                         CGF.AllocaInsertPt->getIterator());
  InsertPointTy CodeGenIP(CGF.Builder.GetInsertBlock(),
                          CGF.Builder.GetInsertPoint());
  llvm::OpenMPIRBuilder::LocationDescription OmpLoc(CodeGenIP);
  CGF.Builder.restoreIP(OMPBuilder.createTargetData(
      OmpLoc, AllocaIP, CodeGenIP, DeviceID, IfCondVal, Info, GenMapInfoCB,
      /*MapperFunc=*/nullptr, BodyCB, DeviceAddrCB, CustomMapperCB, RTLoc));
}

void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    SmallVector<llvm::Value *, 13> OffloadingArgs(
        {RTLoc, DeviceID, PointerNum,
         InputInfo.BasePointersArray.emitRawPointer(CGF),
         InputInfo.PointersArray.emitRawPointer(CGF),
         InputInfo.SizesArray.emitRawPointer(CGF), MapTypesArray, MapNamesArray,
         InputInfo.MappersArray.emitRawPointer(CGF)});

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }

    if (HasNowait) {
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.Int32Ty));
      OffloadingArgs.push_back(llvm::Constant::getNullValue(CGF.VoidPtrTy));
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo, OMPBuilder);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug = CGF.CGM.getCodeGenOpts().getDebugInfo() !=
                     llvm::codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}

namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace

static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If OpenMP clause "simdlen" is used, the VLEN is the value of the
  // argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For a non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //   CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is a struct, union, or class
  //   type which is passed by value (except for the type that maps to the
  //   built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of the vector
  // register of the ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    return C.getTypeSize(C.IntTy);
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
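
// Worked example of the VLEN formula above (illustrative): for
//   #pragma omp declare simd
//   double add(double a, double b);
// the CDT is the return type 'double' (rule a), so with a 256-bit vector
// register VLEN = 256 / 64 = 4.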

/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
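
// Illustrative mangling: one vector parameter, one linear parameter with step
// 2, and one uniform parameter produce the sequence "vl2u" in declaration
// order ('v' vector, 'l2' linear with step 2, 'u' uniform); a step of 1 is
// omitted, so plain linear(i) yields just 'l'.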

static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
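
// Illustrative output: with simdlen(4) and no branch-state clause
// (BS_Undefined), a function 'foo' with one vector parameter gets eight
// attributes, one per mask ('N'/'M') and ISA ('b','c','d','e'):
//   _ZGVbN4v_foo ... _ZGVeN4v_foo and _ZGVbM4v_foo ... _ZGVeM4v_foo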

// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}

/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}

/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}

// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
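
// Illustrative: for 'double foo(float x, double y)' the lane sizes are
// {64, 32, 64}, giving NDS = 32 and WDS = 64; OutputBecomesInput stays false
// because the return type is pass-by-value.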

// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}

// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
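
// Illustrative: with NDS == 32 (narrowest type 'float'), the switch above
// emits two Advanced SIMD variants, one with 2 lanes (64-bit vectors) and
// one with 4 lanes (128-bit vectors).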

/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed length must obey the architectural
  // constraints.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}

void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        else if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
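
// End-to-end sketch (illustrative, not verbatim output): for
//   #pragma omp declare simd uniform(n) linear(i) simdlen(4) notinbranch
//   float foo(float *p, int i, int n);
// the walk above marks 'n' Uniform and 'i' Linear with step 1 (steps are
// rescaled by the pointee size only for pointer/reference parameters), leaves
// 'p' as Vector, and on x86 emitX86DeclareSimdFunction then attaches names
// such as _ZGVbN4vlu_foo.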

namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace

void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy = C.getConstantArrayType(KmpDimTy, Size, nullptr,
                                            ArraySizeModifier::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).emitRawPointer(CGF),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::ArrayRef(FiniArgs));
}
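
// Illustrative lowering: '#pragma omp for ordered(2)' reaches this point with
// NumIterations.size() == 2, so a two-element kmp_dim array is filled (upper
// bound and stride per loop) and passed as
//   __kmpc_doacross_init(&loc, gtid, 2, dims)
// with the matching __kmpc_doacross_fini registered as a cleanup above.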

template <typename T>
static void EmitDoacrossOrdered(CodeGenFunction &CGF, CodeGenModule &CGM,
                                const T *C, llvm::Value *ULoc,
                                llvm::Value *ThreadID) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArraySizeModifier::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      ULoc, ThreadID,
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).emitRawPointer(CGF)};
  llvm::FunctionCallee RTLFn;
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  OMPDoacrossKind<T> ODK;
  if (ODK.isSource(C)) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(ODK.isSink(C) && "Expect sink modifier.");
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  return EmitDoacrossOrdered<OMPDependClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}

void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDoacrossClause *C) {
  return EmitDoacrossOrdered<OMPDoacrossClause>(
      CGF, CGM, C, emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()));
}
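
// Illustrative mapping for the two wrappers above, inside an ordered(1) loop:
//   #pragma omp ordered depend(source)    -> __kmpc_doacross_post(&loc, gtid, vec)
//   #pragma omp ordered depend(sink: i-1) -> __kmpc_doacross_wait(&loc, gtid, vec)
// where 'vec' holds the loop iteration values converted to kmp_int64.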

void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}

void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}

/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}

/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  std::optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}

Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
    } else {
      CharUnits Sz = CGM.getContext().getTypeSizeInChars(CVD->getType());
      Size = CGM.getSize(Sz.alignTo(Align));
    }
    llvm::Value *ThreadID = getThreadID(CGF, CVD->getBeginLoc());
    const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
    const Expr *Allocator = AA->getAllocator();
    llvm::Value *AllocVal = getAllocatorVal(CGF, Allocator);
    llvm::Value *Alignment = getAlignmentValue(CGM, CVD);
    SmallVector<llvm::Value *, 4> Args;
    Args.push_back(ThreadID);
    if (Alignment)
      Args.push_back(Alignment);
    Args.push_back(Size);
    Args.push_back(AllocVal);
    llvm::omp::RuntimeFunction FnID =
        Alignment ? OMPRTL___kmpc_aligned_alloc : OMPRTL___kmpc_alloc;
    llvm::Value *Addr = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), FnID), Args,
        getName({CVD->getName(), ".void.addr"}));
    llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
        CGM.getModule(), OMPRTL___kmpc_free);
    QualType Ty = CGM.getContext().getPointerType(CVD->getType());
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, CGF.ConvertTypeForMem(Ty), getName({CVD->getName(), ".addr"}));
    if (UntiedAddr.isValid())
      CGF.EmitStoreOfScalar(Addr, UntiedAddr, /*Volatile=*/false, Ty);

    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.emitRawPointer(CGF), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
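
// Illustrative: for
//   int a;
//   #pragma omp allocate(a) allocator(omp_high_bw_mem_alloc)
// the address of 'a' comes from __kmpc_alloc(gtid, size, allocator) (or
// __kmpc_aligned_alloc when an 'align' modifier is present), and the matching
// __kmpc_free is pushed as a cleanup above.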

bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}

CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}

bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}

void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}

CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}

CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}

CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}

Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lasprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress();
}
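
// The implicit record built above acts as a shadow struct (sketch):
//   struct { <decl type> Value; char Fired; };
// 'Fired' starts at 0 and is set atomically by inner regions (see
// checkAndEmitLastprivateConditional below) to signal that the private copy
// was written.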

namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV = OMPBuilder.getOrCreateInternalVariable(
      LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal =
      CGF.MakeNaturalAlignRawAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = OMPBuilder.getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  cast<llvm::GlobalVariable>(Last)->setAlignment(
      LVal.getAlignment().getAsAlign());
  LValue LastLVal =
      CGF.MakeRawAddrLValue(Last, LVal.getType(), LVal.getAlignment());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv;
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   global_last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // global_last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
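// Taken together, the code emitted above corresponds to this source-level
// pattern (variable names illustrative):
//   #pragma omp critical(<UniqueDeclName>)   // omitted in simd-only mode
//   if (last_iv <= iv) {
//     last_iv = iv;
//     last_a = priv_a;
//   }
// so last_a ends up holding the value written by the sequentially-last
// iteration that actually updated the variable.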
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a;
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
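// Example trigger (hypothetical source):
//   #pragma omp parallel for lastprivate(conditional: a)
//   for (int i = 0; i < N; ++i)
//     if (p[i]) a = i;
// Every store to 'a' is routed through here: in the owning function the
// global copy and iteration counter are updated under a critical section;
// in an inner parallel region only the Fired flag of the outer private copy
// is set atomically, and the owner publishes the value later via
// checkAndEmitSharedLastprivateConditional.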
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}
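// Per captured variable, the check emitted above is, in pseudo-code:
//   if (priv_a.Fired != 0)
//     <update last_a/last_iv as in emitLastprivateConditionalUpdate>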
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeRawAddrLValue(
      GV, PrivLVal.getType().getNonReferenceType(), PrivLVal.getAlignment());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
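// In pseudo-code, the final copy-back emitted above is simply
//   a = last_a;
// and it is skipped entirely when the global was never created, i.e. the
// variable was never conditionally updated inside the region.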
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchDeinit(CodeGenFunction &CGF,
                                                SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}
void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDoacrossClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}