//===----- CGOpenMPRuntime.cpp - Interface to OpenMP Runtimes -------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
//
// This provides a class for OpenMP runtime code generation.
//
//===----------------------------------------------------------------------===//
#include "CGOpenMPRuntime.h"
#include "CGCXXABI.h"
#include "CGCleanup.h"
#include "CGRecordLayout.h"
#include "CodeGenFunction.h"
#include "TargetInfo.h"
#include "clang/AST/APValue.h"
#include "clang/AST/Attr.h"
#include "clang/AST/Decl.h"
#include "clang/AST/OpenMPClause.h"
#include "clang/AST/StmtOpenMP.h"
#include "clang/AST/StmtVisitor.h"
#include "clang/Basic/BitmaskEnum.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OpenMPKinds.h"
#include "clang/Basic/SourceManager.h"
#include "clang/CodeGen/ConstantInitBuilder.h"
#include "llvm/ADT/ArrayRef.h"
#include "llvm/ADT/SetOperations.h"
#include "llvm/ADT/SmallBitVector.h"
#include "llvm/ADT/StringExtras.h"
#include "llvm/Bitcode/BitcodeReader.h"
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalValue.h"
#include "llvm/IR/InstrTypes.h"
#include "llvm/IR/Value.h"
#include "llvm/Support/AtomicOrdering.h"
#include "llvm/Support/Format.h"
#include "llvm/Support/raw_ostream.h"
#include <cassert>
#include <numeric>
using namespace clang;
using namespace CodeGen;
using namespace llvm::omp;
namespace {
/// Base class for handling code generation inside OpenMP regions.
class CGOpenMPRegionInfo : public CodeGenFunction::CGCapturedStmtInfo {
public:
  /// Kinds of OpenMP regions used in codegen.
  enum CGOpenMPRegionKind {
    /// Region with outlined function for standalone 'parallel'
    /// directive.
    ParallelOutlinedRegion,
    /// Region with outlined function for standalone 'task' directive.
    TaskOutlinedRegion,
    /// Region for constructs that do not require function outlining,
    /// like 'for', 'sections', 'atomic' etc. directives.
    InlinedRegion,
    /// Region with outlined function for standalone 'target' directive.
    TargetRegion,
  };

  CGOpenMPRegionInfo(const CapturedStmt &CS,
                     const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CS, CR_OpenMP), RegionKind(RegionKind),
        CodeGen(CodeGen), Kind(Kind), HasCancel(HasCancel) {}

  CGOpenMPRegionInfo(const CGOpenMPRegionKind RegionKind,
                     const RegionCodeGenTy &CodeGen, OpenMPDirectiveKind Kind,
                     bool HasCancel)
      : CGCapturedStmtInfo(CR_OpenMP), RegionKind(RegionKind), CodeGen(CodeGen),
        Kind(Kind), HasCancel(HasCancel) {}

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  virtual const VarDecl *getThreadIDVariable() const = 0;

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override;

  /// Get an LValue for the current ThreadID variable.
  /// \return LValue for thread id variable. This LValue always has type int32*.
  virtual LValue getThreadIDVariableLValue(CodeGenFunction &CGF);

  virtual void emitUntiedSwitch(CodeGenFunction & /*CGF*/) {}

  CGOpenMPRegionKind getRegionKind() const { return RegionKind; }

  OpenMPDirectiveKind getDirectiveKind() const { return Kind; }

  bool hasCancel() const { return HasCancel; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return Info->getKind() == CR_OpenMP;
  }

  ~CGOpenMPRegionInfo() override = default;

protected:
  CGOpenMPRegionKind RegionKind;
  RegionCodeGenTy CodeGen;
  OpenMPDirectiveKind Kind;
  bool HasCancel;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPOutlinedRegionInfo(const CapturedStmt &CS, const VarDecl *ThreadIDVar,
                             const RegionCodeGenTy &CodeGen,
                             OpenMPDirectiveKind Kind, bool HasCancel,
                             StringRef HelperName)
      : CGOpenMPRegionInfo(CS, ParallelOutlinedRegion, CodeGen, Kind,
                           HasCancel),
        ThreadIDVar(ThreadIDVar), HelperName(HelperName) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               ParallelOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  StringRef HelperName;
};
/// API for captured statement code generation in OpenMP constructs.
class CGOpenMPTaskOutlinedRegionInfo final : public CGOpenMPRegionInfo {
public:
  class UntiedTaskActionTy final : public PrePostActionTy {
    bool Untied = false;
    const VarDecl *PartIDVar;
    const RegionCodeGenTy UntiedCodeGen;
    llvm::SwitchInst *UntiedSwitch = nullptr;

  public:
    UntiedTaskActionTy(bool Tied, const VarDecl *PartIDVar,
                       const RegionCodeGenTy &UntiedCodeGen)
        : Untied(!Tied), PartIDVar(PartIDVar), UntiedCodeGen(UntiedCodeGen) {}
    void Enter(CodeGenFunction &CGF) override {
      if (Untied) {
        // Emit task switching point.
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        llvm::Value *Res =
            CGF.EmitLoadOfScalar(PartIdLVal, PartIDVar->getLocation());
        llvm::BasicBlock *DoneBB = CGF.createBasicBlock(".untied.done.");
        UntiedSwitch = CGF.Builder.CreateSwitch(Res, DoneBB);
        CGF.EmitBlock(DoneBB);
        CGF.EmitBranchThroughCleanup(CGF.ReturnBlock);
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(0),
                              CGF.Builder.GetInsertBlock());
        emitUntiedSwitch(CGF);
      }
    }
    void emitUntiedSwitch(CodeGenFunction &CGF) const {
      if (Untied) {
        LValue PartIdLVal = CGF.EmitLoadOfPointerLValue(
            CGF.GetAddrOfLocalVar(PartIDVar),
            PartIDVar->getType()->castAs<PointerType>());
        CGF.EmitStoreOfScalar(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              PartIdLVal);
        UntiedCodeGen(CGF);
        CodeGenFunction::JumpDest CurPoint =
            CGF.getJumpDestInCurrentScope(".untied.next.");
        CGF.EmitBranch(CGF.ReturnBlock.getBlock());
        CGF.EmitBlock(CGF.createBasicBlock(".untied.jmp."));
        UntiedSwitch->addCase(CGF.Builder.getInt32(UntiedSwitch->getNumCases()),
                              CGF.Builder.GetInsertBlock());
        CGF.EmitBranchThroughCleanup(CurPoint);
        CGF.EmitBlock(CurPoint.getBlock());
      }
    }
    unsigned getNumberOfParts() const { return UntiedSwitch->getNumCases(); }
  };
  CGOpenMPTaskOutlinedRegionInfo(const CapturedStmt &CS,
                                 const VarDecl *ThreadIDVar,
                                 const RegionCodeGenTy &CodeGen,
                                 OpenMPDirectiveKind Kind, bool HasCancel,
                                 const UntiedTaskActionTy &Action)
      : CGOpenMPRegionInfo(CS, TaskOutlinedRegion, CodeGen, Kind, HasCancel),
        ThreadIDVar(ThreadIDVar), Action(Action) {
    assert(ThreadIDVar != nullptr && "No ThreadID in OpenMP region.");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override { return ThreadIDVar; }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override;

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return ".omp_outlined."; }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    Action.emitUntiedSwitch(CGF);
  }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() ==
               TaskOutlinedRegion;
  }

private:
  /// A variable or parameter storing global thread id for OpenMP
  /// constructs.
  const VarDecl *ThreadIDVar;
  /// Action for emitting code for untied tasks.
  const UntiedTaskActionTy &Action;
};
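// Note on the untied dispatch above (a sketch of the emitted IR, not verbatim
// output): for an untied task whose body contains scheduling points,
// UntiedTaskActionTy builds at task entry
//   switch (*partid) {           // created in Enter(), default -> .untied.done.
//   case 0: br %.untied.jmp.0    // initial entry into the task body
//   case 1: br %.untied.jmp.1    // one case added per emitUntiedSwitch() call
//   ...
//   }
// Each emitUntiedSwitch() stores the next case index into *partid and
// re-enqueues the task via UntiedCodeGen, so the next invocation of the task
// entry resumes right after the previous scheduling point.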
/// API for inlined captured statement code generation in OpenMP
/// constructs.
class CGOpenMPInlinedRegionInfo : public CGOpenMPRegionInfo {
public:
  CGOpenMPInlinedRegionInfo(CodeGenFunction::CGCapturedStmtInfo *OldCSI,
                            const RegionCodeGenTy &CodeGen,
                            OpenMPDirectiveKind Kind, bool HasCancel)
      : CGOpenMPRegionInfo(InlinedRegion, CodeGen, Kind, HasCancel),
        OldCSI(OldCSI),
        OuterRegionInfo(dyn_cast_or_null<CGOpenMPRegionInfo>(OldCSI)) {}

  // Retrieve the value of the context parameter.
  llvm::Value *getContextValue() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getContextValue();
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  void setContextValue(llvm::Value *V) override {
    if (OuterRegionInfo) {
      OuterRegionInfo->setContextValue(V);
      return;
    }
    llvm_unreachable("No context value for inlined OpenMP region");
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->lookup(VD);
    // If there is no outer outlined region, there is no need to look up in a
    // list of captured variables; we can use the original one.
    return nullptr;
  }

  FieldDecl *getThisFieldDecl() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThisFieldDecl();
    return nullptr;
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariable();
    return nullptr;
  }

  /// Get an LValue for the current ThreadID variable.
  LValue getThreadIDVariableLValue(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      return OuterRegionInfo->getThreadIDVariableLValue(CGF);
    llvm_unreachable("No LValue for inlined OpenMP construct");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    if (auto *OuterRegionInfo = getOldCSI())
      return OuterRegionInfo->getHelperName();
    llvm_unreachable("No helper name for inlined OpenMP construct");
  }

  void emitUntiedSwitch(CodeGenFunction &CGF) override {
    if (OuterRegionInfo)
      OuterRegionInfo->emitUntiedSwitch(CGF);
  }

  CodeGenFunction::CGCapturedStmtInfo *getOldCSI() const { return OldCSI; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == InlinedRegion;
  }

  ~CGOpenMPInlinedRegionInfo() override = default;

private:
  /// CodeGen info about outer OpenMP region.
  CodeGenFunction::CGCapturedStmtInfo *OldCSI;
  CGOpenMPRegionInfo *OuterRegionInfo;
};
/// API for captured statement code generation in OpenMP target
/// constructs. For these captures, implicit parameters are used instead of the
/// captured fields. The name of the target region has to be unique in a given
/// application so it is provided by the client, because only the client has
/// the information to generate that.
class CGOpenMPTargetRegionInfo final : public CGOpenMPRegionInfo {
public:
  CGOpenMPTargetRegionInfo(const CapturedStmt &CS,
                           const RegionCodeGenTy &CodeGen, StringRef HelperName)
      : CGOpenMPRegionInfo(CS, TargetRegion, CodeGen, OMPD_target,
                           /*HasCancel=*/false),
        HelperName(HelperName) {}

  /// This is unused for target regions because each starts executing
  /// with a single thread.
  const VarDecl *getThreadIDVariable() const override { return nullptr; }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override { return HelperName; }

  static bool classof(const CGCapturedStmtInfo *Info) {
    return CGOpenMPRegionInfo::classof(Info) &&
           cast<CGOpenMPRegionInfo>(Info)->getRegionKind() == TargetRegion;
  }

private:
  StringRef HelperName;
};
static void EmptyCodeGen(CodeGenFunction &, PrePostActionTy &) {
  llvm_unreachable("No codegen for expressions");
}
/// API for generation of expressions captured in an innermost OpenMP
/// region.
class CGOpenMPInnerExprInfo final : public CGOpenMPInlinedRegionInfo {
public:
  CGOpenMPInnerExprInfo(CodeGenFunction &CGF, const CapturedStmt &CS)
      : CGOpenMPInlinedRegionInfo(CGF.CapturedStmtInfo, EmptyCodeGen,
                                  OMPD_unknown,
                                  /*HasCancel=*/false),
        PrivScope(CGF) {
    // Make sure the globals captured in the provided statement are local by
    // using the privatization logic. We assume the same variable is not
    // captured more than once.
    for (const auto &C : CS.captures()) {
      if (!C.capturesVariable() && !C.capturesVariableByCopy())
        continue;

      const VarDecl *VD = C.getCapturedVar();
      if (VD->isLocalVarDeclOrParm())
        continue;

      DeclRefExpr DRE(CGF.getContext(), const_cast<VarDecl *>(VD),
                      /*RefersToEnclosingVariableOrCapture=*/false,
                      VD->getType().getNonReferenceType(), VK_LValue,
                      C.getLocation());
      PrivScope.addPrivate(VD, CGF.EmitLValue(&DRE).getAddress(CGF));
    }
    (void)PrivScope.Privatize();
  }

  /// Lookup the captured field decl for a variable.
  const FieldDecl *lookup(const VarDecl *VD) const override {
    if (const FieldDecl *FD = CGOpenMPInlinedRegionInfo::lookup(VD))
      return FD;
    llvm_unreachable("Trying to reference past available capture");
  }

  /// Emit the captured statement body.
  void EmitBody(CodeGenFunction &CGF, const Stmt *S) override {
    llvm_unreachable("No body for expressions");
  }

  /// Get a variable or parameter for storing global thread id
  /// inside OpenMP construct.
  const VarDecl *getThreadIDVariable() const override {
    llvm_unreachable("No thread id for expressions");
  }

  /// Get the name of the capture helper.
  StringRef getHelperName() const override {
    llvm_unreachable("No helper name for expressions");
  }

  static bool classof(const CGCapturedStmtInfo *Info) { return false; }

private:
  /// Private scope to capture global variables.
  CodeGenFunction::OMPPrivateScope PrivScope;
};
/// RAII for emitting code of OpenMP constructs.
class InlinedOpenMPRegionRAII {
  CodeGenFunction &CGF;
  llvm::DenseMap<const ValueDecl *, FieldDecl *> LambdaCaptureFields;
  FieldDecl *LambdaThisCaptureField = nullptr;
  const CodeGen::CGBlockInfo *BlockInfo = nullptr;
  bool NoInheritance = false;

public:
  /// Constructs region for combined constructs.
  /// \param CodeGen Code generation sequence for combined directives. Includes
  /// a list of functions used for code generation of implicitly inlined
  /// regions.
  InlinedOpenMPRegionRAII(CodeGenFunction &CGF, const RegionCodeGenTy &CodeGen,
                          OpenMPDirectiveKind Kind, bool HasCancel,
                          bool NoInheritance = true)
      : CGF(CGF), NoInheritance(NoInheritance) {
    // Start emission for the construct.
    CGF.CapturedStmtInfo = new CGOpenMPInlinedRegionInfo(
        CGF.CapturedStmtInfo, CodeGen, Kind, HasCancel);
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      LambdaThisCaptureField = CGF.LambdaThisCaptureField;
      CGF.LambdaThisCaptureField = nullptr;
      BlockInfo = CGF.BlockInfo;
      CGF.BlockInfo = nullptr;
    }
  }

  ~InlinedOpenMPRegionRAII() {
    // Restore original CapturedStmtInfo only if we're done with code emission.
    auto *OldCSI =
        cast<CGOpenMPInlinedRegionInfo>(CGF.CapturedStmtInfo)->getOldCSI();
    delete CGF.CapturedStmtInfo;
    CGF.CapturedStmtInfo = OldCSI;
    if (NoInheritance) {
      std::swap(CGF.LambdaCaptureFields, LambdaCaptureFields);
      CGF.LambdaThisCaptureField = LambdaThisCaptureField;
      CGF.BlockInfo = BlockInfo;
    }
  }
};
/// Values for bit flags used in the ident_t to describe the fields.
/// All enumerated elements are named and described in accordance with the code
/// from https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
enum OpenMPLocationFlags : unsigned {
  /// Use trampoline for internal microtask.
  OMP_IDENT_IMD = 0x01,
  /// Use c-style ident structure.
  OMP_IDENT_KMPC = 0x02,
  /// Atomic reduction option for kmpc_reduce.
  OMP_ATOMIC_REDUCE = 0x10,
  /// Explicit 'barrier' directive.
  OMP_IDENT_BARRIER_EXPL = 0x20,
  /// Implicit barrier in code.
  OMP_IDENT_BARRIER_IMPL = 0x40,
  /// Implicit barrier in 'for' directive.
  OMP_IDENT_BARRIER_IMPL_FOR = 0x40,
  /// Implicit barrier in 'sections' directive.
  OMP_IDENT_BARRIER_IMPL_SECTIONS = 0xC0,
  /// Implicit barrier in 'single' directive.
  OMP_IDENT_BARRIER_IMPL_SINGLE = 0x140,
  /// Call of __kmp_for_static_init for static loop.
  OMP_IDENT_WORK_LOOP = 0x200,
  /// Call of __kmp_for_static_init for sections.
  OMP_IDENT_WORK_SECTIONS = 0x400,
  /// Call of __kmp_for_static_init for distribute.
  OMP_IDENT_WORK_DISTRIBUTE = 0x800,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_IDENT_WORK_DISTRIBUTE)
};
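// For example, the implicit barrier that terminates a worksharing 'for'
// region is described by an ident_t whose flags combine the C-style marker
// with the barrier kind: OMP_IDENT_KMPC | OMP_IDENT_BARRIER_IMPL_FOR. The
// runtime uses these bits to identify the barrier kind.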
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

/// Values for bit flags for marking which requires clauses have been used.
enum OpenMPOffloadingRequiresDirFlags : int64_t {
  /// flag undefined.
  OMP_REQ_UNDEFINED = 0x000,
  /// no requires clause present.
  OMP_REQ_NONE = 0x001,
  /// reverse_offload clause.
  OMP_REQ_REVERSE_OFFLOAD = 0x002,
  /// unified_address clause.
  OMP_REQ_UNIFIED_ADDRESS = 0x004,
  /// unified_shared_memory clause.
  OMP_REQ_UNIFIED_SHARED_MEMORY = 0x008,
  /// dynamic_allocators clause.
  OMP_REQ_DYNAMIC_ALLOCATORS = 0x010,
  LLVM_MARK_AS_BITMASK_ENUM(/*LargestValue=*/OMP_REQ_DYNAMIC_ALLOCATORS)
};

enum OpenMPOffloadingReservedDeviceIDs {
  /// Device ID if the device was not defined, runtime should get it
  /// from environment variables in the spec.
  OMP_DEVICEID_UNDEF = -1,
};
} // anonymous namespace
/// Describes ident structure that describes a source location.
/// All descriptions are taken from
/// https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h
/// Original structure:
/// typedef struct ident {
///    kmp_int32 reserved_1;   /**<  might be used in Fortran;
///                                  see above  */
///    kmp_int32 flags;        /**<  also f.flags; KMP_IDENT_xxx flags;
///                                  KMP_IDENT_KMPC identifies this union
///                                  member  */
///    kmp_int32 reserved_2;   /**<  not really used in Fortran any more;
///                                  see above */
///#if USE_ITT_BUILD
///                            /*  but currently used for storing
///                                region-specific ITT */
///                            /*  contextual information. */
///#endif /* USE_ITT_BUILD */
///    kmp_int32 reserved_3;   /**<  source[4] in Fortran, do not use for
///                                  C++  */
///    char const *psource;    /**<  String describing the source location.
///                                  The string is composed of semi-colon
///                                  separated fields which describe the source
///                                  file, the function and a pair of line
///                                  numbers that delimit the construct. */
/// } ident_t;
enum IdentFieldIndex {
  /// might be used in Fortran
  IdentField_Reserved_1,
  /// OMP_IDENT_xxx flags; OMP_IDENT_KMPC identifies this union member.
  IdentField_Flags,
  /// Not really used in Fortran any more
  IdentField_Reserved_2,
  /// Source[4] in Fortran, do not use for C++
  IdentField_Reserved_3,
  /// String describing the source location. The string is composed of
  /// semi-colon separated fields which describe the source file, the function
  /// and a pair of line numbers that delimit the construct.
  IdentField_PSource
};
/// Schedule types for 'omp for' loops (these enumerators are taken from
/// the enum sched_type in kmp.h).
enum OpenMPSchedType {
  /// Lower bound for default (unordered) versions.
  OMP_sch_lower = 32,
  OMP_sch_static_chunked = 33,
  OMP_sch_static = 34,
  OMP_sch_dynamic_chunked = 35,
  OMP_sch_guided_chunked = 36,
  OMP_sch_runtime = 37,
  OMP_sch_auto = 38,
  /// static with chunk adjustment (e.g., simd)
  OMP_sch_static_balanced_chunked = 45,
  /// Lower bound for 'ordered' versions.
  OMP_ord_lower = 64,
  OMP_ord_static_chunked = 65,
  OMP_ord_static = 66,
  OMP_ord_dynamic_chunked = 67,
  OMP_ord_guided_chunked = 68,
  OMP_ord_runtime = 69,
  OMP_ord_auto = 70,
  OMP_sch_default = OMP_sch_static,
  /// dist_schedule types
  OMP_dist_sch_static_chunked = 91,
  OMP_dist_sch_static = 92,
  /// Support for OpenMP 4.5 monotonic and nonmonotonic schedule modifiers.
  /// Set if the monotonic schedule modifier was present.
  OMP_sch_modifier_monotonic = (1 << 29),
  /// Set if the nonmonotonic schedule modifier was present.
  OMP_sch_modifier_nonmonotonic = (1 << 30),
};
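// These values are passed in the 'schedtype' argument of the
// __kmpc_for_static_init_* / __kmpc_dispatch_init_* entry points; the
// modifier bits are OR'ed onto the base value, so e.g.
// schedule(nonmonotonic: dynamic) is encoded as
// OMP_sch_dynamic_chunked | OMP_sch_modifier_nonmonotonic.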
namespace {
/// A basic class for pre|post-action for advanced codegen sequence for OpenMP
/// construct.
class CleanupTy final : public EHScopeStack::Cleanup {
  PrePostActionTy *Action;

public:
  explicit CleanupTy(PrePostActionTy *Action) : Action(Action) {}
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    Action->Exit(CGF);
  }
};
} // anonymous namespace
void RegionCodeGenTy::operator()(CodeGenFunction &CGF) const {
  CodeGenFunction::RunCleanupsScope Scope(CGF);
  if (PrePostAction) {
    CGF.EHStack.pushCleanup<CleanupTy>(NormalAndEHCleanup, PrePostAction);
    Callback(CodeGen, CGF, *PrePostAction);
  } else {
    PrePostActionTy Action;
    Callback(CodeGen, CGF, Action);
  }
}
/// Check if the combiner is a call to a UDR combiner and, if so, return the
/// UDR decl used for reduction.
static const OMPDeclareReductionDecl *
getReductionInit(const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl()))
          return DRD;
  return nullptr;
}
static void emitInitWithReductionInitializer(CodeGenFunction &CGF,
                                             const OMPDeclareReductionDecl *DRD,
                                             const Expr *InitOp,
                                             Address Private, Address Original,
                                             QualType Ty) {
  if (DRD->getInitializer()) {
    std::pair<llvm::Function *, llvm::Function *> Reduction =
        CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
    const auto *CE = cast<CallExpr>(InitOp);
    const auto *OVE = cast<OpaqueValueExpr>(CE->getCallee());
    const Expr *LHS = CE->getArg(/*Arg=*/0)->IgnoreParenImpCasts();
    const Expr *RHS = CE->getArg(/*Arg=*/1)->IgnoreParenImpCasts();
    const auto *LHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(LHS)->getSubExpr());
    const auto *RHSDRE =
        cast<DeclRefExpr>(cast<UnaryOperator>(RHS)->getSubExpr());
    CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
    PrivateScope.addPrivate(cast<VarDecl>(LHSDRE->getDecl()), Private);
    PrivateScope.addPrivate(cast<VarDecl>(RHSDRE->getDecl()), Original);
    (void)PrivateScope.Privatize();
    RValue Func = RValue::get(Reduction.second);
    CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
    CGF.EmitIgnoredExpr(InitOp);
  } else {
    llvm::Constant *Init = CGF.CGM.EmitNullConstant(Ty);
    std::string Name = CGF.CGM.getOpenMPRuntime().getName({"init"});
    auto *GV = new llvm::GlobalVariable(
        CGF.CGM.getModule(), Init->getType(), /*isConstant=*/true,
        llvm::GlobalValue::PrivateLinkage, Init, Name);
    LValue LV = CGF.MakeNaturalAlignAddrLValue(GV, Ty);
    RValue InitRVal;
    switch (CGF.getEvaluationKind(Ty)) {
    case TEK_Scalar:
      InitRVal = CGF.EmitLoadOfLValue(LV, DRD->getLocation());
      break;
    case TEK_Complex:
      InitRVal =
          RValue::getComplex(CGF.EmitLoadOfComplex(LV, DRD->getLocation()));
      break;
    case TEK_Aggregate: {
      OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_LValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, LV);
      CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                           /*IsInitializer=*/false);
      return;
    }
    }
    OpaqueValueExpr OVE(DRD->getLocation(), Ty, VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, &OVE, InitRVal);
    CGF.EmitAnyExprToMem(&OVE, Private, Ty.getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
/// Emit initialization of arrays of complex types.
/// \param DestAddr Address of the array.
/// \param Type Type of array.
/// \param Init Initial expression of array.
/// \param SrcAddr Address of the original array.
static void EmitOMPAggregateInit(CodeGenFunction &CGF, Address DestAddr,
                                 QualType Type, bool EmitDeclareReductionInit,
                                 const Expr *Init,
                                 const OMPDeclareReductionDecl *DRD,
                                 Address SrcAddr = Address::invalid()) {
  // Perform element-by-element initialization.
  QualType ElementTy;

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, DestAddr);
  if (DRD)
    SrcAddr =
        CGF.Builder.CreateElementBitCast(SrcAddr, DestAddr.getElementType());

  llvm::Value *SrcBegin = nullptr;
  if (DRD)
    SrcBegin = SrcAddr.getPointer();
  llvm::Value *DestBegin = DestAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *DestEnd =
      CGF.Builder.CreateGEP(DestAddr.getElementType(), DestBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arrayinit.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arrayinit.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(DestBegin, DestEnd, "omp.arrayinit.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *SrcElementPHI = nullptr;
  Address SrcElementCurrent = Address::invalid();
  if (DRD) {
    SrcElementPHI = CGF.Builder.CreatePHI(SrcBegin->getType(), 2,
                                          "omp.arraycpy.srcElementPast");
    SrcElementPHI->addIncoming(SrcBegin, EntryBB);
    SrcElementCurrent =
        Address(SrcElementPHI, SrcAddr.getElementType(),
                SrcAddr.getAlignment().alignmentOfArrayElement(ElementSize));
  }
  llvm::PHINode *DestElementPHI = CGF.Builder.CreatePHI(
      DestBegin->getType(), 2, "omp.arraycpy.destElementPast");
  DestElementPHI->addIncoming(DestBegin, EntryBB);
  Address DestElementCurrent =
      Address(DestElementPHI, DestAddr.getElementType(),
              DestAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::RunCleanupsScope InitScope(CGF);
    if (EmitDeclareReductionInit) {
      emitInitWithReductionInitializer(CGF, DRD, Init, DestElementCurrent,
                                       SrcElementCurrent, ElementTy);
    } else
      CGF.EmitAnyExprToMem(Init, DestElementCurrent, ElementTy.getQualifiers(),
                           /*IsInitializer=*/false);
  }

  if (DRD) {
    // Shift the address forward by one element.
    llvm::Value *SrcElementNext = CGF.Builder.CreateConstGEP1_32(
        SrcAddr.getElementType(), SrcElementPHI, /*Idx0=*/1,
        "omp.arraycpy.dest.element");
    SrcElementPHI->addIncoming(SrcElementNext, CGF.Builder.GetInsertBlock());
  }

  // Shift the address forward by one element.
  llvm::Value *DestElementNext = CGF.Builder.CreateConstGEP1_32(
      DestAddr.getElementType(), DestElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(DestElementNext, DestEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  DestElementPHI->addIncoming(DestElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}
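// Control flow emitted by EmitOMPAggregateInit (sketch):
//   <entry>:            isempty = icmp eq dest.begin, dest.end
//                       br isempty, omp.arrayinit.done, omp.arrayinit.body
//   omp.arrayinit.body: PHIs over the destination (and the source, when a UDR
//                       initializer needs the original element), initialize
//                       one element, advance by one, loop until dest.end
//   omp.arrayinit.done: fall through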
LValue ReductionCodeGen::emitSharedLValue(CodeGenFunction &CGF, const Expr *E) {
  return CGF.EmitOMPSharedLValue(E);
}

LValue ReductionCodeGen::emitSharedLValueUB(CodeGenFunction &CGF,
                                            const Expr *E) {
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E))
    return CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false);
  return LValue();
}
void ReductionCodeGen::emitAggregateInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    const OMPDeclareReductionDecl *DRD) {
  // Emit VarDecl with copy init for arrays.
  // Get the address of the original variable captured in current
  // captured region.
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  bool EmitDeclareReductionInit =
      DRD && (DRD->getInitializer() || !PrivateVD->hasInit());
  EmitOMPAggregateInit(CGF, PrivateAddr, PrivateVD->getType(),
                       EmitDeclareReductionInit,
                       EmitDeclareReductionInit ? ClausesData[N].ReductionOp
                                                : PrivateVD->getInit(),
                       DRD, SharedAddr);
}
ReductionCodeGen::ReductionCodeGen(ArrayRef<const Expr *> Shareds,
                                   ArrayRef<const Expr *> Origs,
                                   ArrayRef<const Expr *> Privates,
                                   ArrayRef<const Expr *> ReductionOps) {
  ClausesData.reserve(Shareds.size());
  SharedAddresses.reserve(Shareds.size());
  Sizes.reserve(Shareds.size());
  BaseDecls.reserve(Shareds.size());
  const auto *IOrig = Origs.begin();
  const auto *IPriv = Privates.begin();
  const auto *IRed = ReductionOps.begin();
  for (const Expr *Ref : Shareds) {
    ClausesData.emplace_back(Ref, *IOrig, *IPriv, *IRed);
    std::advance(IOrig, 1);
    std::advance(IPriv, 1);
    std::advance(IRed, 1);
  }
}
void ReductionCodeGen::emitSharedOrigLValue(CodeGenFunction &CGF, unsigned N) {
  assert(SharedAddresses.size() == N && OrigAddresses.size() == N &&
         "Number of generated lvalues must be exactly N.");
  LValue First = emitSharedLValue(CGF, ClausesData[N].Shared);
  LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Shared);
  SharedAddresses.emplace_back(First, Second);
  if (ClausesData[N].Shared == ClausesData[N].Ref) {
    OrigAddresses.emplace_back(First, Second);
  } else {
    LValue First = emitSharedLValue(CGF, ClausesData[N].Ref);
    LValue Second = emitSharedLValueUB(CGF, ClausesData[N].Ref);
    OrigAddresses.emplace_back(First, Second);
  }
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N) {
  QualType PrivateType = getPrivateType(N);
  bool AsArraySection = isa<OMPArraySectionExpr>(ClausesData[N].Ref);
  if (!PrivateType->isVariablyModifiedType()) {
    Sizes.emplace_back(
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType()),
        nullptr);
    return;
  }
  llvm::Value *Size;
  llvm::Value *SizeInChars;
  auto *ElemType = OrigAddresses[N].first.getAddress(CGF).getElementType();
  auto *ElemSizeOf = llvm::ConstantExpr::getSizeOf(ElemType);
  if (AsArraySection) {
    Size = CGF.Builder.CreatePtrDiff(ElemType,
                                     OrigAddresses[N].second.getPointer(CGF),
                                     OrigAddresses[N].first.getPointer(CGF));
    Size = CGF.Builder.CreateNUWAdd(
        Size, llvm::ConstantInt::get(Size->getType(), /*V=*/1));
    SizeInChars = CGF.Builder.CreateNUWMul(Size, ElemSizeOf);
  } else {
    SizeInChars =
        CGF.getTypeSize(OrigAddresses[N].first.getType().getNonReferenceType());
    Size = CGF.Builder.CreateExactUDiv(SizeInChars, ElemSizeOf);
  }
  Sizes.emplace_back(SizeInChars, Size);
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitAggregateType(CodeGenFunction &CGF, unsigned N,
                                         llvm::Value *Size) {
  QualType PrivateType = getPrivateType(N);
  if (!PrivateType->isVariablyModifiedType()) {
    assert(!Size && !Sizes[N].second &&
           "Size should be nullptr for non-variably modified reduction "
           "items.");
    return;
  }
  CodeGenFunction::OpaqueValueMapping OpaqueMap(
      CGF,
      cast<OpaqueValueExpr>(
          CGF.getContext().getAsVariableArrayType(PrivateType)->getSizeExpr()),
      RValue::get(Size));
  CGF.EmitVariablyModifiedType(PrivateType);
}
void ReductionCodeGen::emitInitialization(
    CodeGenFunction &CGF, unsigned N, Address PrivateAddr, Address SharedAddr,
    llvm::function_ref<bool(CodeGenFunction &)> DefaultInit) {
  assert(SharedAddresses.size() > N && "No variable was generated");
  const auto *PrivateVD =
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Private)->getDecl());
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  if (CGF.getContext().getAsArrayType(PrivateVD->getType())) {
    if (DRD && DRD->getInitializer())
      (void)DefaultInit(CGF);
    emitAggregateInitialization(CGF, N, PrivateAddr, SharedAddr, DRD);
  } else if (DRD && (DRD->getInitializer() || !PrivateVD->hasInit())) {
    (void)DefaultInit(CGF);
    QualType SharedType = SharedAddresses[N].first.getType();
    emitInitWithReductionInitializer(CGF, DRD, ClausesData[N].ReductionOp,
                                     PrivateAddr, SharedAddr, SharedType);
  } else if (!DefaultInit(CGF) && PrivateVD->hasInit() &&
             !CGF.isTrivialInitializer(PrivateVD->getInit())) {
    CGF.EmitAnyExprToMem(PrivateVD->getInit(), PrivateAddr,
                         PrivateVD->getType().getQualifiers(),
                         /*IsInitializer=*/false);
  }
}
bool ReductionCodeGen::needCleanups(unsigned N) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  return DTorKind != QualType::DK_none;
}

void ReductionCodeGen::emitCleanups(CodeGenFunction &CGF, unsigned N,
                                    Address PrivateAddr) {
  QualType PrivateType = getPrivateType(N);
  QualType::DestructionKind DTorKind = PrivateType.isDestructedType();
  if (needCleanups(N)) {
    PrivateAddr = CGF.Builder.CreateElementBitCast(
        PrivateAddr, CGF.ConvertTypeForMem(PrivateType));
    CGF.pushDestroy(DTorKind, PrivateAddr, PrivateType);
  }
}
loadToBegin(CodeGenFunction
&CGF
, QualType BaseTy
, QualType ElTy
,
921 BaseTy
= BaseTy
.getNonReferenceType();
922 while ((BaseTy
->isPointerType() || BaseTy
->isReferenceType()) &&
923 !CGF
.getContext().hasSameType(BaseTy
, ElTy
)) {
924 if (const auto *PtrTy
= BaseTy
->getAs
<PointerType
>()) {
925 BaseLV
= CGF
.EmitLoadOfPointerLValue(BaseLV
.getAddress(CGF
), PtrTy
);
927 LValue RefLVal
= CGF
.MakeAddrLValue(BaseLV
.getAddress(CGF
), BaseTy
);
928 BaseLV
= CGF
.EmitLoadOfReferenceLValue(RefLVal
);
930 BaseTy
= BaseTy
->getPointeeType();
932 return CGF
.MakeAddrLValue(
933 CGF
.Builder
.CreateElementBitCast(BaseLV
.getAddress(CGF
),
934 CGF
.ConvertTypeForMem(ElTy
)),
935 BaseLV
.getType(), BaseLV
.getBaseInfo(),
936 CGF
.CGM
.getTBAAInfoForSubobject(BaseLV
, BaseLV
.getType()));
static Address castToBase(CodeGenFunction &CGF, QualType BaseTy, QualType ElTy,
                          Address OriginalBaseAddress, llvm::Value *Addr) {
  Address Tmp = Address::invalid();
  Address TopTmp = Address::invalid();
  Address MostTopTmp = Address::invalid();
  BaseTy = BaseTy.getNonReferenceType();
  while ((BaseTy->isPointerType() || BaseTy->isReferenceType()) &&
         !CGF.getContext().hasSameType(BaseTy, ElTy)) {
    Tmp = CGF.CreateMemTemp(BaseTy);
    if (TopTmp.isValid())
      CGF.Builder.CreateStore(Tmp.getPointer(), TopTmp);
    else
      MostTopTmp = Tmp;
    TopTmp = Tmp;
    BaseTy = BaseTy->getPointeeType();
  }

  if (Tmp.isValid()) {
    Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        Addr, Tmp.getElementType());
    CGF.Builder.CreateStore(Addr, Tmp);
    return MostTopTmp;
  }

  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, OriginalBaseAddress.getType());
  return OriginalBaseAddress.withPointer(Addr);
}
static const VarDecl *getBaseDecl(const Expr *Ref, const DeclRefExpr *&DE) {
  const VarDecl *OrigVD = nullptr;
  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(Ref)) {
    const Expr *Base = OASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempOASE = dyn_cast<OMPArraySectionExpr>(Base))
      Base = TempOASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  } else if (const auto *ASE = dyn_cast<ArraySubscriptExpr>(Ref)) {
    const Expr *Base = ASE->getBase()->IgnoreParenImpCasts();
    while (const auto *TempASE = dyn_cast<ArraySubscriptExpr>(Base))
      Base = TempASE->getBase()->IgnoreParenImpCasts();
    DE = cast<DeclRefExpr>(Base);
    OrigVD = cast<VarDecl>(DE->getDecl());
  }
  return OrigVD;
}
Address ReductionCodeGen::adjustPrivateAddress(CodeGenFunction &CGF, unsigned N,
                                               Address PrivateAddr) {
  const DeclRefExpr *DE;
  if (const VarDecl *OrigVD = ::getBaseDecl(ClausesData[N].Ref, DE)) {
    BaseDecls.emplace_back(OrigVD);
    LValue OriginalBaseLValue = CGF.EmitLValue(DE);
    LValue BaseLValue =
        loadToBegin(CGF, OrigVD->getType(), SharedAddresses[N].first.getType(),
                    OriginalBaseLValue);
    Address SharedAddr = SharedAddresses[N].first.getAddress(CGF);
    llvm::Value *Adjustment = CGF.Builder.CreatePtrDiff(
        SharedAddr.getElementType(), BaseLValue.getPointer(CGF),
        SharedAddr.getPointer());
    llvm::Value *PrivatePointer =
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            PrivateAddr.getPointer(), SharedAddr.getType());
    llvm::Value *Ptr = CGF.Builder.CreateGEP(
        SharedAddr.getElementType(), PrivatePointer, Adjustment);
    return castToBase(CGF, OrigVD->getType(),
                      SharedAddresses[N].first.getType(),
                      OriginalBaseLValue.getAddress(CGF), Ptr);
  }
  BaseDecls.emplace_back(
      cast<VarDecl>(cast<DeclRefExpr>(ClausesData[N].Ref)->getDecl()));
  return PrivateAddr;
}
bool ReductionCodeGen::usesReductionInitializer(unsigned N) const {
  const OMPDeclareReductionDecl *DRD =
      getReductionInit(ClausesData[N].ReductionOp);
  return DRD && DRD->getInitializer();
}
LValue CGOpenMPRegionInfo::getThreadIDVariableLValue(CodeGenFunction &CGF) {
  return CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(getThreadIDVariable()),
      getThreadIDVariable()->getType()->castAs<PointerType>());
}
void CGOpenMPRegionInfo::EmitBody(CodeGenFunction &CGF, const Stmt *S) {
  if (!CGF.HaveInsertPoint())
    return;
  // 1.2.2 OpenMP Language Terminology
  // Structured block - An executable statement with a single entry at the
  // top and a single exit at the bottom.
  // The point of exit cannot be a branch out of the structured block.
  // longjmp() and throw() must not violate the entry/exit criteria.
  CGF.EHStack.pushTerminate();
  if (S)
    CGF.incrementProfileCounter(S);
  CodeGen(CGF);
  CGF.EHStack.popTerminate();
}
LValue CGOpenMPTaskOutlinedRegionInfo::getThreadIDVariableLValue(
    CodeGenFunction &CGF) {
  return CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(getThreadIDVariable()),
                            getThreadIDVariable()->getType(),
                            AlignmentSource::Decl);
}
static FieldDecl *addFieldToRecordDecl(ASTContext &C, DeclContext *DC,
                                       QualType FieldTy) {
  auto *Field = FieldDecl::Create(
      C, DC, SourceLocation(), SourceLocation(), /*Id=*/nullptr, FieldTy,
      C.getTrivialTypeSourceInfo(FieldTy, SourceLocation()),
      /*BW=*/nullptr, /*Mutable=*/false, /*InitStyle=*/ICIS_NoInit);
  Field->setAccess(AS_public);
  DC->addDecl(Field);
  return Field;
}
CGOpenMPRuntime::CGOpenMPRuntime(CodeGenModule &CGM, StringRef FirstSeparator,
                                 StringRef Separator)
    : CGM(CGM), FirstSeparator(FirstSeparator), Separator(Separator),
      OMPBuilder(CGM.getModule()), OffloadEntriesInfoManager() {
  KmpCriticalNameTy = llvm::ArrayType::get(CGM.Int32Ty, /*NumElements*/ 8);

  // Initialize Types used in OpenMPIRBuilder from OMPKinds.def
  OMPBuilder.initialize();
  loadOffloadInfoMetadata();
}
void CGOpenMPRuntime::clear() {
  InternalVars.clear();
  // Clean non-target variable declarations possibly used only in debug info.
  for (const auto &Data : EmittedNonTargetVariables) {
    if (!Data.getValue().pointsToAliveValue())
      continue;
    auto *GV = dyn_cast<llvm::GlobalVariable>(Data.getValue());
    if (!GV)
      continue;
    if (!GV->isDeclaration() || GV->getNumUses() > 0)
      continue;
    GV->eraseFromParent();
  }
}
std::string CGOpenMPRuntime::getName(ArrayRef<StringRef> Parts) const {
  SmallString<128> Buffer;
  llvm::raw_svector_ostream OS(Buffer);
  StringRef Sep = FirstSeparator;
  for (StringRef Part : Parts) {
    OS << Sep << Part;
    Sep = Separator;
  }
  return std::string(OS.str());
}
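// With the default host separators (FirstSeparator and Separator both "."),
// e.g. getName({"omp", "reduction", "reduction_func"}) produces
// ".omp.reduction.reduction_func"; device variants may pass different
// separators to the CGOpenMPRuntime constructor.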
static llvm::Function *
emitCombinerOrInitializer(CodeGenModule &CGM, QualType Ty,
                          const Expr *CombinerInitializer, const VarDecl *In,
                          const VarDecl *Out, bool IsCombiner) {
  // void .omp_combiner.(Ty *in, Ty *out);
  ASTContext &C = CGM.getContext();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  FunctionArgList Args;
  ImplicitParamDecl OmpOutParm(C, /*DC=*/nullptr, Out->getLocation(),
                               /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl OmpInParm(C, /*DC=*/nullptr, In->getLocation(),
                              /*Id=*/nullptr, PtrTy, ImplicitParamDecl::Other);
  Args.push_back(&OmpOutParm);
  Args.push_back(&OmpInParm);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName(
      {IsCombiner ? "omp_combiner" : "omp_initializer", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  if (CGM.getLangOpts().Optimize) {
    Fn->removeFnAttr(llvm::Attribute::NoInline);
    Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
    Fn->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  // Map "T omp_in;" variable to "*omp_in_parm" value in all expressions.
  // Map "T omp_out;" variable to "*omp_out_parm" value in all expressions.
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, In->getLocation(),
                    Out->getLocation());
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  Address AddrIn = CGF.GetAddrOfLocalVar(&OmpInParm);
  Scope.addPrivate(
      In, CGF.EmitLoadOfPointerLValue(AddrIn, PtrTy->castAs<PointerType>())
              .getAddress(CGF));
  Address AddrOut = CGF.GetAddrOfLocalVar(&OmpOutParm);
  Scope.addPrivate(
      Out, CGF.EmitLoadOfPointerLValue(AddrOut, PtrTy->castAs<PointerType>())
               .getAddress(CGF));
  (void)Scope.Privatize();
  if (!IsCombiner && Out->hasInit() &&
      !CGF.isTrivialInitializer(Out->getInit())) {
    CGF.EmitAnyExprToMem(Out->getInit(), CGF.GetAddrOfLocalVar(Out),
                         Out->getType().getQualifiers(),
                         /*IsInitializer=*/true);
  }
  if (CombinerInitializer)
    CGF.EmitIgnoredExpr(CombinerInitializer);
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}
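// For example, for
//   #pragma omp declare reduction(min : int : omp_out = omp_out < omp_in
//                                                           ? omp_out : omp_in)
// this emits an internal helper (always-inline when optimizing) taking two
// restrict-qualified pointers to the reduction objects, roughly
//   void .omp_combiner.(int *omp_out, int *omp_in);
// with omp_in/omp_out privatized to the pointees before the combiner
// expression is emitted.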
void CGOpenMPRuntime::emitUserDefinedReduction(
    CodeGenFunction *CGF, const OMPDeclareReductionDecl *D) {
  if (UDRMap.count(D) > 0)
    return;
  llvm::Function *Combiner = emitCombinerOrInitializer(
      CGM, D->getType(), D->getCombiner(),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerIn())->getDecl()),
      cast<VarDecl>(cast<DeclRefExpr>(D->getCombinerOut())->getDecl()),
      /*IsCombiner=*/true);
  llvm::Function *Initializer = nullptr;
  if (const Expr *Init = D->getInitializer()) {
    Initializer = emitCombinerOrInitializer(
        CGM, D->getType(),
        D->getInitializerKind() == OMPDeclareReductionDecl::CallInit ? Init
                                                                     : nullptr,
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitOrig())->getDecl()),
        cast<VarDecl>(cast<DeclRefExpr>(D->getInitPriv())->getDecl()),
        /*IsCombiner=*/false);
  }
  UDRMap.try_emplace(D, Combiner, Initializer);
  if (CGF) {
    auto &Decls = FunctionUDRMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
std::pair<llvm::Function *, llvm::Function *>
CGOpenMPRuntime::getUserDefinedReduction(const OMPDeclareReductionDecl *D) {
  auto I = UDRMap.find(D);
  if (I != UDRMap.end())
    return I->second;
  emitUserDefinedReduction(/*CGF=*/nullptr, D);
  return UDRMap.lookup(D);
}
namespace {
// Temporary RAII solution to perform a push/pop stack event on the OpenMP IR
// Builder if one is present.
struct PushAndPopStackRAII {
  PushAndPopStackRAII(llvm::OpenMPIRBuilder *OMPBuilder, CodeGenFunction &CGF,
                      bool HasCancel, llvm::omp::Directive Kind)
      : OMPBuilder(OMPBuilder) {
    if (!OMPBuilder)
      return;

    // The following callback is the crucial part of clang's cleanup process.
    //
    // NOTE:
    // Once the OpenMPIRBuilder is used to create parallel regions (and
    // similar), the cancellation destination (Dest below) is determined via
    // IP. That means if we have variables to finalize we split the block at IP,
    // use the new block (=BB) as destination to build a JumpDest (via
    // getJumpDestInCurrentScope(BB)) which then is fed to
    // EmitBranchThroughCleanup. Furthermore, there will not be the need
    // to push & pop a FinalizationInfo object.
    // The FiniCB will still be needed but at the point where the
    // OpenMPIRBuilder is asked to construct a parallel (or similar) construct.
    auto FiniCB = [&CGF](llvm::OpenMPIRBuilder::InsertPointTy IP) {
      assert(IP.getBlock()->end() == IP.getPoint() &&
             "Clang CG should cause non-terminated block!");
      CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
      CGF.Builder.restoreIP(IP);
      CodeGenFunction::JumpDest Dest =
          CGF.getOMPCancelDestination(OMPD_parallel);
      CGF.EmitBranchThroughCleanup(Dest);
    };

    // TODO: Remove this once we emit parallel regions through the
    // OpenMPIRBuilder as it can do this setup internally.
    llvm::OpenMPIRBuilder::FinalizationInfo FI({FiniCB, Kind, HasCancel});
    OMPBuilder->pushFinalizationCB(std::move(FI));
  }
  ~PushAndPopStackRAII() {
    if (OMPBuilder)
      OMPBuilder->popFinalizationCB();
  }
  llvm::OpenMPIRBuilder *OMPBuilder;
};
} // namespace
static llvm::Function *emitParallelOrTeamsOutlinedFunction(
    CodeGenModule &CGM, const OMPExecutableDirective &D, const CapturedStmt *CS,
    const VarDecl *ThreadIDVar, OpenMPDirectiveKind InnermostKind,
    const StringRef OutlinedHelperName, const RegionCodeGenTy &CodeGen) {
  assert(ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 *");
  CodeGenFunction CGF(CGM, true);
  bool HasCancel = false;
  if (const auto *OPD = dyn_cast<OMPParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPD = dyn_cast<OMPTargetParallelDirective>(&D))
    HasCancel = OPD->hasCancel();
  else if (const auto *OPSD = dyn_cast<OMPParallelSectionsDirective>(&D))
    HasCancel = OPSD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPTargetParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD = dyn_cast<OMPDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();
  else if (const auto *OPFD =
               dyn_cast<OMPTargetTeamsDistributeParallelForDirective>(&D))
    HasCancel = OPFD->hasCancel();

  // TODO: Temporarily inform the OpenMPIRBuilder, if any, about the new
  // parallel region to make cancellation barriers work properly.
  llvm::OpenMPIRBuilder &OMPBuilder = CGM.getOpenMPRuntime().getOMPBuilder();
  PushAndPopStackRAII PSR(&OMPBuilder, CGF, HasCancel, InnermostKind);
  CGOpenMPOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen, InnermostKind,
                                    HasCancel, OutlinedHelperName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  return CGF.GenerateOpenMPCapturedStmtFunction(*CS, D.getBeginLoc());
}
llvm::Function *CGOpenMPRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_parallel);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}

llvm::Function *CGOpenMPRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  const CapturedStmt *CS = D.getCapturedStmt(OMPD_teams);
  return emitParallelOrTeamsOutlinedFunction(
      CGM, D, CS, ThreadIDVar, InnermostKind, getOutlinedHelperName(), CodeGen);
}
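// The outlined functions returned here follow the kmpc_micro shape that
// __kmpc_fork_call / __kmpc_fork_teams expect (sketch):
//   void .omp_outlined.(kmp_int32 *global_tid, kmp_int32 *bound_tid,
//                       /* captured variables */...);
// ThreadIDVar is bound to the first parameter inside the region.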
llvm::Function *CGOpenMPRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  auto &&UntiedCodeGen = [this, &D, TaskTVar](CodeGenFunction &CGF,
                                              PrePostActionTy &) {
    llvm::Value *ThreadID = getThreadID(CGF, D.getBeginLoc());
    llvm::Value *UpLoc = emitUpdateLocation(CGF, D.getBeginLoc());
    llvm::Value *TaskArgs[] = {
        UpLoc, ThreadID,
        CGF.EmitLoadOfPointerLValue(CGF.GetAddrOfLocalVar(TaskTVar),
                                    TaskTVar->getType()->castAs<PointerType>())
            .getPointer(CGF)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_task),
                        TaskArgs);
  };
  CGOpenMPTaskOutlinedRegionInfo::UntiedTaskActionTy Action(Tied, PartIDVar,
                                                            UntiedCodeGen);
  CodeGen.setAction(Action);
  assert(!ThreadIDVar->getType()->isPointerType() &&
         "thread id variable must be of type kmp_int32 for tasks");
  const OpenMPDirectiveKind Region =
      isOpenMPTaskLoopDirective(D.getDirectiveKind()) ? OMPD_taskloop
                                                      : OMPD_task;
  const CapturedStmt *CS = D.getCapturedStmt(Region);
  bool HasCancel = false;
  if (const auto *TD = dyn_cast<OMPTaskDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();
  else if (const auto *TD = dyn_cast<OMPParallelMasterTaskLoopDirective>(&D))
    HasCancel = TD->hasCancel();

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTaskOutlinedRegionInfo CGInfo(*CS, ThreadIDVar, CodeGen,
                                        InnermostKind, HasCancel, Action);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
  llvm::Function *Res = CGF.GenerateCapturedStmtFunction(*CS);
  if (!Tied)
    NumberOfParts = Action.getNumberOfParts();
  return Res;
}
void CGOpenMPRuntime::setLocThreadIdInsertPt(CodeGenFunction &CGF,
                                             bool AtCurrentPoint) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  assert(!Elem.second.ServiceInsertPt && "Insert point is set already.");

  llvm::Value *Undef = llvm::UndefValue::get(CGF.Int32Ty);
  if (AtCurrentPoint) {
    Elem.second.ServiceInsertPt = new llvm::BitCastInst(
        Undef, CGF.Int32Ty, "svcpt", CGF.Builder.GetInsertBlock());
  } else {
    Elem.second.ServiceInsertPt =
        new llvm::BitCastInst(Undef, CGF.Int32Ty, "svcpt");
    Elem.second.ServiceInsertPt->insertAfter(CGF.AllocaInsertPt);
  }
}
void CGOpenMPRuntime::clearLocThreadIdInsertPt(CodeGenFunction &CGF) {
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (Elem.second.ServiceInsertPt) {
    llvm::Instruction *Ptr = Elem.second.ServiceInsertPt;
    Elem.second.ServiceInsertPt = nullptr;
    Ptr->eraseFromParent();
  }
}
static StringRef getIdentStringFromSourceLocation(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  SmallString<128> &Buffer) {
  llvm::raw_svector_ostream OS(Buffer);
  // Build debug location
  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  OS << ";" << PLoc.getFilename() << ";";
  if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
    OS << FD->getQualifiedNameAsString();
  OS << ";" << PLoc.getLine() << ";" << PLoc.getColumn() << ";;";
  return OS.str();
}
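// The resulting ident string has the shape ";file;function;line;column;;",
// e.g. ";test.c;foo;12;3;;" for a construct at line 12, column 3 inside
// foo() in test.c (the function field is left empty when CurFuncDecl is not
// a FunctionDecl).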
llvm::Value *CGOpenMPRuntime::emitUpdateLocation(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned Flags) {
  uint32_t SrcLocStrSize;
  llvm::Constant *SrcLocStr;
  if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo ||
      Loc.isInvalid()) {
    SrcLocStr = OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);
  } else {
    std::string FunctionName;
    if (const auto *FD = dyn_cast_or_null<FunctionDecl>(CGF.CurFuncDecl))
      FunctionName = FD->getQualifiedNameAsString();
    PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
    const char *FileName = PLoc.getFilename();
    unsigned Line = PLoc.getLine();
    unsigned Column = PLoc.getColumn();
    SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(FunctionName, FileName, Line,
                                                Column, SrcLocStrSize);
  }
  unsigned Reserved2Flags = getDefaultLocationReserved2Flags();
  return OMPBuilder.getOrCreateIdent(
      SrcLocStr, SrcLocStrSize, llvm::omp::IdentFlag(Flags), Reserved2Flags);
}
llvm::Value *CGOpenMPRuntime::getThreadID(CodeGenFunction &CGF,
                                          SourceLocation Loc) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  // If the OpenMPIRBuilder is used we need to use it for all thread id calls as
  // the clang invariants used below might be broken.
  if (CGM.getLangOpts().OpenMPIRBuilder) {
    SmallString<128> Buffer;
    OMPBuilder.updateToLocation(CGF.Builder.saveIP());
    uint32_t SrcLocStrSize;
    auto *SrcLocStr = OMPBuilder.getOrCreateSrcLocStr(
        getIdentStringFromSourceLocation(CGF, Loc, Buffer), SrcLocStrSize);
    return OMPBuilder.getOrCreateThreadID(
        OMPBuilder.getOrCreateIdent(SrcLocStr, SrcLocStrSize));
  }

  llvm::Value *ThreadID = nullptr;
  // Check whether we've already cached a load of the thread id in this
  // function.
  auto I = OpenMPLocThreadIDMap.find(CGF.CurFn);
  if (I != OpenMPLocThreadIDMap.end()) {
    ThreadID = I->second.ThreadID;
    if (ThreadID != nullptr)
      return ThreadID;
  }
  // If exceptions are enabled, do not use parameter to avoid possible crash.
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    if (OMPRegionInfo->getThreadIDVariable()) {
      // Check if this is an outlined function with thread id passed as
      // argument.
      LValue LVal = OMPRegionInfo->getThreadIDVariableLValue(CGF);
      llvm::BasicBlock *TopBlock = CGF.AllocaInsertPt->getParent();
      if (!CGF.EHStack.requiresLandingPad() || !CGF.getLangOpts().Exceptions ||
          !CGF.getLangOpts().CXXExceptions ||
          CGF.Builder.GetInsertBlock() == TopBlock ||
          !isa<llvm::Instruction>(LVal.getPointer(CGF)) ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              TopBlock ||
          cast<llvm::Instruction>(LVal.getPointer(CGF))->getParent() ==
              CGF.Builder.GetInsertBlock()) {
        ThreadID = CGF.EmitLoadOfScalar(LVal, Loc);
        // If value loaded in entry block, cache it and use it everywhere in
        // function.
        if (CGF.Builder.GetInsertBlock() == TopBlock) {
          auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
          Elem.second.ThreadID = ThreadID;
        }
        return ThreadID;
      }
    }
  }

  // This is not an outlined function region - need to call __kmpc_int32
  // kmpc_global_thread_num(ident_t *loc).
  // Generate thread id value and cache this value for use across the
  // function.
  auto &Elem = OpenMPLocThreadIDMap.FindAndConstruct(CGF.CurFn);
  if (!Elem.second.ServiceInsertPt)
    setLocThreadIdInsertPt(CGF);
  CGBuilderTy::InsertPointGuard IPG(CGF.Builder);
  CGF.Builder.SetInsertPoint(Elem.second.ServiceInsertPt);
  llvm::CallInst *Call = CGF.Builder.CreateCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_global_thread_num),
      emitUpdateLocation(CGF, Loc));
  Call->setCallingConv(CGF.getRuntimeCC());
  Elem.second.ThreadID = Call;
  return Call;
}
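// In the fallback path above the thread id is materialized once per function:
// a call of the shape
//   %tid = call i32 @__kmpc_global_thread_num(ptr @<ident>)
// is emitted at the entry-block service insertion point and cached in
// OpenMPLocThreadIDMap, so later queries reuse %tid instead of calling into
// the runtime again.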
void CGOpenMPRuntime::functionFinished(CodeGenFunction &CGF) {
  assert(CGF.CurFn && "No function in current CodeGenFunction.");
  if (OpenMPLocThreadIDMap.count(CGF.CurFn)) {
    clearLocThreadIdInsertPt(CGF);
    OpenMPLocThreadIDMap.erase(CGF.CurFn);
  }
  if (FunctionUDRMap.count(CGF.CurFn) > 0) {
    for (const auto *D : FunctionUDRMap[CGF.CurFn])
      UDRMap.erase(D);
    FunctionUDRMap.erase(CGF.CurFn);
  }
  auto I = FunctionUDMMap.find(CGF.CurFn);
  if (I != FunctionUDMMap.end()) {
    for (const auto *D : I->second)
      UDMMap.erase(D);
    FunctionUDMMap.erase(I);
  }
  LastprivateConditionalToTypes.erase(CGF.CurFn);
  FunctionToUntiedTaskStackMap.erase(CGF.CurFn);
}
llvm::Type *CGOpenMPRuntime::getIdentTyPointerTy() {
  return OMPBuilder.IdentPtr;
}

llvm::Type *CGOpenMPRuntime::getKmpc_MicroPointerTy() {
  if (!Kmpc_MicroTy) {
    // Build void (*kmpc_micro)(kmp_int32 *global_tid, kmp_int32 *bound_tid,...)
    llvm::Type *MicroParams[] = {llvm::PointerType::getUnqual(CGM.Int32Ty),
                                 llvm::PointerType::getUnqual(CGM.Int32Ty)};
    Kmpc_MicroTy = llvm::FunctionType::get(CGM.VoidTy, MicroParams, true);
  }
  return llvm::PointerType::getUnqual(Kmpc_MicroTy);
}
llvm::FunctionCallee
CGOpenMPRuntime::createForStaticInitFunction(unsigned IVSize, bool IVSigned,
                                             bool IsGPUDistribute) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name;
  if (IsGPUDistribute)
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_distribute_static_init_4"
                                    : "__kmpc_distribute_static_init_4u")
                        : (IVSigned ? "__kmpc_distribute_static_init_8"
                                    : "__kmpc_distribute_static_init_8u");
  else
    Name = IVSize == 32 ? (IVSigned ? "__kmpc_for_static_init_4"
                                    : "__kmpc_for_static_init_4u")
                        : (IVSigned ? "__kmpc_for_static_init_8"
                                    : "__kmpc_for_static_init_8u");

  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      CGM.Int32Ty,                               // schedtype
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy,                                     // p_stride
      ITy,                                       // incr
      ITy                                        // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
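
// For reference, the 32-bit signed variant created above corresponds to the
// following libomp prototype (a sketch; parameter names are illustrative):
//   void __kmpc_for_static_init_4(ident_t *loc, kmp_int32 gtid,
//                                 kmp_int32 schedtype, kmp_int32 *p_lastiter,
//                                 kmp_int32 *p_lower, kmp_int32 *p_upper,
//                                 kmp_int32 *p_stride, kmp_int32 incr,
//                                 kmp_int32 chunk);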
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchInitFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_init_4" : "__kmpc_dispatch_init_4u")
          : (IVSigned ? "__kmpc_dispatch_init_8" : "__kmpc_dispatch_init_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  llvm::Type *TypeParams[] = { getIdentTyPointerTy(), // loc
                               CGM.Int32Ty,           // tid
                               CGM.Int32Ty,           // schedtype
                               ITy,                   // lower
                               ITy,                   // upper
                               ITy,                   // stride
                               ITy                    // chunk
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchFiniFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_fini_4" : "__kmpc_dispatch_fini_4u")
          : (IVSigned ? "__kmpc_dispatch_fini_8" : "__kmpc_dispatch_fini_8u");
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(), // loc
      CGM.Int32Ty,           // tid
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.VoidTy, TypeParams, /*isVarArg=*/false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
llvm::FunctionCallee
CGOpenMPRuntime::createDispatchNextFunction(unsigned IVSize, bool IVSigned) {
  assert((IVSize == 32 || IVSize == 64) &&
         "IV size is not compatible with the omp runtime");
  StringRef Name =
      IVSize == 32
          ? (IVSigned ? "__kmpc_dispatch_next_4" : "__kmpc_dispatch_next_4u")
          : (IVSigned ? "__kmpc_dispatch_next_8" : "__kmpc_dispatch_next_8u");
  llvm::Type *ITy = IVSize == 32 ? CGM.Int32Ty : CGM.Int64Ty;
  auto *PtrTy = llvm::PointerType::getUnqual(ITy);
  llvm::Type *TypeParams[] = {
      getIdentTyPointerTy(),                     // loc
      CGM.Int32Ty,                               // tid
      llvm::PointerType::getUnqual(CGM.Int32Ty), // p_lastiter
      PtrTy,                                     // p_lower
      PtrTy,                                     // p_upper
      PtrTy                                      // p_stride
  };
  auto *FnTy =
      llvm::FunctionType::get(CGM.Int32Ty, TypeParams, /*isVarArg*/ false);
  return CGM.CreateRuntimeFunction(FnTy, Name);
}
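
// Illustrative use of the dispatch entry points created above (a sketch;
// names are placeholders, not emitted symbols): a dynamically scheduled loop
// is driven roughly as
//   __kmpc_dispatch_init_4(loc, gtid, sched, lb, ub, stride, chunk);
//   while (__kmpc_dispatch_next_4(loc, gtid, &last, &lo, &hi, &st))
//     for (i = lo; i <= hi; i += st) body(i);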
/// Obtain information that uniquely identifies a target entry. This
/// consists of the file and device IDs as well as line number associated with
/// the relevant entry source location.
static llvm::TargetRegionEntryInfo
getTargetEntryUniqueInfo(ASTContext &C, SourceLocation Loc,
                         StringRef ParentName = "") {
  SourceManager &SM = C.getSourceManager();

  // The loc should always be valid and have a file ID (the user cannot use
  // #pragma directives in macros).

  assert(Loc.isValid() && "Source location is expected to be always valid.");

  PresumedLoc PLoc = SM.getPresumedLoc(Loc);
  assert(PLoc.isValid() && "Source location is expected to be always valid.");

  llvm::sys::fs::UniqueID ID;
  if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID)) {
    PLoc = SM.getPresumedLoc(Loc, /*UseLineDirectives=*/false);
    assert(PLoc.isValid() && "Source location is expected to be always valid.");
    if (auto EC = llvm::sys::fs::getUniqueID(PLoc.getFilename(), ID))
      SM.getDiagnostics().Report(diag::err_cannot_open_file)
          << PLoc.getFilename() << EC.message();
  }

  return llvm::TargetRegionEntryInfo(ParentName, ID.getDevice(), ID.getFile(),
                                     PLoc.getLine());
}
Address CGOpenMPRuntime::getAddrOfDeclareTargetVar(const VarDecl *VD) {
  if (CGM.getLangOpts().OpenMPSimd)
    return Address::invalid();
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (Res && (*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory))) {
    SmallString<64> PtrName;
    {
      llvm::raw_svector_ostream OS(PtrName);
      OS << CGM.getMangledName(GlobalDecl(VD));
      if (!VD->isExternallyVisible()) {
        auto EntryInfo = getTargetEntryUniqueInfo(
            CGM.getContext(), VD->getCanonicalDecl()->getBeginLoc());
        OS << llvm::format("_%x", EntryInfo.FileID);
      }
      OS << "_decl_tgt_ref_ptr";
    }
    llvm::Value *Ptr = CGM.getModule().getNamedValue(PtrName);
    QualType PtrTy = CGM.getContext().getPointerType(VD->getType());
    llvm::Type *LlvmPtrTy = CGM.getTypes().ConvertTypeForMem(PtrTy);
    if (!Ptr) {
      Ptr = getOrCreateInternalVariable(LlvmPtrTy, PtrName);

      auto *GV = cast<llvm::GlobalVariable>(Ptr);
      GV->setLinkage(llvm::GlobalValue::WeakAnyLinkage);

      if (!CGM.getLangOpts().OpenMPIsDevice)
        GV->setInitializer(CGM.GetAddrOfGlobal(VD));
      registerTargetGlobalVariable(VD, cast<llvm::Constant>(Ptr));
    }
    return Address(Ptr, LlvmPtrTy, CGM.getContext().getDeclAlign(VD));
  }
  return Address::invalid();
}
llvm::Constant *
CGOpenMPRuntime::getOrCreateThreadPrivateCache(const VarDecl *VD) {
  assert(!CGM.getLangOpts().OpenMPUseTLS ||
         !CGM.getContext().getTargetInfo().isTLSSupported());
  // Lookup the entry, lazily creating it if necessary.
  std::string Suffix = getName({"cache", ""});
  return getOrCreateInternalVariable(
      CGM.Int8PtrPtrTy, Twine(CGM.getMangledName(VD)).concat(Suffix));
}
Address CGOpenMPRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                const VarDecl *VD,
                                                Address VDAddr,
                                                SourceLocation Loc) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return VDAddr;

  llvm::Type *VarTy = VDAddr.getElementType();
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.Int8PtrTy),
      CGM.getSize(CGM.GetTargetTypeStoreSize(VarTy)),
      getOrCreateThreadPrivateCache(VD)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
          Args),
      CGF.Int8Ty, VDAddr.getAlignment());
}
void CGOpenMPRuntime::emitThreadPrivateVarInit(
    CodeGenFunction &CGF, Address VDAddr, llvm::Value *Ctor,
    llvm::Value *CopyCtor, llvm::Value *Dtor, SourceLocation Loc) {
  // Call kmp_int32 __kmpc_global_thread_num(&loc) to init the OpenMP runtime
  // library.
  llvm::Value *OMPLoc = emitUpdateLocation(CGF, Loc);
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_global_thread_num),
                      OMPLoc);
  // Call __kmpc_threadprivate_register(&loc, &var, ctor, cctor/*NULL*/, dtor)
  // to register constructor/destructor for variable.
  llvm::Value *Args[] = {
      OMPLoc, CGF.Builder.CreatePointerCast(VDAddr.getPointer(), CGM.VoidPtrTy),
      Ctor, CopyCtor, Dtor};
  CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_threadprivate_register),
      Args);
}
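
// Illustrative lowering (sketch): for 'int x; #pragma omp threadprivate(x)'
// the init helper emitted by the caller performs roughly
//   __kmpc_global_thread_num(&loc);
//   __kmpc_threadprivate_register(&loc, &x, ctor, /*cctor=*/0, dtor);
// matching the two runtime calls built above.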
llvm::Function *CGOpenMPRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc,
    bool PerformInit, CodeGenFunction *CGF) {
  if (CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getContext().getTargetInfo().isTLSSupported())
    return nullptr;

  VD = VD->getDefinition(CGM.getContext());
  if (VD && ThreadPrivateWithDefinition.insert(CGM.getMangledName(VD)).second) {
    QualType ASTTy = VD->getType();

    llvm::Value *Ctor = nullptr, *CopyCtor = nullptr, *Dtor = nullptr;
    const Expr *Init = VD->getAnyInitializer();
    if (CGM.getLangOpts().CPlusPlus && PerformInit) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidPtrTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_ctor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidPtrTy, Fn, FI,
                            Args, Loc, Loc);
      llvm::Value *ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      Address Arg(ArgVal, CtorCGF.Int8Ty, VDAddr.getAlignment());
      Arg = CtorCGF.Builder.CreateElementBitCast(
          Arg, CtorCGF.ConvertTypeForMem(ASTTy));
      CtorCGF.EmitAnyExprToMem(Init, Arg, Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      ArgVal = CtorCGF.EmitLoadOfScalar(
          CtorCGF.GetAddrOfLocalVar(&Dst), /*Volatile=*/false,
          CGM.getContext().VoidPtrTy, Dst.getLocation());
      CtorCGF.Builder.CreateStore(ArgVal, CtorCGF.ReturnValue);
      CtorCGF.FinishFunction();
      Ctor = Fn;
    }
    if (VD->getType().isDestructedType() != QualType::DK_none) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);
      FunctionArgList Args;
      ImplicitParamDecl Dst(CGM.getContext(), /*DC=*/nullptr, Loc,
                            /*Id=*/nullptr, CGM.getContext().VoidPtrTy,
                            ImplicitParamDecl::Other);
      Args.push_back(&Dst);

      const auto &FI = CGM.getTypes().arrangeBuiltinFunctionDeclaration(
          CGM.getContext().VoidTy, Args);
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      std::string Name = getName({"__kmpc_global_dtor_", ""});
      llvm::Function *Fn =
          CGM.CreateGlobalInitOrCleanUpFunction(FTy, Name, FI, Loc);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI, Args,
                            Loc, Loc);
      // Create a scope with an artificial location for the body of this function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Value *ArgVal = DtorCGF.EmitLoadOfScalar(
          DtorCGF.GetAddrOfLocalVar(&Dst),
          /*Volatile=*/false, CGM.getContext().VoidPtrTy, Dst.getLocation());
      DtorCGF.emitDestroy(
          Address(ArgVal, DtorCGF.Int8Ty, VDAddr.getAlignment()), ASTTy,
          DtorCGF.getDestroyer(ASTTy.isDestructedType()),
          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
    }
    // Do not emit init function if it is not required.
    if (!Ctor && !Dtor)
      return nullptr;

    llvm::Type *CopyCtorTyArgs[] = {CGM.VoidPtrTy, CGM.VoidPtrTy};
    auto *CopyCtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CopyCtorTyArgs,
                                               /*isVarArg=*/false)
                           ->getPointerTo();
    // Copying constructor for the threadprivate variable.
    // Must be NULL - reserved by runtime, but currently it requires that this
    // parameter is always NULL. Otherwise it fires an assertion.
    CopyCtor = llvm::Constant::getNullValue(CopyCtorTy);
    if (Ctor == nullptr) {
      auto *CtorTy = llvm::FunctionType::get(CGM.VoidPtrTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Ctor = llvm::Constant::getNullValue(CtorTy);
    }
    if (Dtor == nullptr) {
      auto *DtorTy = llvm::FunctionType::get(CGM.VoidTy, CGM.VoidPtrTy,
                                             /*isVarArg=*/false)
                         ->getPointerTo();
      Dtor = llvm::Constant::getNullValue(DtorTy);
    }
    if (!CGF) {
      auto *InitFunctionTy =
          llvm::FunctionType::get(CGM.VoidTy, /*isVarArg*/ false);
      std::string Name = getName({"__omp_threadprivate_init_", ""});
      llvm::Function *InitFunction = CGM.CreateGlobalInitOrCleanUpFunction(
          InitFunctionTy, Name, CGM.getTypes().arrangeNullaryFunction());
      CodeGenFunction InitCGF(CGM);
      FunctionArgList ArgList;
      InitCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, InitFunction,
                            CGM.getTypes().arrangeNullaryFunction(), ArgList,
                            Loc, Loc);
      emitThreadPrivateVarInit(InitCGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
      InitCGF.FinishFunction();
      return InitFunction;
    }
    emitThreadPrivateVarInit(*CGF, VDAddr, Ctor, CopyCtor, Dtor, Loc);
  }
  return nullptr;
}
bool CGOpenMPRuntime::emitDeclareTargetVarDefinition(const VarDecl *VD,
                                                     llvm::GlobalVariable *Addr,
                                                     bool PerformInit) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return false;
  Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory))
    return CGM.getLangOpts().OpenMPIsDevice;
  VD = VD->getDefinition(CGM.getContext());
  assert(VD && "Unknown VarDecl");

  if (!DeclareTargetWithDefinition.insert(CGM.getMangledName(VD)).second)
    return CGM.getLangOpts().OpenMPIsDevice;

  QualType ASTTy = VD->getType();
  SourceLocation Loc = VD->getCanonicalDecl()->getBeginLoc();

  // Produce the unique prefix to identify the new target regions. We use
  // the source location of the variable declaration which we know to not
  // conflict with any target region.
  auto EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), Loc, VD->getName());
  SmallString<128> Buffer, Out;
  OffloadEntriesInfoManager.getTargetRegionEntryFnName(Buffer, EntryInfo);

  const Expr *Init = VD->getAnyInitializer();
  if (CGM.getLangOpts().CPlusPlus && PerformInit) {
    llvm::Constant *Ctor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that re-emits the declaration's initializer into
      // the threadprivate copy of the variable VD.
      CodeGenFunction CtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_ctor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(CtorCGF);
      CtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      auto AL = ApplyDebugLocation::CreateArtificial(CtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      CtorCGF.EmitAnyExprToMem(Init,
                               Address(AddrInAS0, Addr->getValueType(),
                                       CGM.getContext().getDeclAlign(VD)),
                               Init->getType().getQualifiers(),
                               /*IsInitializer=*/true);
      CtorCGF.FinishFunction();
      Ctor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Ctor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_ctor"));
      ID = Ctor;
    }

    // Register the information for the entry associated with the constructor.
    auto CtorEntryInfo = EntryInfo;
    CtorEntryInfo.ParentName = Twine(Buffer, "_ctor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        CtorEntryInfo, Ctor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryCtor,
        CGM.getLangOpts().OpenMPIsDevice);
  }
  if (VD->getType().isDestructedType() != QualType::DK_none) {
    llvm::Constant *Dtor;
    llvm::Constant *ID;
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Generate a function that emits a destructor call for the
      // threadprivate copy of the variable VD.
      CodeGenFunction DtorCGF(CGM);

      const CGFunctionInfo &FI = CGM.getTypes().arrangeNullaryFunction();
      llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
      llvm::Function *Fn = CGM.CreateGlobalInitOrCleanUpFunction(
          FTy, Twine(Buffer, "_dtor"), FI, Loc, false,
          llvm::GlobalValue::WeakODRLinkage);
      Fn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
      if (CGM.getTriple().isAMDGCN())
        Fn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
      auto NL = ApplyDebugLocation::CreateEmpty(DtorCGF);
      DtorCGF.StartFunction(GlobalDecl(), CGM.getContext().VoidTy, Fn, FI,
                            FunctionArgList(), Loc, Loc);
      // Create a scope with an artificial location for the body of this
      // function.
      auto AL = ApplyDebugLocation::CreateArtificial(DtorCGF);
      llvm::Constant *AddrInAS0 = Addr;
      if (Addr->getAddressSpace() != 0)
        AddrInAS0 = llvm::ConstantExpr::getAddrSpaceCast(
            Addr, llvm::PointerType::getWithSamePointeeType(
                      cast<llvm::PointerType>(Addr->getType()), 0));
      DtorCGF.emitDestroy(Address(AddrInAS0, Addr->getValueType(),
                                  CGM.getContext().getDeclAlign(VD)),
                          ASTTy, DtorCGF.getDestroyer(ASTTy.isDestructedType()),
                          DtorCGF.needsEHCleanup(ASTTy.isDestructedType()));
      DtorCGF.FinishFunction();
      Dtor = Fn;
      ID = llvm::ConstantExpr::getBitCast(Fn, CGM.Int8PtrTy);
    } else {
      Dtor = new llvm::GlobalVariable(
          CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage,
          llvm::Constant::getNullValue(CGM.Int8Ty), Twine(Buffer, "_dtor"));
      ID = Dtor;
    }
    // Register the information for the entry associated with the destructor.
    auto DtorEntryInfo = EntryInfo;
    DtorEntryInfo.ParentName = Twine(Buffer, "_dtor").toStringRef(Out);
    OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
        DtorEntryInfo, Dtor, ID,
        llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryDtor,
        CGM.getLangOpts().OpenMPIsDevice);
  }
  return CGM.getLangOpts().OpenMPIsDevice;
}
Address CGOpenMPRuntime::getAddrOfArtificialThreadPrivate(CodeGenFunction &CGF,
                                                          QualType VarType,
                                                          StringRef Name) {
  std::string Suffix = getName({"artificial", ""});
  llvm::Type *VarLVType = CGF.ConvertTypeForMem(VarType);
  llvm::GlobalVariable *GAddr =
      getOrCreateInternalVariable(VarLVType, Twine(Name).concat(Suffix));
  if (CGM.getLangOpts().OpenMP && CGM.getLangOpts().OpenMPUseTLS &&
      CGM.getTarget().isTLSSupported()) {
    GAddr->setThreadLocal(/*Val=*/true);
    return Address(GAddr, GAddr->getValueType(),
                   CGM.getContext().getTypeAlignInChars(VarType));
  }
  std::string CacheSuffix = getName({"cache", ""});
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, SourceLocation()),
      getThreadID(CGF, SourceLocation()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(GAddr, CGM.VoidPtrTy),
      CGF.Builder.CreateIntCast(CGF.getTypeSize(VarType), CGM.SizeTy,
                                /*isSigned=*/false),
      getOrCreateInternalVariable(
          CGM.VoidPtrPtrTy, Twine(Name).concat(Suffix).concat(CacheSuffix))};
  return Address(
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.EmitRuntimeCall(
              OMPBuilder.getOrCreateRuntimeFunction(
                  CGM.getModule(), OMPRTL___kmpc_threadprivate_cached),
              Args),
          VarLVType->getPointerTo(/*AddrSpace=*/0)),
      VarLVType, CGM.getContext().getTypeAlignInChars(VarType));
}
void CGOpenMPRuntime::emitIfClause(CodeGenFunction &CGF, const Expr *Cond,
                                   const RegionCodeGenTy &ThenGen,
                                   const RegionCodeGenTy &ElseGen) {
  CodeGenFunction::LexicalScope ConditionScope(CGF, Cond->getSourceRange());

  // If the condition constant folds and can be elided, try to avoid emitting
  // the condition and the dead arm of the if/else.
  bool CondConstant;
  if (CGF.ConstantFoldsToSimpleInteger(Cond, CondConstant)) {
    if (CondConstant)
      ThenGen(CGF);
    else
      ElseGen(CGF);
    return;
  }

  // Otherwise, the condition did not fold, or we couldn't elide it. Just
  // emit the conditional branch.
  llvm::BasicBlock *ThenBlock = CGF.createBasicBlock("omp_if.then");
  llvm::BasicBlock *ElseBlock = CGF.createBasicBlock("omp_if.else");
  llvm::BasicBlock *ContBlock = CGF.createBasicBlock("omp_if.end");
  CGF.EmitBranchOnBoolExpr(Cond, ThenBlock, ElseBlock, /*TrueCount=*/0);

  // Emit the 'then' code.
  CGF.EmitBlock(ThenBlock);
  ThenGen(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the 'else' code if present.
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBlock(ElseBlock);
  ElseGen(CGF);
  // There is no need to emit line number for unconditional branch.
  (void)ApplyDebugLocation::CreateEmpty(CGF);
  CGF.EmitBranch(ContBlock);
  // Emit the continuation block for code after the if.
  CGF.EmitBlock(ContBlock, /*IsFinished=*/true);
}
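
// Illustrative CFG (sketch) for a non-foldable condition, mirroring the block
// names created above:
//   br i1 %cond, label %omp_if.then, label %omp_if.else
//   omp_if.then: <ThenGen>; br label %omp_if.end
//   omp_if.else: <ElseGen>; br label %omp_if.end
//   omp_if.end:  ...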
void CGOpenMPRuntime::emitParallelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       llvm::Function *OutlinedFn,
                                       ArrayRef<llvm::Value *> CapturedVars,
                                       const Expr *IfCond,
                                       llvm::Value *NumThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  auto &M = CGM.getModule();
  auto &&ThenGen = [&M, OutlinedFn, CapturedVars, RTLoc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    // Build call __kmpc_fork_call(loc, n, microtask, var1, .., varn);
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *Args[] = {
        RTLoc,
        CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
        CGF.Builder.CreateBitCast(OutlinedFn, RT.getKmpc_MicroPointerTy())};
    llvm::SmallVector<llvm::Value *, 16> RealArgs;
    RealArgs.append(std::begin(Args), std::end(Args));
    RealArgs.append(CapturedVars.begin(), CapturedVars.end());

    llvm::FunctionCallee RTLFn =
        OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_fork_call);
    CGF.EmitRuntimeCall(RTLFn, RealArgs);
  };
  auto &&ElseGen = [&M, OutlinedFn, CapturedVars, RTLoc, Loc,
                    this](CodeGenFunction &CGF, PrePostActionTy &) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    llvm::Value *ThreadID = RT.getThreadID(CGF, Loc);
    // Build calls:
    // __kmpc_serialized_parallel(&Loc, GTid);
    llvm::Value *Args[] = {RTLoc, ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_serialized_parallel),
                        Args);

    // OutlinedFn(&GTid, &zero_bound, CapturedStruct);
    Address ThreadIDAddr = RT.emitThreadIDAddress(CGF, Loc);
    Address ZeroAddrBound =
        CGF.CreateDefaultAlignTempAlloca(CGF.Int32Ty,
                                         /*Name=*/".bound.zero.addr");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(/*C*/ 0), ZeroAddrBound);
    llvm::SmallVector<llvm::Value *, 16> OutlinedFnArgs;
    // ThreadId for serialized parallels is 0.
    OutlinedFnArgs.push_back(ThreadIDAddr.getPointer());
    OutlinedFnArgs.push_back(ZeroAddrBound.getPointer());
    OutlinedFnArgs.append(CapturedVars.begin(), CapturedVars.end());

    // Ensure we do not inline the function. This is trivially true for the
    // ones passed to __kmpc_fork_call but the ones called in serialized
    // regions could be inlined. This is not perfect but it is closer to the
    // invariant we want, namely, every data environment starts with a new
    // function.
    // TODO: We should pass the if condition to the runtime function and do
    // the handling there. Much cleaner code.
    OutlinedFn->removeFnAttr(llvm::Attribute::AlwaysInline);
    OutlinedFn->addFnAttr(llvm::Attribute::NoInline);
    RT.emitOutlinedFunctionCall(CGF, Loc, OutlinedFn, OutlinedFnArgs);

    // __kmpc_end_serialized_parallel(&Loc, GTid);
    llvm::Value *EndArgs[] = {RT.emitUpdateLocation(CGF, Loc), ThreadID};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            M, OMPRTL___kmpc_end_serialized_parallel),
                        EndArgs);
  };
  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenGen, ElseGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenGen);
    ThenRCG(CGF);
  }
}
// If we're inside an (outlined) parallel region, use the region info's
// thread-ID variable (it is passed as the first argument of the outlined
// function, "kmp_int32 *gtid"). Otherwise, if we're not inside a parallel
// region but in a regular serial code region, get the thread ID by calling
// kmp_int32 kmpc_global_thread_num(ident_t *loc), stash this thread ID in a
// temporary and return the address of that temporary.
Address CGOpenMPRuntime::emitThreadIDAddress(CodeGenFunction &CGF,
                                             SourceLocation Loc) {
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    if (OMPRegionInfo->getThreadIDVariable())
      return OMPRegionInfo->getThreadIDVariableLValue(CGF).getAddress(CGF);

  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  QualType Int32Ty =
      CGF.getContext().getIntTypeForBitwidth(/*DestWidth*/ 32, /*Signed*/ true);
  Address ThreadIDTemp = CGF.CreateMemTemp(Int32Ty, /*Name*/ ".threadid_temp.");
  CGF.EmitStoreOfScalar(ThreadID,
                        CGF.MakeAddrLValue(ThreadIDTemp, Int32Ty));

  return ThreadIDTemp;
}
llvm::GlobalVariable *CGOpenMPRuntime::getOrCreateInternalVariable(
    llvm::Type *Ty, const llvm::Twine &Name, unsigned AddressSpace) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Name;
  StringRef RuntimeName = Out.str();
  auto &Elem = *InternalVars.try_emplace(RuntimeName, nullptr).first;
  if (Elem.second) {
    assert(Elem.second->getType()->isOpaqueOrPointeeTypeMatches(Ty) &&
           "OMP internal variable has different type than requested");
    return &*Elem.second;
  }

  return Elem.second = new llvm::GlobalVariable(
             CGM.getModule(), Ty, /*IsConstant*/ false,
             llvm::GlobalValue::CommonLinkage, llvm::Constant::getNullValue(Ty),
             Elem.first(), /*InsertBefore=*/nullptr,
             llvm::GlobalValue::NotThreadLocal, AddressSpace);
}
llvm::Value *CGOpenMPRuntime::getCriticalRegionLock(StringRef CriticalName) {
  std::string Prefix = Twine("gomp_critical_user_", CriticalName).str();
  std::string Name = getName({Prefix, "var"});
  return getOrCreateInternalVariable(KmpCriticalNameTy, Name);
}

namespace {
/// Common pre(post)-action for different OpenMP constructs.
class CommonActionTy final : public PrePostActionTy {
  llvm::FunctionCallee EnterCallee;
  ArrayRef<llvm::Value *> EnterArgs;
  llvm::FunctionCallee ExitCallee;
  ArrayRef<llvm::Value *> ExitArgs;
  bool Conditional;
  llvm::BasicBlock *ContBlock = nullptr;

public:
  CommonActionTy(llvm::FunctionCallee EnterCallee,
                 ArrayRef<llvm::Value *> EnterArgs,
                 llvm::FunctionCallee ExitCallee,
                 ArrayRef<llvm::Value *> ExitArgs, bool Conditional = false)
      : EnterCallee(EnterCallee), EnterArgs(EnterArgs), ExitCallee(ExitCallee),
        ExitArgs(ExitArgs), Conditional(Conditional) {}
  void Enter(CodeGenFunction &CGF) override {
    llvm::Value *EnterRes = CGF.EmitRuntimeCall(EnterCallee, EnterArgs);
    if (Conditional) {
      llvm::Value *CallBool = CGF.Builder.CreateIsNotNull(EnterRes);
      auto *ThenBlock = CGF.createBasicBlock("omp_if.then");
      ContBlock = CGF.createBasicBlock("omp_if.end");
      // Generate the branch (If-stmt)
      CGF.Builder.CreateCondBr(CallBool, ThenBlock, ContBlock);
      CGF.EmitBlock(ThenBlock);
    }
  }
  void Done(CodeGenFunction &CGF) {
    // Emit the rest of blocks/branches
    CGF.EmitBranch(ContBlock);
    CGF.EmitBlock(ContBlock, true);
  }
  void Exit(CodeGenFunction &CGF) override {
    CGF.EmitRuntimeCall(ExitCallee, ExitArgs);
  }
};
} // anonymous namespace
void CGOpenMPRuntime::emitCriticalRegion(CodeGenFunction &CGF,
                                         StringRef CriticalName,
                                         const RegionCodeGenTy &CriticalOpGen,
                                         SourceLocation Loc, const Expr *Hint) {
  // __kmpc_critical[_with_hint](ident_t *, gtid, Lock[, hint]);
  // CriticalOpGen();
  // __kmpc_end_critical(ident_t *, gtid, Lock);
  // Prepare arguments and build a call to __kmpc_critical
  if (!CGF.HaveInsertPoint())
    return;
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         getCriticalRegionLock(CriticalName)};
  llvm::SmallVector<llvm::Value *, 4> EnterArgs(std::begin(Args),
                                                std::end(Args));
  if (Hint) {
    EnterArgs.push_back(CGF.Builder.CreateIntCast(
        CGF.EmitScalarExpr(Hint), CGM.Int32Ty, /*isSigned=*/false));
  }
  CommonActionTy Action(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          Hint ? OMPRTL___kmpc_critical_with_hint : OMPRTL___kmpc_critical),
      EnterArgs,
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_end_critical),
      Args);
  CriticalOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_critical, CriticalOpGen);
}
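
// Illustrative lowering (sketch): '#pragma omp critical (name) hint(h)'
// becomes roughly
//   __kmpc_critical_with_hint(&loc, gtid, &lock, h);
//   <body>
//   __kmpc_end_critical(&loc, gtid, &lock);
// where the lock is the internal "gomp_critical_user_<name>" variable built
// by getCriticalRegionLock, and the plain __kmpc_critical entry is used when
// no hint is present.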
void CGOpenMPRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MasterOpGen,
                                       SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_master(ident_t *, gtid)) {
  //   MasterOpGen();
  //   __kmpc_end_master(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_master
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_master),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_master),
                        Args,
                        /*Conditional=*/true);
  MasterOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_master, MasterOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &MaskedOpGen,
                                       SourceLocation Loc, const Expr *Filter) {
  if (!CGF.HaveInsertPoint())
    return;
  // if(__kmpc_masked(ident_t *, gtid, filter)) {
  //   MaskedOpGen();
  //   __kmpc_end_masked(ident_t *, gtid);
  // }
  // Prepare arguments and build a call to __kmpc_masked
  llvm::Value *FilterVal = Filter
                               ? CGF.EmitScalarExpr(Filter, CGF.Int32Ty)
                               : llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/0);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
                         FilterVal};
  llvm::Value *ArgsEnd[] = {emitUpdateLocation(CGF, Loc),
                            getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_masked),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_masked),
                        ArgsEnd,
                        /*Conditional=*/true);
  MaskedOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_masked, MaskedOpGen);
  Action.Done(CGF);
}
void CGOpenMPRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                        SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createTaskyield(CGF.Builder);
  } else {
    // Build call __kmpc_omp_taskyield(loc, thread_id, 0);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
        llvm::ConstantInt::get(CGM.IntTy, /*V=*/0, /*isSigned=*/true)};
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_omp_taskyield),
                        Args);
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
void CGOpenMPRuntime::emitTaskgroupRegion(CodeGenFunction &CGF,
                                          const RegionCodeGenTy &TaskgroupOpGen,
                                          SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_taskgroup(ident_t *, gtid);
  // TaskgroupOpGen();
  // __kmpc_end_taskgroup(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_taskgroup
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_taskgroup),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_taskgroup),
                        Args);
  TaskgroupOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_taskgroup, TaskgroupOpGen);
}
/// Given an array of pointers to variables, project the address of a
/// given variable.
static Address emitAddrOfVarFromArray(CodeGenFunction &CGF, Address Array,
                                      unsigned Index, const VarDecl *Var) {
  // Pull out the pointer to the variable.
  Address PtrAddr = CGF.Builder.CreateConstArrayGEP(Array, Index);
  llvm::Value *Ptr = CGF.Builder.CreateLoad(PtrAddr);

  llvm::Type *ElemTy = CGF.ConvertTypeForMem(Var->getType());
  return Address(
      CGF.Builder.CreateBitCast(
          Ptr, ElemTy->getPointerTo(Ptr->getType()->getPointerAddressSpace())),
      ElemTy, CGF.getContext().getDeclAlign(Var));
}
static llvm::Value *emitCopyprivateCopyFunction(
    CodeGenModule &CGM, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> CopyprivateVars, ArrayRef<const Expr *> DestExprs,
    ArrayRef<const Expr *> SrcExprs, ArrayRef<const Expr *> AssignmentOps,
    SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  // void copy_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp", "copyprivate", "copy_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);
  // Dest = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  // *(Type0*)Dst[0] = *(Type0*)Src[0];
  // *(Type1*)Dst[1] = *(Type1*)Src[1];
  // ...
  // *(Typen*)Dst[n] = *(Typen*)Src[n];
  for (unsigned I = 0, E = AssignmentOps.size(); I < E; ++I) {
    const auto *DestVar =
        cast<VarDecl>(cast<DeclRefExpr>(DestExprs[I])->getDecl());
    Address DestAddr = emitAddrOfVarFromArray(CGF, LHS, I, DestVar);

    const auto *SrcVar =
        cast<VarDecl>(cast<DeclRefExpr>(SrcExprs[I])->getDecl());
    Address SrcAddr = emitAddrOfVarFromArray(CGF, RHS, I, SrcVar);

    const auto *VD = cast<DeclRefExpr>(CopyprivateVars[I])->getDecl();
    QualType Type = VD->getType();
    CGF.EmitOMPCopy(Type, DestAddr, SrcAddr, DestVar, SrcVar, AssignmentOps[I]);
  }
  CGF.FinishFunction();
  return Fn;
}
void CGOpenMPRuntime::emitSingleRegion(CodeGenFunction &CGF,
                                       const RegionCodeGenTy &SingleOpGen,
                                       SourceLocation Loc,
                                       ArrayRef<const Expr *> CopyprivateVars,
                                       ArrayRef<const Expr *> SrcExprs,
                                       ArrayRef<const Expr *> DstExprs,
                                       ArrayRef<const Expr *> AssignmentOps) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(CopyprivateVars.size() == SrcExprs.size() &&
         CopyprivateVars.size() == DstExprs.size() &&
         CopyprivateVars.size() == AssignmentOps.size());
  ASTContext &C = CGM.getContext();
  // int32 did_it = 0;
  // if(__kmpc_single(ident_t *, gtid)) {
  //   SingleOpGen();
  //   __kmpc_end_single(ident_t *, gtid);
  //   did_it = 1;
  // }
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);

  Address DidIt = Address::invalid();
  if (!CopyprivateVars.empty()) {
    // int32 did_it = 0;
    QualType KmpInt32Ty =
        C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
    DidIt = CGF.CreateMemTemp(KmpInt32Ty, ".omp.copyprivate.did_it");
    CGF.Builder.CreateStore(CGF.Builder.getInt32(0), DidIt);
  }
  // Prepare arguments and build a call to __kmpc_single
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_single),
                        Args,
                        OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_end_single),
                        Args,
                        /*Conditional=*/true);
  SingleOpGen.setAction(Action);
  emitInlinedDirective(CGF, OMPD_single, SingleOpGen);
  if (DidIt.isValid()) {
    // did_it = 1;
    CGF.Builder.CreateStore(CGF.Builder.getInt32(1), DidIt);
  }
  Action.Done(CGF);
  // call __kmpc_copyprivate(ident_t *, gtid, <buf_size>, <copyprivate list>,
  // <copy_func>, did_it);
  if (DidIt.isValid()) {
    llvm::APInt ArraySize(/*unsigned int numBits=*/32, CopyprivateVars.size());
    QualType CopyprivateArrayTy = C.getConstantArrayType(
        C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);
    // Create a list of all private variables for copyprivate.
    Address CopyprivateList =
        CGF.CreateMemTemp(CopyprivateArrayTy, ".omp.copyprivate.cpr_list");
    for (unsigned I = 0, E = CopyprivateVars.size(); I < E; ++I) {
      Address Elem = CGF.Builder.CreateConstArrayGEP(CopyprivateList, I);
      CGF.Builder.CreateStore(
          CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
              CGF.EmitLValue(CopyprivateVars[I]).getPointer(CGF),
              CGF.VoidPtrTy),
          Elem);
    }
    // Build function that copies private values from single region to all
    // other threads in the corresponding parallel region.
    llvm::Value *CpyFn = emitCopyprivateCopyFunction(
        CGM, CGF.ConvertTypeForMem(CopyprivateArrayTy), CopyprivateVars,
        SrcExprs, DstExprs, AssignmentOps, Loc);
    llvm::Value *BufSize = CGF.getTypeSize(CopyprivateArrayTy);
    Address CL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        CopyprivateList, CGF.VoidPtrTy, CGF.Int8Ty);
    llvm::Value *DidItVal = CGF.Builder.CreateLoad(DidIt);
    llvm::Value *Args[] = {
        emitUpdateLocation(CGF, Loc), // ident_t *<loc>
        getThreadID(CGF, Loc),        // i32 <gtid>
        BufSize,                      // size_t <buf_size>
        CL.getPointer(),              // void *<copyprivate list>
        CpyFn,                        // void (*) (void *, void *) <copy_func>
        DidItVal                      // i32 did_it
    };
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_copyprivate),
                        Args);
  }
}
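
// Illustrative lowering (sketch) for '#pragma omp single copyprivate(x)',
// mirroring the pseudo-code comment above:
//   i32 did_it = 0;
//   if (__kmpc_single(&loc, gtid)) {
//     <body>; __kmpc_end_single(&loc, gtid); did_it = 1;
//   }
//   __kmpc_copyprivate(&loc, gtid, <buf_size>, &list, copy_func, did_it);
// where copy_func is the helper built by emitCopyprivateCopyFunction.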
void CGOpenMPRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                        const RegionCodeGenTy &OrderedOpGen,
                                        SourceLocation Loc, bool IsThreads) {
  if (!CGF.HaveInsertPoint())
    return;
  // __kmpc_ordered(ident_t *, gtid);
  // OrderedOpGen();
  // __kmpc_end_ordered(ident_t *, gtid);
  // Prepare arguments and build a call to __kmpc_ordered
  if (IsThreads) {
    llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_ordered),
                          Args,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_ordered),
                          Args);
    OrderedOpGen.setAction(Action);
    emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
    return;
  }
  emitInlinedDirective(CGF, OMPD_ordered, OrderedOpGen);
}
unsigned CGOpenMPRuntime::getDefaultFlagsForBarriers(OpenMPDirectiveKind Kind) {
  unsigned Flags;
  if (Kind == OMPD_for)
    Flags = OMP_IDENT_BARRIER_IMPL_FOR;
  else if (Kind == OMPD_sections)
    Flags = OMP_IDENT_BARRIER_IMPL_SECTIONS;
  else if (Kind == OMPD_single)
    Flags = OMP_IDENT_BARRIER_IMPL_SINGLE;
  else if (Kind == OMPD_barrier)
    Flags = OMP_IDENT_BARRIER_EXPL;
  else
    Flags = OMP_IDENT_BARRIER_IMPL;
  return Flags;
}
void CGOpenMPRuntime::getDefaultScheduleAndChunk(
    CodeGenFunction &CGF, const OMPLoopDirective &S,
    OpenMPScheduleClauseKind &ScheduleKind, const Expr *&ChunkExpr) const {
  // Check if the loop directive is actually a doacross loop directive. In this
  // case choose static, 1 schedule.
  if (llvm::any_of(
          S.getClausesOfKind<OMPOrderedClause>(),
          [](const OMPOrderedClause *C) { return C->getNumForLoops(); })) {
    ScheduleKind = OMPC_SCHEDULE_static;
    // Chunk size is 1 in this case.
    llvm::APInt ChunkSize(32, 1);
    ChunkExpr = IntegerLiteral::Create(
        CGF.getContext(), ChunkSize,
        CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/0),
        SourceLocation());
  }
}
void CGOpenMPRuntime::emitBarrierCall(CodeGenFunction &CGF, SourceLocation Loc,
                                      OpenMPDirectiveKind Kind, bool EmitChecks,
                                      bool ForceSimpleCall) {
  // Check if we should use the OMPBuilder
  auto *OMPRegionInfo =
      dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo);
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    CGF.Builder.restoreIP(OMPBuilder.createBarrier(
        CGF.Builder, Kind, ForceSimpleCall, EmitChecks));
    return;
  }

  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_cancel_barrier(loc, thread_id);
  // Build call __kmpc_barrier(loc, thread_id);
  unsigned Flags = getDefaultFlagsForBarriers(Kind);
  // Build call __kmpc_cancel_barrier(loc, thread_id) or __kmpc_barrier(loc,
  // thread_id);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc, Flags),
                         getThreadID(CGF, Loc)};
  if (OMPRegionInfo) {
    if (!ForceSimpleCall && OMPRegionInfo->hasCancel()) {
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                OMPRTL___kmpc_cancel_barrier),
          Args);
      if (EmitChecks) {
        // if (__kmpc_cancel_barrier()) {
        //   exit from construct;
        // }
        llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
        llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
        llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
        CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
        CGF.EmitBlock(ExitBB);
        //   exit from construct;
        CodeGenFunction::JumpDest CancelDestination =
            CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
        CGF.EmitBranchThroughCleanup(CancelDestination);
        CGF.EmitBlock(ContBB, /*IsFinished=*/true);
      }
      return;
    }
  }
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_barrier),
                      Args);
}
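
// Illustrative shape (sketch) of the cancellable barrier emitted above:
//   %r = call i32 @__kmpc_cancel_barrier(ptr %loc, i32 %gtid)
//   %c = icmp ne i32 %r, 0
//   br i1 %c, label %.cancel.exit, label %.cancel.continue
// where .cancel.exit branches through cleanups to the construct's cancel
// destination.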
/// Map the OpenMP loop schedule to the runtime enumeration.
static OpenMPSchedType getRuntimeSchedule(OpenMPScheduleClauseKind ScheduleKind,
                                          bool Chunked, bool Ordered) {
  switch (ScheduleKind) {
  case OMPC_SCHEDULE_static:
    return Chunked ? (Ordered ? OMP_ord_static_chunked : OMP_sch_static_chunked)
                   : (Ordered ? OMP_ord_static : OMP_sch_static);
  case OMPC_SCHEDULE_dynamic:
    return Ordered ? OMP_ord_dynamic_chunked : OMP_sch_dynamic_chunked;
  case OMPC_SCHEDULE_guided:
    return Ordered ? OMP_ord_guided_chunked : OMP_sch_guided_chunked;
  case OMPC_SCHEDULE_runtime:
    return Ordered ? OMP_ord_runtime : OMP_sch_runtime;
  case OMPC_SCHEDULE_auto:
    return Ordered ? OMP_ord_auto : OMP_sch_auto;
  case OMPC_SCHEDULE_unknown:
    assert(!Chunked && "chunk was specified but schedule kind not known");
    return Ordered ? OMP_ord_static : OMP_sch_static;
  }
  llvm_unreachable("Unexpected runtime schedule");
}
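
// For orientation (informal summary of the switch above): schedule(static)
// maps to OMP_sch_static, schedule(static, c) to OMP_sch_static_chunked, and
// schedule(dynamic[, c]) to OMP_sch_dynamic_chunked; the presence of an
// 'ordered' clause selects the corresponding OMP_ord_* value instead.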
/// Map the OpenMP distribute schedule to the runtime enumeration.
static OpenMPSchedType
getRuntimeSchedule(OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) {
  // only static is allowed for dist_schedule
  return Chunked ? OMP_dist_sch_static_chunked : OMP_dist_sch_static;
}
bool CGOpenMPRuntime::isStaticNonchunked(OpenMPScheduleClauseKind ScheduleKind,
                                         bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static;
}

bool CGOpenMPRuntime::isStaticNonchunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static;
}

bool CGOpenMPRuntime::isStaticChunked(OpenMPScheduleClauseKind ScheduleKind,
                                      bool Chunked) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, Chunked, /*Ordered=*/false);
  return Schedule == OMP_sch_static_chunked;
}

bool CGOpenMPRuntime::isStaticChunked(
    OpenMPDistScheduleClauseKind ScheduleKind, bool Chunked) const {
  OpenMPSchedType Schedule = getRuntimeSchedule(ScheduleKind, Chunked);
  return Schedule == OMP_dist_sch_static_chunked;
}

bool CGOpenMPRuntime::isDynamic(OpenMPScheduleClauseKind ScheduleKind) const {
  OpenMPSchedType Schedule =
      getRuntimeSchedule(ScheduleKind, /*Chunked=*/false, /*Ordered=*/false);
  assert(Schedule != OMP_sch_static_chunked && "cannot be chunked here");
  return Schedule != OMP_sch_static;
}
static int addMonoNonMonoModifier(CodeGenModule &CGM, OpenMPSchedType Schedule,
                                  OpenMPScheduleClauseModifier M1,
                                  OpenMPScheduleClauseModifier M2) {
  int Modifier = 0;
  switch (M1) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  switch (M2) {
  case OMPC_SCHEDULE_MODIFIER_monotonic:
    Modifier = OMP_sch_modifier_monotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_nonmonotonic:
    Modifier = OMP_sch_modifier_nonmonotonic;
    break;
  case OMPC_SCHEDULE_MODIFIER_simd:
    if (Schedule == OMP_sch_static_chunked)
      Schedule = OMP_sch_static_balanced_chunked;
    break;
  case OMPC_SCHEDULE_MODIFIER_last:
  case OMPC_SCHEDULE_MODIFIER_unknown:
    break;
  }
  // OpenMP 5.0, 2.9.2 Worksharing-Loop Construct, Description.
  // If the static schedule kind is specified or if the ordered clause is
  // specified, and if the nonmonotonic modifier is not specified, the effect is
  // as if the monotonic modifier is specified. Otherwise, unless the monotonic
  // modifier is specified, the effect is as if the nonmonotonic modifier is
  // specified.
  if (CGM.getLangOpts().OpenMP >= 50 && Modifier == 0) {
    if (!(Schedule == OMP_sch_static_chunked || Schedule == OMP_sch_static ||
          Schedule == OMP_sch_static_balanced_chunked ||
          Schedule == OMP_ord_static_chunked || Schedule == OMP_ord_static ||
          Schedule == OMP_dist_sch_static_chunked ||
          Schedule == OMP_dist_sch_static))
      Modifier = OMP_sch_modifier_nonmonotonic;
  }
  return Schedule | Modifier;
}
void CGOpenMPRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  if (!CGF.HaveInsertPoint())
    return;
  OpenMPSchedType Schedule = getRuntimeSchedule(
      ScheduleKind.Schedule, DispatchValues.Chunk != nullptr, Ordered);
  assert(Ordered ||
         (Schedule != OMP_sch_static && Schedule != OMP_sch_static_chunked &&
          Schedule != OMP_ord_static && Schedule != OMP_ord_static_chunked &&
          Schedule != OMP_sch_static_balanced_chunked));
  // Call __kmpc_dispatch_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedule,
  //          kmp_int[32|64] lower, kmp_int[32|64] upper,
  //          kmp_int[32|64] stride, kmp_int[32|64] chunk);

  // If the Chunk was not specified in the clause - use default value 1.
  llvm::Value *Chunk = DispatchValues.Chunk ? DispatchValues.Chunk
                                            : CGF.Builder.getIntN(IVSize, 1);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      CGF.Builder.getInt32(addMonoNonMonoModifier(
          CGM, Schedule, ScheduleKind.M1, ScheduleKind.M2)), // Schedule type
      DispatchValues.LB,                                     // Lower
      DispatchValues.UB,                                     // Upper
      CGF.Builder.getIntN(IVSize, 1),                        // Stride
      Chunk                                                  // Chunk
  };
  CGF.EmitRuntimeCall(createDispatchInitFunction(IVSize, IVSigned), Args);
}
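
// Illustrative call (sketch) for 'schedule(dynamic, 4)' with a signed 32-bit
// IV over 0..N-1, using the same helpers as the code above:
//   __kmpc_dispatch_init_4(&loc, gtid,
//                          addMonoNonMonoModifier(CGM, OMP_sch_dynamic_chunked,
//                                                 M1, M2),
//                          /*lower=*/0, /*upper=*/N - 1, /*stride=*/1,
//                          /*chunk=*/4);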
static void emitForStaticInitCall(
    CodeGenFunction &CGF, llvm::Value *UpdateLocation, llvm::Value *ThreadId,
    llvm::FunctionCallee ForStaticInitFunction, OpenMPSchedType Schedule,
    OpenMPScheduleClauseModifier M1, OpenMPScheduleClauseModifier M2,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  if (!CGF.HaveInsertPoint())
    return;

  assert(!Values.Ordered);
  assert(Schedule == OMP_sch_static || Schedule == OMP_sch_static_chunked ||
         Schedule == OMP_sch_static_balanced_chunked ||
         Schedule == OMP_ord_static || Schedule == OMP_ord_static_chunked ||
         Schedule == OMP_dist_sch_static ||
         Schedule == OMP_dist_sch_static_chunked);

  // Call __kmpc_for_static_init(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 schedtype,
  //          kmp_int32 *p_lastiter, kmp_int[32|64] *p_lower,
  //          kmp_int[32|64] *p_upper, kmp_int[32|64] *p_stride,
  //          kmp_int[32|64] incr, kmp_int[32|64] chunk);
  llvm::Value *Chunk = Values.Chunk;
  if (Chunk == nullptr) {
    assert((Schedule == OMP_sch_static || Schedule == OMP_ord_static ||
            Schedule == OMP_dist_sch_static) &&
           "expected static non-chunked schedule");
    // If the Chunk was not specified in the clause - use default value 1.
    Chunk = CGF.Builder.getIntN(Values.IVSize, 1);
  } else {
    assert((Schedule == OMP_sch_static_chunked ||
            Schedule == OMP_sch_static_balanced_chunked ||
            Schedule == OMP_ord_static_chunked ||
            Schedule == OMP_dist_sch_static_chunked) &&
           "expected static chunked schedule");
  }
  llvm::Value *Args[] = {
      UpdateLocation,
      ThreadId,
      CGF.Builder.getInt32(addMonoNonMonoModifier(CGF.CGM, Schedule, M1,
                                                  M2)), // Schedule type
      Values.IL.getPointer(),                           // &isLastIter
      Values.LB.getPointer(),                           // &LB
      Values.UB.getPointer(),                           // &UB
      Values.ST.getPointer(),                           // &Stride
      CGF.Builder.getIntN(Values.IVSize, 1),            // Incr
      Chunk                                             // Chunk
  };
  CGF.EmitRuntimeCall(ForStaticInitFunction, Args);
}
void CGOpenMPRuntime::emitForStaticInit(CodeGenFunction &CGF,
                                        SourceLocation Loc,
                                        OpenMPDirectiveKind DKind,
                                        const OpenMPScheduleTy &ScheduleKind,
                                        const StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum = getRuntimeSchedule(
      ScheduleKind.Schedule, Values.Chunk != nullptr, Values.Ordered);
  assert(isOpenMPWorksharingDirective(DKind) &&
         "Expected loop-based or sections-based directive.");
  llvm::Value *UpdatedLocation = emitUpdateLocation(CGF, Loc,
                                                    isOpenMPLoopDirective(DKind)
                                                        ? OMP_IDENT_WORK_LOOP
                                                        : OMP_IDENT_WORK_SECTIONS);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction =
      createForStaticInitFunction(Values.IVSize, Values.IVSigned, false);
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, ScheduleKind.M1, ScheduleKind.M2, Values);
}
void CGOpenMPRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind,
    const CGOpenMPRuntime::StaticRTInput &Values) {
  OpenMPSchedType ScheduleNum =
      getRuntimeSchedule(SchedKind, Values.Chunk != nullptr);
  llvm::Value *UpdatedLocation =
      emitUpdateLocation(CGF, Loc, OMP_IDENT_WORK_DISTRIBUTE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::FunctionCallee StaticInitFunction;
  bool isGPUDistribute =
      CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX());
  StaticInitFunction = createForStaticInitFunction(
      Values.IVSize, Values.IVSigned, isGPUDistribute);

  emitForStaticInitCall(CGF, UpdatedLocation, ThreadId, StaticInitFunction,
                        ScheduleNum, OMPC_SCHEDULE_MODIFIER_unknown,
                        OMPC_SCHEDULE_MODIFIER_unknown, Values);
}
void CGOpenMPRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind DKind) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_static_fini(ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc,
                         isOpenMPDistributeDirective(DKind)
                             ? OMP_IDENT_WORK_DISTRIBUTE
                             : isOpenMPLoopDirective(DKind)
                                   ? OMP_IDENT_WORK_LOOP
                                   : OMP_IDENT_WORK_SECTIONS),
      getThreadID(CGF, Loc)};
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);
  if (isOpenMPDistributeDirective(DKind) && CGM.getLangOpts().OpenMPIsDevice &&
      (CGM.getTriple().isAMDGCN() || CGM.getTriple().isNVPTX()))
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_distribute_static_fini),
        Args);
  else
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_for_static_fini),
                        Args);
}
void CGOpenMPRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                 SourceLocation Loc,
                                                 unsigned IVSize,
                                                 bool IVSigned) {
  if (!CGF.HaveInsertPoint())
    return;
  // Call __kmpc_for_dynamic_fini_(4|8)[u](ident_t *loc, kmp_int32 tid);
  llvm::Value *Args[] = {emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc)};
  CGF.EmitRuntimeCall(createDispatchFiniFunction(IVSize, IVSigned), Args);
}
llvm::Value *CGOpenMPRuntime::emitForNext(CodeGenFunction &CGF,
                                          SourceLocation Loc, unsigned IVSize,
                                          bool IVSigned, Address IL,
                                          Address LB, Address UB,
                                          Address ST) {
  // Call __kmpc_dispatch_next(
  //          ident_t *loc, kmp_int32 tid, kmp_int32 *p_lastiter,
  //          kmp_int[32|64] *p_lower, kmp_int[32|64] *p_upper,
  //          kmp_int[32|64] *p_stride);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc),
      IL.getPointer(), // &isLastIter
      LB.getPointer(), // &Lower
      UB.getPointer(), // &Upper
      ST.getPointer()  // &Stride
  };
  llvm::Value *Call =
      CGF.EmitRuntimeCall(createDispatchNextFunction(IVSize, IVSigned), Args);
  return CGF.EmitScalarConversion(
      Call, CGF.getContext().getIntTypeForBitwidth(32, /*Signed=*/1),
      CGF.getContext().BoolTy, Loc);
}
void CGOpenMPRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                           llvm::Value *NumThreads,
                                           SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call __kmpc_push_num_threads(&loc, global_tid, num_threads)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned*/ true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_threads),
                      Args);
}
void CGOpenMPRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                         ProcBindKind ProcBind,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;
  assert(ProcBind != OMP_PROC_BIND_unknown && "Unsupported proc_bind value.");
  // Build call __kmpc_push_proc_bind(&loc, global_tid, proc_bind)
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
      llvm::ConstantInt::get(CGM.IntTy, unsigned(ProcBind), /*isSigned=*/true)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_proc_bind),
                      Args);
}
void CGOpenMPRuntime::emitFlush(CodeGenFunction &CGF, ArrayRef<const Expr *>,
                                SourceLocation Loc, llvm::AtomicOrdering AO) {
  if (CGF.CGM.getLangOpts().OpenMPIRBuilder) {
    OMPBuilder.createFlush(CGF.Builder);
  } else {
    if (!CGF.HaveInsertPoint())
      return;
    // Build call void __kmpc_flush(ident_t *loc)
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___kmpc_flush),
                        emitUpdateLocation(CGF, Loc));
  }
}
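
// Illustrative lowering (sketch): '#pragma omp flush' becomes a single call
//   call void @__kmpc_flush(ptr @<loc>)
// when the OpenMPIRBuilder path is not taken.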
namespace {
/// Indexes of fields for type kmp_task_t.
enum KmpTaskTFields {
  /// List of shared variables.
  KmpTaskTShareds,
  /// Task routine.
  KmpTaskTRoutine,
  /// Partition id for the untied tasks.
  KmpTaskTPartId,
  /// Function with call of destructors for private variables.
  Data1,
  /// Task priority.
  Data2,
  /// (Taskloops only) Lower bound.
  KmpTaskTLowerBound,
  /// (Taskloops only) Upper bound.
  KmpTaskTUpperBound,
  /// (Taskloops only) Stride.
  KmpTaskTStride,
  /// (Taskloops only) Is last iteration flag.
  KmpTaskTLastIter,
  /// (Taskloops only) Reduction data.
  KmpTaskTReductions,
};
} // anonymous namespace
void CGOpenMPRuntime::createOffloadEntriesAndInfoMetadata() {
  // If we are in simd mode or there are no entries, we don't need to do
  // anything.
  if (CGM.getLangOpts().OpenMPSimd || OffloadEntriesInfoManager.empty())
    return;

  llvm::OpenMPIRBuilder::EmitMetadataErrorReportFunctionTy &&ErrorReportFn =
      [this](llvm::OpenMPIRBuilder::EmitMetadataErrorKind Kind,
             const llvm::TargetRegionEntryInfo &EntryInfo) -> void {
    SourceLocation Loc;
    if (Kind != llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR) {
      for (auto I = CGM.getContext().getSourceManager().fileinfo_begin(),
                E = CGM.getContext().getSourceManager().fileinfo_end();
           I != E; ++I) {
        if (I->getFirst()->getUniqueID().getDevice() == EntryInfo.DeviceID &&
            I->getFirst()->getUniqueID().getFile() == EntryInfo.FileID) {
          Loc = CGM.getContext().getSourceManager().translateFileLineCol(
              I->getFirst(), EntryInfo.Line, 1);
          break;
        }
      }
    }
    switch (Kind) {
    case llvm::OpenMPIRBuilder::EMIT_MD_TARGET_REGION_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for target region in "
                                    "%0 is incorrect: either the "
                                    "address or the ID is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_DECLARE_TARGET_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error, "Offloading entry for declare target "
                                    "variable %0 is incorrect: the "
                                    "address is invalid.");
      CGM.getDiags().Report(Loc, DiagID) << EntryInfo.ParentName;
    } break;
    case llvm::OpenMPIRBuilder::EMIT_MD_GLOBAL_VAR_LINK_ERROR: {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Error,
          "Offloading entry for declare target variable is incorrect: the "
          "address is invalid.");
      CGM.getDiags().Report(DiagID);
    } break;
    }
  };

  OMPBuilder.createOffloadEntriesAndInfoMetadata(
      OffloadEntriesInfoManager, isTargetCodegen(),
      CGM.getLangOpts().OpenMPIsDevice,
      CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory(), ErrorReportFn);
}
/// Loads all the offload entries information from the host IR
/// metadata.
void CGOpenMPRuntime::loadOffloadInfoMetadata() {
  // If we are in target mode, load the metadata from the host IR. This code
  // has to match the metadata creation in createOffloadEntriesAndInfoMetadata().
  if (!CGM.getLangOpts().OpenMPIsDevice)
    return;

  if (CGM.getLangOpts().OMPHostIRFile.empty())
    return;

  auto Buf = llvm::MemoryBuffer::getFile(CGM.getLangOpts().OMPHostIRFile);
  if (auto EC = Buf.getError()) {
    CGM.getDiags().Report(diag::err_cannot_open_file)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  llvm::LLVMContext C;
  auto ME = expectedToErrorOrAndEmitErrors(
      C, llvm::parseBitcodeFile(Buf.get()->getMemBufferRef(), C));

  if (auto EC = ME.getError()) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Error, "Unable to parse host IR file '%0':'%1'");
    CGM.getDiags().Report(DiagID)
        << CGM.getLangOpts().OMPHostIRFile << EC.message();
    return;
  }

  OMPBuilder.loadOffloadInfoMetadata(*ME.get(), OffloadEntriesInfoManager);
}
void CGOpenMPRuntime::emitKmpRoutineEntryT(QualType KmpInt32Ty) {
  if (!KmpRoutineEntryPtrTy) {
    // Build typedef kmp_int32 (* kmp_routine_entry_t)(kmp_int32, void *); type.
    ASTContext &C = CGM.getContext();
    QualType KmpRoutineEntryTyArgs[] = {KmpInt32Ty, C.VoidPtrTy};
    FunctionProtoType::ExtProtoInfo EPI;
    KmpRoutineEntryPtrQTy = C.getPointerType(
        C.getFunctionType(KmpInt32Ty, KmpRoutineEntryTyArgs, EPI));
    KmpRoutineEntryPtrTy = CGM.getTypes().ConvertType(KmpRoutineEntryPtrQTy);
  }
}
namespace {
struct PrivateHelpersTy {
  PrivateHelpersTy(const Expr *OriginalRef, const VarDecl *Original,
                   const VarDecl *PrivateCopy, const VarDecl *PrivateElemInit)
      : OriginalRef(OriginalRef), Original(Original), PrivateCopy(PrivateCopy),
        PrivateElemInit(PrivateElemInit) {}
  PrivateHelpersTy(const VarDecl *Original) : Original(Original) {}
  const Expr *OriginalRef = nullptr;
  const VarDecl *Original = nullptr;
  const VarDecl *PrivateCopy = nullptr;
  const VarDecl *PrivateElemInit = nullptr;
  bool isLocalPrivate() const {
    return !OriginalRef && !PrivateCopy && !PrivateElemInit;
  }
};
typedef std::pair<CharUnits /*Align*/, PrivateHelpersTy> PrivateDataTy;
} // anonymous namespace
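// As an illustration, for 'firstprivate(a) private(b)' the task-init code
// below builds one (alignment, PrivateHelpersTy) pair per variable, roughly
//   { align(a), {refExpr(a), a, a.private_copy, a.elem_init} }
//   { align(b), {refExpr(b), b, b.private_copy, nullptr} }
// and later sorts the list by decreasing alignment before laying out the
// .kmp_privates.t record.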
static bool isAllocatableDecl(const VarDecl *VD) {
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (!CVD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *AA = CVD->getAttr<OMPAllocateDeclAttr>();
  // Use the default allocation.
  return !(AA->getAllocatorType() == OMPAllocateDeclAttr::OMPDefaultMemAlloc &&
           !AA->getAllocator());
}
static RecordDecl *
createPrivatesRecordDecl(CodeGenModule &CGM, ArrayRef<PrivateDataTy> Privates) {
  if (!Privates.empty()) {
    ASTContext &C = CGM.getContext();
    // Build struct .kmp_privates_t. {
    //         /*  private vars  */
    //       };
    RecordDecl *RD = C.buildImplicitRecord(".kmp_privates.t");
    RD->startDefinition();
    for (const auto &Pair : Privates) {
      const VarDecl *VD = Pair.second.Original;
      QualType Type = VD->getType().getNonReferenceType();
      // If the private variable is a local variable with lvalue ref type,
      // allocate the pointer instead of the pointee type.
      if (Pair.second.isLocalPrivate()) {
        if (VD->getType()->isLValueReferenceType())
          Type = C.getPointerType(Type);
        if (isAllocatableDecl(VD))
          Type = C.getPointerType(Type);
      }
      FieldDecl *FD = addFieldToRecordDecl(C, RD, Type);
      if (VD->hasAttrs()) {
        for (specific_attr_iterator<AlignedAttr> I(VD->getAttrs().begin()),
                                                 E(VD->getAttrs().end());
             I != E; ++I)
          FD->addAttr(*I);
      }
    }
    RD->completeDefinition();
    return RD;
  }
  return nullptr;
}
static RecordDecl *
createKmpTaskTRecordDecl(CodeGenModule &CGM, OpenMPDirectiveKind Kind,
                         QualType KmpInt32Ty,
                         QualType KmpRoutineEntryPointerQTy) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t {
  //         void *              shareds;
  //         kmp_routine_entry_t routine;
  //         kmp_int32           part_id;
  //         kmp_cmplrdata_t data1;
  //         kmp_cmplrdata_t data2;
  // For taskloops additional fields:
  //         kmp_uint64          lb;
  //         kmp_uint64          ub;
  //         kmp_int64           st;
  //         kmp_int32           liter;
  //         void *              reductions;
  //       };
  RecordDecl *UD = C.buildImplicitRecord("kmp_cmplrdata_t", TTK_Union);
  UD->startDefinition();
  addFieldToRecordDecl(C, UD, KmpInt32Ty);
  addFieldToRecordDecl(C, UD, KmpRoutineEntryPointerQTy);
  UD->completeDefinition();
  QualType KmpCmplrdataTy = C.getRecordType(UD);
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  addFieldToRecordDecl(C, RD, KmpRoutineEntryPointerQTy);
  addFieldToRecordDecl(C, RD, KmpInt32Ty);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  addFieldToRecordDecl(C, RD, KmpCmplrdataTy);
  if (isOpenMPTaskLoopDirective(Kind)) {
    QualType KmpUInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
    QualType KmpInt64Ty =
        CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpUInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt64Ty);
    addFieldToRecordDecl(C, RD, KmpInt32Ty);
    addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  }
  RD->completeDefinition();
  return RD;
}
static RecordDecl *
createKmpTaskTWithPrivatesRecordDecl(CodeGenModule &CGM, QualType KmpTaskTQTy,
                                     ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  // Build struct kmp_task_t_with_privates {
  //         kmp_task_t task_data;
  //         .kmp_privates_t. privates;
  //       };
  RecordDecl *RD = C.buildImplicitRecord("kmp_task_t_with_privates");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, KmpTaskTQTy);
  if (const RecordDecl *PrivateRD = createPrivatesRecordDecl(CGM, Privates))
    addFieldToRecordDecl(C, RD, C.getRecordType(PrivateRD));
  RD->completeDefinition();
  return RD;
}
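// The net effect is that the runtime only ever sees the kmp_task_t prefix,
// while the compiler-generated private copies live in the trailing
// .kmp_privates.t field of the combined record.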
/// Emit a proxy function which accepts kmp_task_t as the second
/// argument.
/// \code
/// kmp_int32 .omp_task_entry.(kmp_int32 gtid, kmp_task_t *tt) {
///   TaskFunction(gtid, tt->part_id, &tt->privates, task_privates_map, tt,
///   For taskloops:
///   tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
///   tt->reductions, tt->shareds);
///   return 0;
/// }
/// \endcode
static llvm::Function *
emitProxyTaskFunction(CodeGenModule &CGM, SourceLocation Loc,
                      OpenMPDirectiveKind Kind, QualType KmpInt32Ty,
                      QualType KmpTaskTWithPrivatesPtrQTy,
                      QualType KmpTaskTWithPrivatesQTy, QualType KmpTaskTQTy,
                      QualType SharedsPtrTy, llvm::Function *TaskFunction,
                      llvm::Value *TaskPrivatesMap) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &TaskEntryFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *TaskEntryTy =
      CGM.getTypes().GetFunctionType(TaskEntryFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_entry", ""});
  auto *TaskEntry = llvm::Function::Create(
      TaskEntryTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskEntry, TaskEntryFnInfo);
  TaskEntry->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, TaskEntry, TaskEntryFnInfo, Args,
                    Loc, Loc);

  // TaskFunction(gtid, tt->task_data.part_id, &tt->privates, task_privates_map,
  // tt,
  // For taskloops:
  // tt->task_data.lb, tt->task_data.ub, tt->task_data.st, tt->task_data.liter,
  // tt->task_data.shareds);
  llvm::Value *GtidParam = CGF.EmitLoadOfScalar(
      CGF.GetAddrOfLocalVar(&GtidArg), /*Volatile=*/false, KmpInt32Ty, Loc);
  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  LValue Base =
      CGF.EmitLValueForField(TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
  LValue PartIdLVal = CGF.EmitLValueForField(Base, *PartIdFI);
  llvm::Value *PartidParam = PartIdLVal.getPointer(CGF);

  auto SharedsFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds);
  LValue SharedsLVal = CGF.EmitLValueForField(Base, *SharedsFI);
  llvm::Value *SharedsParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.EmitLoadOfScalar(SharedsLVal, Loc),
      CGF.ConvertTypeForMem(SharedsPtrTy));

  auto PrivatesFI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin(), 1);
  llvm::Value *PrivatesParam;
  if (PrivatesFI != KmpTaskTWithPrivatesQTyRD->field_end()) {
    LValue PrivatesLVal = CGF.EmitLValueForField(TDBase, *PrivatesFI);
    PrivatesParam = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivatesLVal.getPointer(CGF), CGF.VoidPtrTy);
  } else {
    PrivatesParam = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }

  llvm::Value *CommonArgs[] = {
      GtidParam, PartidParam, PrivatesParam, TaskPrivatesMap,
      CGF.Builder
          .CreatePointerBitCastOrAddrSpaceCast(TDBase.getAddress(CGF),
                                               CGF.VoidPtrTy, CGF.Int8Ty)
          .getPointer()};
  SmallVector<llvm::Value *, 16> CallArgs(std::begin(CommonArgs),
                                          std::end(CommonArgs));
  if (isOpenMPTaskLoopDirective(Kind)) {
    auto LBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound);
    LValue LBLVal = CGF.EmitLValueForField(Base, *LBFI);
    llvm::Value *LBParam = CGF.EmitLoadOfScalar(LBLVal, Loc);
    auto UBFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound);
    LValue UBLVal = CGF.EmitLValueForField(Base, *UBFI);
    llvm::Value *UBParam = CGF.EmitLoadOfScalar(UBLVal, Loc);
    auto StFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTStride);
    LValue StLVal = CGF.EmitLValueForField(Base, *StFI);
    llvm::Value *StParam = CGF.EmitLoadOfScalar(StLVal, Loc);
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *LIParam = CGF.EmitLoadOfScalar(LILVal, Loc);
    auto RFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTReductions);
    LValue RLVal = CGF.EmitLValueForField(Base, *RFI);
    llvm::Value *RParam = CGF.EmitLoadOfScalar(RLVal, Loc);
    CallArgs.push_back(LBParam);
    CallArgs.push_back(UBParam);
    CallArgs.push_back(StParam);
    CallArgs.push_back(LIParam);
    CallArgs.push_back(RParam);
  }
  CallArgs.push_back(SharedsParam);

  CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskFunction,
                                                  CallArgs);
  CGF.EmitStoreThroughLValue(RValue::get(CGF.Builder.getInt32(/*C=*/0)),
                             CGF.MakeAddrLValue(CGF.ReturnValue, KmpInt32Ty));
  CGF.FinishFunction();
  return TaskEntry;
}
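// The runtime invokes this proxy through the routine field of kmp_task_t
// (roughly 'task->routine(gtid, task)'), so the proxy is what bridges the
// fixed kmp_routine_entry_t signature and the directive-specific outlined
// TaskFunction.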
static llvm::Value *emitDestructorsFunction(CodeGenModule &CGM,
                                            SourceLocation Loc,
                                            QualType KmpInt32Ty,
                                            QualType KmpTaskTWithPrivatesPtrQTy,
                                            QualType KmpTaskTWithPrivatesQTy) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl GtidArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, KmpInt32Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TaskTypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                                KmpTaskTWithPrivatesPtrQTy.withRestrict(),
                                ImplicitParamDecl::Other);
  Args.push_back(&GtidArg);
  Args.push_back(&TaskTypeArg);
  const auto &DestructorFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(KmpInt32Ty, Args);
  llvm::FunctionType *DestructorFnTy =
      CGM.getTypes().GetFunctionType(DestructorFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_destructor", ""});
  auto *DestructorFn =
      llvm::Function::Create(DestructorFnTy, llvm::GlobalValue::InternalLinkage,
                             Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), DestructorFn,
                                    DestructorFnInfo);
  DestructorFn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), KmpInt32Ty, DestructorFn, DestructorFnInfo,
                    Args, Loc, Loc);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskTypeArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  const auto *KmpTaskTWithPrivatesQTyRD =
      cast<RecordDecl>(KmpTaskTWithPrivatesQTy->getAsTagDecl());
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  Base = CGF.EmitLValueForField(Base, *FI);
  for (const auto *Field :
       cast<RecordDecl>(FI->getType()->getAsTagDecl())->fields()) {
    if (QualType::DestructionKind DtorKind =
            Field->getType().isDestructedType()) {
      LValue FieldLValue = CGF.EmitLValueForField(Base, Field);
      CGF.pushDestroy(DtorKind, FieldLValue.getAddress(CGF), Field->getType());
    }
  }
  CGF.FinishFunction();
  return DestructorFn;
}
/// Emit a privates mapping function for correct handling of private and
/// firstprivate variables.
/// \code
/// void .omp_task_privates_map.(const .privates. *noalias privs, <ty1>
/// **noalias priv1,...,  <tyn> **noalias privn) {
///   *priv1 = &.privates.priv1;
///   ...;
///   *privn = &.privates.privn;
/// }
/// \endcode
static llvm::Value *
emitTaskPrivateMappingFunction(CodeGenModule &CGM, SourceLocation Loc,
                               const OMPTaskDataTy &Data, QualType PrivatesQTy,
                               ArrayRef<PrivateDataTy> Privates) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl TaskPrivatesArg(
      C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
      C.getPointerType(PrivatesQTy).withConst().withRestrict(),
      ImplicitParamDecl::Other);
  Args.push_back(&TaskPrivatesArg);
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, unsigned> PrivateVarsPos;
  unsigned Counter = 1;
  for (const Expr *E : Data.PrivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.FirstprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const Expr *E : Data.LastprivateVars) {
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(E->getType()))
            .withConst()
            .withRestrict(),
        ImplicitParamDecl::Other));
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    QualType Ty = VD->getType().getNonReferenceType();
    if (VD->getType()->isLValueReferenceType())
      Ty = C.getPointerType(Ty);
    if (isAllocatableDecl(VD))
      Ty = C.getPointerType(Ty);
    Args.push_back(ImplicitParamDecl::Create(
        C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
        C.getPointerType(C.getPointerType(Ty)).withConst().withRestrict(),
        ImplicitParamDecl::Other));
    PrivateVarsPos[VD] = Counter;
    ++Counter;
  }
  const auto &TaskPrivatesMapFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskPrivatesMapTy =
      CGM.getTypes().GetFunctionType(TaskPrivatesMapFnInfo);
  std::string Name =
      CGM.getOpenMPRuntime().getName({"omp_task_privates_map", ""});
  auto *TaskPrivatesMap = llvm::Function::Create(
      TaskPrivatesMapTy, llvm::GlobalValue::InternalLinkage, Name,
      &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskPrivatesMap,
                                    TaskPrivatesMapFnInfo);
  if (CGM.getLangOpts().Optimize) {
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::NoInline);
    TaskPrivatesMap->removeFnAttr(llvm::Attribute::OptimizeNone);
    TaskPrivatesMap->addFnAttr(llvm::Attribute::AlwaysInline);
  }
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskPrivatesMap,
                    TaskPrivatesMapFnInfo, Args, Loc, Loc);

  // *privi = &.privates.privi;
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&TaskPrivatesArg),
      TaskPrivatesArg.getType()->castAs<PointerType>());
  const auto *PrivatesQTyRD = cast<RecordDecl>(PrivatesQTy->getAsTagDecl());
  Counter = 0;
  for (const FieldDecl *Field : PrivatesQTyRD->fields()) {
    LValue FieldLVal = CGF.EmitLValueForField(Base, Field);
    const VarDecl *VD = Args[PrivateVarsPos[Privates[Counter].second.Original]];
    LValue RefLVal =
        CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(VD), VD->getType());
    LValue RefLoadLVal = CGF.EmitLoadOfPointerLValue(
        RefLVal.getAddress(CGF), RefLVal.getType()->castAs<PointerType>());
    CGF.EmitStoreOfScalar(FieldLVal.getPointer(CGF), RefLoadLVal);
    ++Counter;
  }
  CGF.FinishFunction();
  return TaskPrivatesMap;
}
/// Emit initialization for private variables in task-based directives.
static void emitPrivatesInit(CodeGenFunction &CGF,
                             const OMPExecutableDirective &D,
                             Address KmpTaskSharedsPtr, LValue TDBase,
                             const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                             QualType SharedsTy, QualType SharedsPtrTy,
                             const OMPTaskDataTy &Data,
                             ArrayRef<PrivateDataTy> Privates, bool ForDup) {
  ASTContext &C = CGF.getContext();
  auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
  LValue PrivatesBase = CGF.EmitLValueForField(TDBase, *FI);
  OpenMPDirectiveKind Kind = isOpenMPTaskLoopDirective(D.getDirectiveKind())
                                 ? OMPD_taskloop
                                 : OMPD_task;
  const CapturedStmt &CS = *D.getCapturedStmt(Kind);
  CodeGenFunction::CGCapturedStmtInfo CapturesInfo(CS);
  LValue SrcBase;
  bool IsTargetTask =
      isOpenMPTargetDataManagementDirective(D.getDirectiveKind()) ||
      isOpenMPTargetExecutionDirective(D.getDirectiveKind());
  // For target-based directives skip 4 firstprivate arrays BasePointersArray,
  // PointersArray, SizesArray, and MappersArray. The original variables for
  // these arrays are not captured and we get their addresses explicitly.
  if ((!IsTargetTask && !Data.FirstprivateVars.empty() && ForDup) ||
      (IsTargetTask && KmpTaskSharedsPtr.isValid())) {
    SrcBase = CGF.MakeAddrLValue(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            KmpTaskSharedsPtr, CGF.ConvertTypeForMem(SharedsPtrTy),
            CGF.ConvertTypeForMem(SharedsTy)),
        SharedsTy);
  }
  FI = cast<RecordDecl>(FI->getType()->getAsTagDecl())->field_begin();
  for (const PrivateDataTy &Pair : Privates) {
    // Do not initialize private locals.
    if (Pair.second.isLocalPrivate()) {
      ++FI;
      continue;
    }
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    if (Init && (!ForDup || (isa<CXXConstructExpr>(Init) &&
                             !CGF.isTrivialInitializer(Init)))) {
      LValue PrivateLValue = CGF.EmitLValueForField(PrivatesBase, *FI);
      if (const VarDecl *Elem = Pair.second.PrivateElemInit) {
        const VarDecl *OriginalVD = Pair.second.Original;
        // Check if the variable is the target-based BasePointersArray,
        // PointersArray, SizesArray, or MappersArray.
        LValue SharedRefLValue;
        QualType Type = PrivateLValue.getType();
        const FieldDecl *SharedField = CapturesInfo.lookup(OriginalVD);
        if (IsTargetTask && !SharedField) {
          assert(isa<ImplicitParamDecl>(OriginalVD) &&
                 isa<CapturedDecl>(OriginalVD->getDeclContext()) &&
                 cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getNumParams() == 0 &&
                 isa<TranslationUnitDecl>(
                     cast<CapturedDecl>(OriginalVD->getDeclContext())
                         ->getDeclContext()) &&
                 "Expected artificial target data variable.");
          SharedRefLValue =
              CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(OriginalVD), Type);
        } else if (ForDup) {
          SharedRefLValue = CGF.EmitLValueForField(SrcBase, SharedField);
          SharedRefLValue = CGF.MakeAddrLValue(
              SharedRefLValue.getAddress(CGF).withAlignment(
                  C.getDeclAlign(OriginalVD)),
              SharedRefLValue.getType(), LValueBaseInfo(AlignmentSource::Decl),
              SharedRefLValue.getTBAAInfo());
        } else if (CGF.LambdaCaptureFields.count(
                       Pair.second.Original->getCanonicalDecl()) > 0 ||
                   isa_and_nonnull<BlockDecl>(CGF.CurCodeDecl)) {
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        } else {
          // Processing for implicitly captured variables.
          InlinedOpenMPRegionRAII Region(
              CGF, [](CodeGenFunction &, PrePostActionTy &) {}, OMPD_unknown,
              /*HasCancel=*/false, /*NoInheritance=*/true);
          SharedRefLValue = CGF.EmitLValue(Pair.second.OriginalRef);
        }
        if (Type->isArrayType()) {
          // Initialize firstprivate array.
          if (!isa<CXXConstructExpr>(Init) || CGF.isTrivialInitializer(Init)) {
            // Perform simple memcpy.
            CGF.EmitAggregateAssign(PrivateLValue, SharedRefLValue, Type);
          } else {
            // Initialize firstprivate array using element-by-element
            // initialization.
            CGF.EmitOMPAggregateAssign(
                PrivateLValue.getAddress(CGF), SharedRefLValue.getAddress(CGF),
                Type,
                [&CGF, Elem, Init, &CapturesInfo](Address DestElement,
                                                  Address SrcElement) {
                  // Clean up any temporaries needed by the initialization.
                  CodeGenFunction::OMPPrivateScope InitScope(CGF);
                  InitScope.addPrivate(Elem, SrcElement);
                  (void)InitScope.Privatize();
                  // Emit initialization for single element.
                  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(
                      CGF, &CapturesInfo);
                  CGF.EmitAnyExprToMem(Init, DestElement,
                                       Init->getType().getQualifiers(),
                                       /*IsInitializer=*/false);
                });
          }
        } else {
          CodeGenFunction::OMPPrivateScope InitScope(CGF);
          InitScope.addPrivate(Elem, SharedRefLValue.getAddress(CGF));
          (void)InitScope.Privatize();
          CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CapturesInfo);
          CGF.EmitExprAsInit(Init, VD, PrivateLValue,
                             /*capturedByInit=*/false);
        }
      } else {
        CGF.EmitExprAsInit(Init, VD, PrivateLValue, /*capturedByInit=*/false);
      }
    }
    ++FI;
  }
}
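// Note the ForDup split above: on the task_dup path only non-trivial
// copy-construction is redone, presumably because trivially initialized
// copies are already covered by the runtime's byte-wise duplication of the
// task structure.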
/// Check if duplication function is required for taskloops.
static bool checkInitIsRequired(CodeGenFunction &CGF,
                                ArrayRef<PrivateDataTy> Privates) {
  bool InitRequired = false;
  for (const PrivateDataTy &Pair : Privates) {
    if (Pair.second.isLocalPrivate())
      continue;
    const VarDecl *VD = Pair.second.PrivateCopy;
    const Expr *Init = VD->getAnyInitializer();
    InitRequired = InitRequired || (isa_and_nonnull<CXXConstructExpr>(Init) &&
                                    !CGF.isTrivialInitializer(Init));
    if (InitRequired)
      break;
  }
  return InitRequired;
}
/// Emit task_dup function (for initialization of
/// private/firstprivate/lastprivate vars and last_iter flag)
/// \code
/// void __task_dup_entry(kmp_task_t *task_dst, const kmp_task_t *task_src, int
/// lastpriv) {
///   // setup lastprivate flag
///   task_dst->last = lastpriv;
///   // could be constructor calls here...
/// }
/// \endcode
static llvm::Value *
emitTaskDupFunction(CodeGenModule &CGM, SourceLocation Loc,
                    const OMPExecutableDirective &D,
                    QualType KmpTaskTWithPrivatesPtrQTy,
                    const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                    const RecordDecl *KmpTaskTQTyRD, QualType SharedsTy,
                    QualType SharedsPtrTy, const OMPTaskDataTy &Data,
                    ArrayRef<PrivateDataTy> Privates, bool WithLastIter) {
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl DstArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl SrcArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                           KmpTaskTWithPrivatesPtrQTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl LastprivArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.IntTy,
                                ImplicitParamDecl::Other);
  Args.push_back(&DstArg);
  Args.push_back(&SrcArg);
  Args.push_back(&LastprivArg);
  const auto &TaskDupFnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *TaskDupTy = CGM.getTypes().GetFunctionType(TaskDupFnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"omp_task_dup", ""});
  auto *TaskDup = llvm::Function::Create(
      TaskDupTy, llvm::GlobalValue::InternalLinkage, Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), TaskDup, TaskDupFnInfo);
  TaskDup->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, TaskDup, TaskDupFnInfo, Args, Loc,
                    Loc);

  LValue TDBase = CGF.EmitLoadOfPointerLValue(
      CGF.GetAddrOfLocalVar(&DstArg),
      KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
  // task_dst->liter = lastpriv;
  if (WithLastIter) {
    auto LIFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTLastIter);
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    LValue LILVal = CGF.EmitLValueForField(Base, *LIFI);
    llvm::Value *Lastpriv = CGF.EmitLoadOfScalar(
        CGF.GetAddrOfLocalVar(&LastprivArg), /*Volatile=*/false, C.IntTy, Loc);
    CGF.EmitStoreOfScalar(Lastpriv, LILVal);
  }

  // Emit initial values for private copies (if any).
  assert(!Privates.empty());
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!Data.FirstprivateVars.empty()) {
    LValue TDBase = CGF.EmitLoadOfPointerLValue(
        CGF.GetAddrOfLocalVar(&SrcArg),
        KmpTaskTWithPrivatesPtrQTy->castAs<PointerType>());
    LValue Base = CGF.EmitLValueForField(
        TDBase, *KmpTaskTWithPrivatesQTyRD->field_begin());
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(CGF.EmitLValueForField(
                                 Base, *std::next(KmpTaskTQTyRD->field_begin(),
                                                  KmpTaskTShareds)),
                             Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
  }
  emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, TDBase, KmpTaskTWithPrivatesQTyRD,
                   SharedsTy, SharedsPtrTy, Data, Privates, /*ForDup=*/true);
  CGF.FinishFunction();
  return TaskDup;
}
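// The taskloop entry point receives this function as its task_dup argument,
// and the runtime is expected to call it once per generated task so the
// last-iteration flag and non-trivial firstprivates are set up per chunk.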
/// Checks if destructor function is required to be generated.
/// \return true if cleanups are required, false otherwise.
static bool
checkDestructorsRequired(const RecordDecl *KmpTaskTWithPrivatesQTyRD,
                         ArrayRef<PrivateDataTy> Privates) {
  for (const PrivateDataTy &P : Privates) {
    if (P.second.isLocalPrivate())
      continue;
    QualType Ty = P.second.Original->getType().getNonReferenceType();
    if (Ty.isDestructedType())
      return true;
  }
  return false;
}
namespace {
/// Loop generator for OpenMP iterator expression.
class OMPIteratorGeneratorScope final
    : public CodeGenFunction::OMPPrivateScope {
  CodeGenFunction &CGF;
  const OMPIteratorExpr *E = nullptr;
  SmallVector<CodeGenFunction::JumpDest, 4> ContDests;
  SmallVector<CodeGenFunction::JumpDest, 4> ExitDests;
  OMPIteratorGeneratorScope() = delete;
  OMPIteratorGeneratorScope(OMPIteratorGeneratorScope &) = delete;

public:
  OMPIteratorGeneratorScope(CodeGenFunction &CGF, const OMPIteratorExpr *E)
      : CodeGenFunction::OMPPrivateScope(CGF), CGF(CGF), E(E) {
    if (!E)
      return;
    SmallVector<llvm::Value *, 4> Uppers;
    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      Uppers.push_back(CGF.EmitScalarExpr(E->getHelper(I).Upper));
      const auto *VD = cast<VarDecl>(E->getIteratorDecl(I));
      addPrivate(VD, CGF.CreateMemTemp(VD->getType(), VD->getName()));
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      addPrivate(
          HelperData.CounterVD,
          CGF.CreateMemTemp(HelperData.CounterVD->getType(), "counter.addr"));
    }
    Privatize();

    for (unsigned I = 0, End = E->numOfIterators(); I < End; ++I) {
      const OMPIteratorHelperData &HelperData = E->getHelper(I);
      LValue CLVal =
          CGF.MakeAddrLValue(CGF.GetAddrOfLocalVar(HelperData.CounterVD),
                             HelperData.CounterVD->getType());
      // Counter = 0;
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CLVal.getAddress(CGF).getElementType(), 0),
          CLVal);
      CodeGenFunction::JumpDest &ContDest =
          ContDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.cont"));
      CodeGenFunction::JumpDest &ExitDest =
          ExitDests.emplace_back(CGF.getJumpDestInCurrentScope("iter.exit"));
      // N = <number-of_iterations>;
      llvm::Value *N = Uppers[I];
      // cont:
      // if (Counter < N) goto body; else goto exit;
      CGF.EmitBlock(ContDest.getBlock());
      llvm::Value *CVal =
          CGF.EmitLoadOfScalar(CLVal, HelperData.CounterVD->getLocation());
      llvm::Value *Cmp =
          HelperData.CounterVD->getType()->isSignedIntegerOrEnumerationType()
              ? CGF.Builder.CreateICmpSLT(CVal, N)
              : CGF.Builder.CreateICmpULT(CVal, N);
      llvm::BasicBlock *BodyBB = CGF.createBasicBlock("iter.body");
      CGF.Builder.CreateCondBr(Cmp, BodyBB, ExitDest.getBlock());
      // body:
      CGF.EmitBlock(BodyBB);
      // Iteri = Begini + Counter * Stepi;
      CGF.EmitIgnoredExpr(HelperData.Update);
    }
  }
  ~OMPIteratorGeneratorScope() {
    if (!E)
      return;
    for (unsigned I = E->numOfIterators(); I > 0; --I) {
      // Counter = Counter + 1;
      const OMPIteratorHelperData &HelperData = E->getHelper(I - 1);
      CGF.EmitIgnoredExpr(HelperData.CounterUpdate);
      // goto cont;
      CGF.EmitBranchThroughCleanup(ContDests[I - 1]);
      // exit:
      CGF.EmitBlock(ExitDests[I - 1].getBlock(), /*IsFinished=*/I == 1);
    }
  }
};
} // namespace
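// Putting the pieces together, 'iterator(it = begin:end)' expands to roughly:
//   counter = 0;
// cont:
//   if (counter < N) goto body; else goto exit;
// body:
//   it = begin + counter * step;
//   <uses of it>;
//   counter = counter + 1;
//   goto cont;
// exit: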
static std::pair<llvm::Value *, llvm::Value *>
getPointerAndSize(CodeGenFunction &CGF, const Expr *E) {
  const auto *OASE = dyn_cast<OMPArrayShapingExpr>(E);
  llvm::Value *Addr;
  if (OASE) {
    const Expr *Base = OASE->getBase();
    Addr = CGF.EmitScalarExpr(Base);
  } else {
    Addr = CGF.EmitLValue(E).getPointer(CGF);
  }
  llvm::Value *SizeVal;
  QualType Ty = E->getType();
  if (OASE) {
    SizeVal = CGF.getTypeSize(OASE->getBase()->getType()->getPointeeType());
    for (const Expr *SE : OASE->getDimensions()) {
      llvm::Value *Sz = CGF.EmitScalarExpr(SE);
      Sz = CGF.EmitScalarConversion(
          Sz, SE->getType(), CGF.getContext().getSizeType(), SE->getExprLoc());
      SizeVal = CGF.Builder.CreateNUWMul(SizeVal, Sz);
    }
  } else if (const auto *ASE =
                 dyn_cast<OMPArraySectionExpr>(E->IgnoreParenImpCasts())) {
    LValue UpAddrLVal =
        CGF.EmitOMPArraySectionExpr(ASE, /*IsLowerBound=*/false);
    Address UpAddrAddress = UpAddrLVal.getAddress(CGF);
    llvm::Value *UpAddr = CGF.Builder.CreateConstGEP1_32(
        UpAddrAddress.getElementType(), UpAddrAddress.getPointer(), /*Idx0=*/1);
    llvm::Value *LowIntPtr = CGF.Builder.CreatePtrToInt(Addr, CGF.SizeTy);
    llvm::Value *UpIntPtr = CGF.Builder.CreatePtrToInt(UpAddr, CGF.SizeTy);
    SizeVal = CGF.Builder.CreateNUWSub(UpIntPtr, LowIntPtr);
  } else {
    SizeVal = CGF.getTypeSize(Ty);
  }
  return std::make_pair(Addr, SizeVal);
}
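// For an array section such as 'a[0:n]' with 'int *a', for example, Addr is
// &a[0] and SizeVal works out to (char *)(&a[n - 1] + 1) - (char *)&a[0],
// i.e. n * sizeof(int) computed at run time.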
/// Builds kmp_task_affinity_info_t, if it is not built yet, and builds flags
/// type.
static void getKmpAffinityType(ASTContext &C, QualType &KmpTaskAffinityInfoTy) {
  QualType FlagsTy = C.getIntTypeForBitwidth(32, /*Signed=*/false);
  if (KmpTaskAffinityInfoTy.isNull()) {
    RecordDecl *KmpAffinityInfoRD =
        C.buildImplicitRecord("kmp_task_affinity_info_t");
    KmpAffinityInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpAffinityInfoRD, FlagsTy);
    KmpAffinityInfoRD->completeDefinition();
    KmpTaskAffinityInfoTy = C.getRecordType(KmpAffinityInfoRD);
  }
}
CGOpenMPRuntime::TaskResultTy
CGOpenMPRuntime::emitTaskInit(CodeGenFunction &CGF, SourceLocation Loc,
                              const OMPExecutableDirective &D,
                              llvm::Function *TaskFunction, QualType SharedsTy,
                              Address Shareds, const OMPTaskDataTy &Data) {
  ASTContext &C = CGM.getContext();
  llvm::SmallVector<PrivateDataTy, 4> Privates;
  // Aggregate privates and sort them by the alignment.
  const auto *I = Data.PrivateCopies.begin();
  for (const Expr *E : Data.PrivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  I = Data.FirstprivateCopies.begin();
  const auto *IElemInitRef = Data.FirstprivateInits.begin();
  for (const Expr *E : Data.FirstprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(
            E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
            cast<VarDecl>(cast<DeclRefExpr>(*IElemInitRef)->getDecl())));
    ++I;
    ++IElemInitRef;
  }
  I = Data.LastprivateCopies.begin();
  for (const Expr *E : Data.LastprivateVars) {
    const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(E)->getDecl());
    Privates.emplace_back(
        C.getDeclAlign(VD),
        PrivateHelpersTy(E, VD, cast<VarDecl>(cast<DeclRefExpr>(*I)->getDecl()),
                         /*PrivateElemInit=*/nullptr));
    ++I;
  }
  for (const VarDecl *VD : Data.PrivateLocals) {
    if (isAllocatableDecl(VD))
      Privates.emplace_back(CGM.getPointerAlign(), PrivateHelpersTy(VD));
    else
      Privates.emplace_back(C.getDeclAlign(VD), PrivateHelpersTy(VD));
  }
  llvm::stable_sort(Privates,
                    [](const PrivateDataTy &L, const PrivateDataTy &R) {
                      return L.first > R.first;
                    });
  QualType KmpInt32Ty = C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/1);
  // Build type kmp_routine_entry_t (if not built yet).
  emitKmpRoutineEntryT(KmpInt32Ty);
  // Build type kmp_task_t (if not built yet).
  if (isOpenMPTaskLoopDirective(D.getDirectiveKind())) {
    if (SavedKmpTaskloopTQTy.isNull()) {
      SavedKmpTaskloopTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskloopTQTy;
  } else {
    assert((D.getDirectiveKind() == OMPD_task ||
            isOpenMPTargetExecutionDirective(D.getDirectiveKind()) ||
            isOpenMPTargetDataManagementDirective(D.getDirectiveKind())) &&
           "Expected taskloop, task or target directive");
    if (SavedKmpTaskTQTy.isNull()) {
      SavedKmpTaskTQTy = C.getRecordType(createKmpTaskTRecordDecl(
          CGM, D.getDirectiveKind(), KmpInt32Ty, KmpRoutineEntryPtrQTy));
    }
    KmpTaskTQTy = SavedKmpTaskTQTy;
  }
  const auto *KmpTaskTQTyRD = cast<RecordDecl>(KmpTaskTQTy->getAsTagDecl());
  // Build particular struct kmp_task_t for the given task.
  const RecordDecl *KmpTaskTWithPrivatesQTyRD =
      createKmpTaskTWithPrivatesRecordDecl(CGM, KmpTaskTQTy, Privates);
  QualType KmpTaskTWithPrivatesQTy = C.getRecordType(KmpTaskTWithPrivatesQTyRD);
  QualType KmpTaskTWithPrivatesPtrQTy =
      C.getPointerType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesTy = CGF.ConvertType(KmpTaskTWithPrivatesQTy);
  llvm::Type *KmpTaskTWithPrivatesPtrTy =
      KmpTaskTWithPrivatesTy->getPointerTo();
  llvm::Value *KmpTaskTWithPrivatesTySize =
      CGF.getTypeSize(KmpTaskTWithPrivatesQTy);
  QualType SharedsPtrTy = C.getPointerType(SharedsTy);
  // Emit initial values for private copies (if any).
  llvm::Value *TaskPrivatesMap = nullptr;
  llvm::Type *TaskPrivatesMapTy =
      std::next(TaskFunction->arg_begin(), 3)->getType();
  if (!Privates.empty()) {
    auto FI = std::next(KmpTaskTWithPrivatesQTyRD->field_begin());
    TaskPrivatesMap =
        emitTaskPrivateMappingFunction(CGM, Loc, Data, FI->getType(), Privates);
    TaskPrivatesMap = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        TaskPrivatesMap, TaskPrivatesMapTy);
  } else {
    TaskPrivatesMap = llvm::ConstantPointerNull::get(
        cast<llvm::PointerType>(TaskPrivatesMapTy));
  }
  // Build a proxy function kmp_int32 .omp_task_entry.(kmp_int32 gtid,
  // kmp_task_t *tt);
  llvm::Function *TaskEntry = emitProxyTaskFunction(
      CGM, Loc, D.getDirectiveKind(), KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
      KmpTaskTWithPrivatesQTy, KmpTaskTQTy, SharedsPtrTy, TaskFunction,
      TaskPrivatesMap);

  // Build call kmp_task_t * __kmpc_omp_task_alloc(ident_t *, kmp_int32 gtid,
  // kmp_int32 flags, size_t sizeof_kmp_task_t, size_t sizeof_shareds,
  // kmp_routine_entry_t *task_entry);
  // Task flags. Format is taken from
  // https://github.com/llvm/llvm-project/blob/main/openmp/runtime/src/kmp.h,
  // description of kmp_tasking_flags struct.
  enum {
    TiedFlag = 0x1,
    FinalFlag = 0x2,
    DestructorsFlag = 0x8,
    PriorityFlag = 0x20,
    DetachableFlag = 0x40,
  };
  unsigned Flags = Data.Tied ? TiedFlag : 0;
  bool NeedsCleanup = false;
  if (!Privates.empty()) {
    NeedsCleanup =
        checkDestructorsRequired(KmpTaskTWithPrivatesQTyRD, Privates);
    if (NeedsCleanup)
      Flags = Flags | DestructorsFlag;
  }
  if (Data.Priority.getInt())
    Flags = Flags | PriorityFlag;
  if (D.hasClausesOfKind<OMPDetachClause>())
    Flags = Flags | DetachableFlag;
  llvm::Value *TaskFlags =
      Data.Final.getPointer()
          ? CGF.Builder.CreateSelect(Data.Final.getPointer(),
                                     CGF.Builder.getInt32(FinalFlag),
                                     CGF.Builder.getInt32(/*C=*/0))
          : CGF.Builder.getInt32(Data.Final.getInt() ? FinalFlag : 0);
  TaskFlags = CGF.Builder.CreateOr(TaskFlags, CGF.Builder.getInt32(Flags));
  llvm::Value *SharedsSize = CGM.getSize(C.getTypeSizeInChars(SharedsTy));
  SmallVector<llvm::Value *, 8> AllocArgs = {
      emitUpdateLocation(CGF, Loc),
      getThreadID(CGF, Loc), TaskFlags, KmpTaskTWithPrivatesTySize,
      SharedsSize, CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                       TaskEntry, KmpRoutineEntryPtrTy)};
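  // For instance, a tied task with a priority clause and no destructors
  // contributes Flags == TiedFlag | PriorityFlag == 0x21 to the TaskFlags
  // value passed to the allocation call below.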
  llvm::Value *NewTask;
  if (D.hasClausesOfKind<OMPNowaitClause>()) {
    // Check if we have any device clause associated with the directive.
    const Expr *Device = nullptr;
    if (auto *C = D.getSingleClause<OMPDeviceClause>())
      Device = C->getDevice();
    // Emit device ID if any otherwise use default value.
    llvm::Value *DeviceID;
    if (Device)
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    else
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    AllocArgs.push_back(DeviceID);
    NewTask = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_target_task_alloc),
        AllocArgs);
  } else {
    NewTask =
        CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_omp_task_alloc),
                            AllocArgs);
  }
  // Emit detach clause initialization.
  // evt = (typeof(evt))__kmpc_task_allow_completion_event(loc, tid,
  // task_descriptor);
  if (const auto *DC = D.getSingleClause<OMPDetachClause>()) {
    const Expr *Evt = DC->getEventHandler()->IgnoreParenImpCasts();
    LValue EvtLVal = CGF.EmitLValue(Evt);

    // Build kmp_event_t *__kmpc_task_allow_completion_event(ident_t *loc_ref,
    // int gtid, kmp_task_t *task);
    llvm::Value *Loc = emitUpdateLocation(CGF, DC->getBeginLoc());
    llvm::Value *Tid = getThreadID(CGF, DC->getBeginLoc());
    Tid = CGF.Builder.CreateIntCast(Tid, CGF.IntTy, /*isSigned=*/false);
    llvm::Value *EvtVal = CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_task_allow_completion_event),
        {Loc, Tid, NewTask});
    EvtVal = CGF.EmitScalarConversion(EvtVal, C.VoidPtrTy, Evt->getType(),
                                      Evt->getExprLoc());
    CGF.EmitStoreOfScalar(EvtVal, EvtLVal);
  }
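  // Note on the allocation calls above: __kmpc_omp_target_task_alloc takes
  // the same argument list as __kmpc_omp_task_alloc plus the trailing device
  // ID pushed onto AllocArgs.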
  // Process affinity clauses.
  if (D.hasClausesOfKind<OMPAffinityClause>()) {
    // Process list of affinity data.
    ASTContext &C = CGM.getContext();
    Address AffinitiesArray = Address::invalid();
    // Calculate number of elements to form the array of affinity data.
    llvm::Value *NumOfElements = nullptr;
    unsigned NumAffinities = 0;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (const Expr *Modifier = C->getModifier()) {
        const auto *IE = cast<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts());
        for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
          llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
          Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
          NumOfElements =
              NumOfElements ? CGF.Builder.CreateNUWMul(NumOfElements, Sz) : Sz;
        }
      } else {
        NumAffinities += C->varlist_size();
      }
    }
    getKmpAffinityType(CGM.getContext(), KmpTaskAffinityInfoTy);
    // Fields ids in kmp_task_affinity_info record.
    enum RTLAffinityInfoFieldsTy { BaseAddr, Len, Flags };

    QualType KmpTaskAffinityInfoArrayTy;
    if (NumOfElements) {
      NumOfElements = CGF.Builder.CreateNUWAdd(
          llvm::ConstantInt::get(CGF.SizeTy, NumAffinities), NumOfElements);
      auto *OVE = new (C) OpaqueValueExpr(
          Loc,
          C.getIntTypeForBitwidth(C.getTypeSize(C.getSizeType()), /*Signed=*/0),
          VK_PRValue);
      CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                    RValue::get(NumOfElements));
      KmpTaskAffinityInfoArrayTy =
          C.getVariableArrayType(KmpTaskAffinityInfoTy, OVE, ArrayType::Normal,
                                 /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
      // Properly emit variable-sized array.
      auto *PD = ImplicitParamDecl::Create(C, KmpTaskAffinityInfoArrayTy,
                                           ImplicitParamDecl::Other);
      CGF.EmitVarDecl(*PD);
      AffinitiesArray = CGF.GetAddrOfLocalVar(PD);
      NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                                /*isSigned=*/false);
    } else {
      KmpTaskAffinityInfoArrayTy = C.getConstantArrayType(
          KmpTaskAffinityInfoTy,
          llvm::APInt(C.getTypeSize(C.getSizeType()), NumAffinities), nullptr,
          ArrayType::Normal, /*IndexTypeQuals=*/0);
      AffinitiesArray =
          CGF.CreateMemTemp(KmpTaskAffinityInfoArrayTy, ".affs.arr.addr");
      AffinitiesArray = CGF.Builder.CreateConstArrayGEP(AffinitiesArray, 0);
      NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumAffinities,
                                             /*isSigned=*/false);
    }

    const auto *KmpAffinityInfoRD = KmpTaskAffinityInfoTy->getAsRecordDecl();
    // Fill array by elements without iterators.
    unsigned Pos = 0;
    bool HasIterator = false;
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      if (C->getModifier()) {
        HasIterator = true;
        continue;
      }
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        LValue Base =
            CGF.MakeAddrLValue(CGF.Builder.CreateConstGEP(AffinitiesArray, Pos),
                               KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        ++Pos;
      }
    }
    LValue PosLVal;
    if (HasIterator) {
      PosLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getSizeType(), "affs.counter.addr"),
          C.getSizeType());
      CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
    }
    // Process elements with iterators.
    for (const auto *C : D.getClausesOfKind<OMPAffinityClause>()) {
      const Expr *Modifier = C->getModifier();
      if (!Modifier)
        continue;
      OMPIteratorGeneratorScope IteratorScope(
          CGF, cast_or_null<OMPIteratorExpr>(Modifier->IgnoreParenImpCasts()));
      for (const Expr *E : C->varlists()) {
        llvm::Value *Addr;
        llvm::Value *Size;
        std::tie(Addr, Size) = getPointerAndSize(CGF, E);
        llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
        LValue Base = CGF.MakeAddrLValue(
            CGF.Builder.CreateGEP(AffinitiesArray, Idx), KmpTaskAffinityInfoTy);
        // affs[i].base_addr = &<Affinities[i].second>;
        LValue BaseAddrLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), BaseAddr));
        CGF.EmitStoreOfScalar(CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy),
                              BaseAddrLVal);
        // affs[i].len = sizeof(<Affinities[i].second>);
        LValue LenLVal = CGF.EmitLValueForField(
            Base, *std::next(KmpAffinityInfoRD->field_begin(), Len));
        CGF.EmitStoreOfScalar(Size, LenLVal);
        Idx = CGF.Builder.CreateNUWAdd(
            Idx, llvm::ConstantInt::get(Idx->getType(), 1));
        CGF.EmitStoreOfScalar(Idx, PosLVal);
      }
    }
    // Call to kmp_int32 __kmpc_omp_reg_task_with_affinity(ident_t *loc_ref,
    //                    kmp_int32 gtid, kmp_task_t *new_task, kmp_int32
    //                    naffins, kmp_task_affinity_info_t *affin_list);
    llvm::Value *LocRef = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = getThreadID(CGF, Loc);
    llvm::Value *AffinListPtr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        AffinitiesArray.getPointer(), CGM.VoidPtrTy);
    // FIXME: Emit the function and ignore its result for now unless the
    // runtime function is properly implemented.
    (void)CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_omp_reg_task_with_affinity),
        {LocRef, GTid, NewTask, NumOfElements, AffinListPtr});
  }
  llvm::Value *NewTaskNewTaskTTy =
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          NewTask, KmpTaskTWithPrivatesPtrTy);
  LValue Base = CGF.MakeNaturalAlignAddrLValue(NewTaskNewTaskTTy,
                                               KmpTaskTWithPrivatesQTy);
  LValue TDBase =
      CGF.EmitLValueForField(Base, *KmpTaskTWithPrivatesQTyRD->field_begin());
  // Fill the data in the resulting kmp_task_t record.
  // Copy shareds if there are any.
  Address KmpTaskSharedsPtr = Address::invalid();
  if (!SharedsTy->getAsStructureType()->getDecl()->field_empty()) {
    KmpTaskSharedsPtr = Address(
        CGF.EmitLoadOfScalar(
            CGF.EmitLValueForField(
                TDBase,
                *std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTShareds)),
            Loc),
        CGF.Int8Ty, CGM.getNaturalTypeAlignment(SharedsTy));
    LValue Dest = CGF.MakeAddrLValue(KmpTaskSharedsPtr, SharedsTy);
    LValue Src = CGF.MakeAddrLValue(Shareds, SharedsTy);
    CGF.EmitAggregateCopy(Dest, Src, SharedsTy, AggValueSlot::DoesNotOverlap);
  }
  // Emit initial values for private copies (if any).
  TaskResultTy Result;
  if (!Privates.empty()) {
    emitPrivatesInit(CGF, D, KmpTaskSharedsPtr, Base, KmpTaskTWithPrivatesQTyRD,
                     SharedsTy, SharedsPtrTy, Data, Privates,
                     /*ForDup=*/false);
    if (isOpenMPTaskLoopDirective(D.getDirectiveKind()) &&
        (!Data.LastprivateVars.empty() || checkInitIsRequired(CGF, Privates))) {
      Result.TaskDupFn = emitTaskDupFunction(
          CGM, Loc, D, KmpTaskTWithPrivatesPtrQTy, KmpTaskTWithPrivatesQTyRD,
          KmpTaskTQTyRD, SharedsTy, SharedsPtrTy, Data, Privates,
          /*WithLastIter=*/!Data.LastprivateVars.empty());
    }
  }
  // Fields of union "kmp_cmplrdata_t" for destructors and priority.
  enum { Priority = 0, Destructors = 1 };
  // Provide pointer to function with destructors for privates.
  auto FI = std::next(KmpTaskTQTyRD->field_begin(), Data1);
  const RecordDecl *KmpCmplrdataUD =
      (*FI)->getType()->getAsUnionType()->getDecl();
  if (NeedsCleanup) {
    llvm::Value *DestructorFn = emitDestructorsFunction(
        CGM, Loc, KmpInt32Ty, KmpTaskTWithPrivatesPtrQTy,
        KmpTaskTWithPrivatesQTy);
    LValue Data1LV = CGF.EmitLValueForField(TDBase, *FI);
    LValue DestructorsLV = CGF.EmitLValueForField(
        Data1LV, *std::next(KmpCmplrdataUD->field_begin(), Destructors));
    CGF.EmitStoreOfScalar(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                              DestructorFn, KmpRoutineEntryPtrTy),
                          DestructorsLV);
  }
  // Set priority.
  if (Data.Priority.getInt()) {
    LValue Data2LV = CGF.EmitLValueForField(
        TDBase, *std::next(KmpTaskTQTyRD->field_begin(), Data2));
    LValue PriorityLV = CGF.EmitLValueForField(
        Data2LV, *std::next(KmpCmplrdataUD->field_begin(), Priority));
    CGF.EmitStoreOfScalar(Data.Priority.getPointer(), PriorityLV);
  }
  Result.NewTask = NewTask;
  Result.TaskEntry = TaskEntry;
  Result.NewTaskNewTaskTTy = NewTaskNewTaskTTy;
  Result.TDBase = TDBase;
  Result.KmpTaskTQTyRD = KmpTaskTQTyRD;
  return Result;
}
/// Translates internal dependency kind into the runtime kind.
static RTLDependenceKindTy translateDependencyKind(OpenMPDependClauseKind K) {
  RTLDependenceKindTy DepKind;
  switch (K) {
  case OMPC_DEPEND_in:
    DepKind = RTLDependenceKindTy::DepIn;
    break;
  // Out and InOut dependencies must use the same code.
  case OMPC_DEPEND_out:
  case OMPC_DEPEND_inout:
    DepKind = RTLDependenceKindTy::DepInOut;
    break;
  case OMPC_DEPEND_mutexinoutset:
    DepKind = RTLDependenceKindTy::DepMutexInOutSet;
    break;
  case OMPC_DEPEND_inoutset:
    DepKind = RTLDependenceKindTy::DepInOutSet;
    break;
  case OMPC_DEPEND_outallmemory:
    DepKind = RTLDependenceKindTy::DepOmpAllMem;
    break;
  case OMPC_DEPEND_source:
  case OMPC_DEPEND_sink:
  case OMPC_DEPEND_depobj:
  case OMPC_DEPEND_inoutallmemory:
  case OMPC_DEPEND_unknown:
    llvm_unreachable("Unknown task dependence type");
  }
  return DepKind;
}
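// So, for example, 'depend(in: x)' maps to DepIn while both 'depend(out: x)'
// and 'depend(inout: x)' map to DepInOut, matching the runtime's treatment
// of out and inout as the same dependence class.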
/// Builds kmp_depend_info, if it is not built yet, and builds flags type.
static void getDependTypes(ASTContext &C, QualType &KmpDependInfoTy,
                           QualType &FlagsTy) {
  FlagsTy = C.getIntTypeForBitwidth(C.getTypeSize(C.BoolTy), /*Signed=*/false);
  if (KmpDependInfoTy.isNull()) {
    RecordDecl *KmpDependInfoRD = C.buildImplicitRecord("kmp_depend_info");
    KmpDependInfoRD->startDefinition();
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getIntPtrType());
    addFieldToRecordDecl(C, KmpDependInfoRD, C.getSizeType());
    addFieldToRecordDecl(C, KmpDependInfoRD, FlagsTy);
    KmpDependInfoRD->completeDefinition();
    KmpDependInfoTy = C.getRecordType(KmpDependInfoRD);
  }
}
std::pair<llvm::Value *, LValue>
CGOpenMPRuntime::getDepobjElements(CodeGenFunction &CGF, LValue DepobjLVal,
                                   SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      CGF.Builder.CreateElementBitCast(
          DepobjLVal.getAddress(CGF),
          CGF.ConvertTypeForMem(KmpDependInfoPtrTy)),
      KmpDependInfoPtrTy->castAs<PointerType>());
  Address DepObjAddr = CGF.Builder.CreateGEP(
      Base.getAddress(CGF),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  LValue NumDepsBase = CGF.MakeAddrLValue(
      DepObjAddr, KmpDependInfoTy, Base.getBaseInfo(), Base.getTBAAInfo());
  // NumDeps = deps[i].base_addr;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      NumDepsBase,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  llvm::Value *NumDeps = CGF.EmitLoadOfScalar(BaseAddrLVal, Loc);
  return std::make_pair(NumDeps, Base);
}
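// The GEP by -1 above reads a header element: depobj arrays are laid out
// with one extra kmp_depend_info entry in front of the user-visible ones,
// and that header's base_addr field stores the dependency count loaded back
// here.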
static void emitDependData(CodeGenFunction &CGF, QualType &KmpDependInfoTy,
                           llvm::PointerUnion<unsigned *, LValue *> Pos,
                           const OMPTaskDataTy::DependData &Data,
                           Address DependenciesArray) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);

  OMPIteratorGeneratorScope IteratorScope(
      CGF, cast_or_null<OMPIteratorExpr>(
               Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                 : nullptr));
  for (const Expr *E : Data.DepExprs) {
    llvm::Value *Addr;
    llvm::Value *Size;

    // The expression will be a nullptr in the 'omp_all_memory' case.
    if (E) {
      std::tie(Addr, Size) = getPointerAndSize(CGF, E);
      Addr = CGF.Builder.CreatePtrToInt(Addr, CGF.IntPtrTy);
    } else {
      Addr = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
      Size = llvm::ConstantInt::get(CGF.SizeTy, 0);
    }
    LValue Base;
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstGEP(DependenciesArray, *P), KmpDependInfoTy);
    } else {
      assert(E && "Expected a non-null expression");
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Base = CGF.MakeAddrLValue(
          CGF.Builder.CreateGEP(DependenciesArray, Idx), KmpDependInfoTy);
    }
    // deps[i].base_addr = &<Dependencies[i].second>;
    LValue BaseAddrLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
    CGF.EmitStoreOfScalar(Addr, BaseAddrLVal);
    // deps[i].len = sizeof(<Dependencies[i].second>);
    LValue LenLVal = CGF.EmitLValueForField(
        Base, *std::next(KmpDependInfoRD->field_begin(),
                         static_cast<unsigned int>(RTLDependInfoFields::Len)));
    CGF.EmitStoreOfScalar(Size, LenLVal);
    // deps[i].flags = <Dependencies[i].first>;
    RTLDependenceKindTy DepKind = translateDependencyKind(Data.DepKind);
    LValue FlagsLVal = CGF.EmitLValueForField(
        Base,
        *std::next(KmpDependInfoRD->field_begin(),
                   static_cast<unsigned int>(RTLDependInfoFields::Flags)));
    CGF.EmitStoreOfScalar(
        llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
        FlagsLVal);
    if (unsigned *P = Pos.dyn_cast<unsigned *>()) {
      ++(*P);
    } else {
      LValue &PosLVal = *Pos.get<LValue *>();
      llvm::Value *Idx = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Idx = CGF.Builder.CreateNUWAdd(Idx,
                                     llvm::ConstantInt::get(Idx->getType(), 1));
      CGF.EmitStoreOfScalar(Idx, PosLVal);
    }
  }
}
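// As a concrete example of the entries built above, 'depend(in: x)' for an
// 'int x' (4 bytes on typical targets) fills one record roughly as
//   { .base_addr = (intptr_t)&x, .len = 4, .flags = DepIn }.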
SmallVector<llvm::Value *, 4> CGOpenMPRuntime::emitDepobjElementsSizes(
    CodeGenFunction &CGF, QualType &KmpDependInfoTy,
    const OMPTaskDataTy::DependData &Data) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  SmallVector<llvm::Value *, 4> Sizes;
  SmallVector<LValue, 4> SizeLVals;
  ASTContext &C = CGF.getContext();
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (const Expr *E : Data.DepExprs) {
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());
      LValue NumLVal = CGF.MakeAddrLValue(
          CGF.CreateMemTemp(C.getUIntPtrType(), "depobj.size.addr"),
          C.getUIntPtrType());
      CGF.Builder.CreateStore(llvm::ConstantInt::get(CGF.IntPtrTy, 0),
                              NumLVal.getAddress(CGF));
      llvm::Value *PrevVal = CGF.EmitLoadOfScalar(NumLVal, E->getExprLoc());
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(PrevVal, NumDeps);
      CGF.EmitStoreOfScalar(Add, NumLVal);
      SizeLVals.push_back(NumLVal);
    }
  }
  for (unsigned I = 0, E = SizeLVals.size(); I < E; ++I) {
    llvm::Value *Size =
        CGF.EmitLoadOfScalar(SizeLVals[I], Data.DepExprs[I]->getExprLoc());
    Sizes.push_back(Size);
  }
  return Sizes;
}
void CGOpenMPRuntime::emitDepobjElements(CodeGenFunction &CGF,
                                         QualType &KmpDependInfoTy,
                                         LValue PosLVal,
                                         const OMPTaskDataTy::DependData &Data,
                                         Address DependenciesArray) {
  assert(Data.DepKind == OMPC_DEPEND_depobj &&
         "Expected depobj dependency kind.");
  llvm::Value *ElSize = CGF.getTypeSize(KmpDependInfoTy);
  {
    OMPIteratorGeneratorScope IteratorScope(
        CGF, cast_or_null<OMPIteratorExpr>(
                 Data.IteratorExpr ? Data.IteratorExpr->IgnoreParenImpCasts()
                                   : nullptr));
    for (unsigned I = 0, End = Data.DepExprs.size(); I < End; ++I) {
      const Expr *E = Data.DepExprs[I];
      llvm::Value *NumDeps;
      LValue Base;
      LValue DepobjLVal = CGF.EmitLValue(E->IgnoreParenImpCasts());
      std::tie(NumDeps, Base) =
          getDepobjElements(CGF, DepobjLVal, E->getExprLoc());

      // memcopy dependency data.
      llvm::Value *Size = CGF.Builder.CreateNUWMul(
          ElSize,
          CGF.Builder.CreateIntCast(NumDeps, CGF.SizeTy, /*isSigned=*/false));
      llvm::Value *Pos = CGF.EmitLoadOfScalar(PosLVal, E->getExprLoc());
      Address DepAddr = CGF.Builder.CreateGEP(DependenciesArray, Pos);
      CGF.Builder.CreateMemCpy(DepAddr, Base.getAddress(CGF), Size);

      // Increase pos.
      // pos += size;
      llvm::Value *Add = CGF.Builder.CreateNUWAdd(Pos, NumDeps);
      CGF.EmitStoreOfScalar(Add, PosLVal);
    }
  }
}

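// Lowers the complete set of 'depend' clauses of a task into one
// kmp_depend_info array, filled in three passes: plain dependencies first,
// then iterator-expanded ones, then the contents of depobj arrays. With
// depobj or iterator dependencies the total count is only known at runtime,
// so the array is emitted as a VLA.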
std::pair<llvm::Value *, Address> CGOpenMPRuntime::emitDependClause(
    CodeGenFunction &CGF, ArrayRef<OMPTaskDataTy::DependData> Dependencies,
    SourceLocation Loc) {
  if (llvm::all_of(Dependencies, [](const OMPTaskDataTy::DependData &D) {
        return D.DepExprs.empty();
      }))
    return std::make_pair(nullptr, Address::invalid());
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements = nullptr;
  unsigned NumDependencies = std::accumulate(
      Dependencies.begin(), Dependencies.end(), 0,
      [](unsigned V, const OMPTaskDataTy::DependData &D) {
        return D.DepKind == OMPC_DEPEND_depobj
                   ? V
                   : (V + (D.IteratorExpr ? 0 : D.DepExprs.size()));
      });
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  bool HasDepobjDeps = false;
  bool HasRegularWithIterators = false;
  llvm::Value *NumOfDepobjElements = llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  llvm::Value *NumOfRegularWithIterators =
      llvm::ConstantInt::get(CGF.IntPtrTy, 0);
  // Calculate number of depobj dependencies and regular deps with the
  // iterators.
  for (const OMPTaskDataTy::DependData &D : Dependencies) {
    if (D.DepKind == OMPC_DEPEND_depobj) {
      SmallVector<llvm::Value *, 4> Sizes =
          emitDepobjElementsSizes(CGF, KmpDependInfoTy, D);
      for (llvm::Value *Size : Sizes) {
        NumOfDepobjElements =
            CGF.Builder.CreateNUWAdd(NumOfDepobjElements, Size);
      }
      HasDepobjDeps = true;
      continue;
    }
    // Include number of iterations, if any.

    if (const auto *IE = cast_or_null<OMPIteratorExpr>(D.IteratorExpr)) {
      for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
        llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
        Sz = CGF.Builder.CreateIntCast(Sz, CGF.IntPtrTy, /*isSigned=*/false);
        llvm::Value *NumClauseDeps = CGF.Builder.CreateNUWMul(
            Sz, llvm::ConstantInt::get(CGF.IntPtrTy, D.DepExprs.size()));
        NumOfRegularWithIterators =
            CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumClauseDeps);
      }
      HasRegularWithIterators = true;
      continue;
    }
  }

  QualType KmpDependInfoArrayTy;
  if (HasDepobjDeps || HasRegularWithIterators) {
    NumOfElements = llvm::ConstantInt::get(CGM.IntPtrTy, NumDependencies,
                                           /*isSigned=*/false);
    if (HasDepobjDeps) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfDepobjElements, NumOfElements);
    }
    if (HasRegularWithIterators) {
      NumOfElements =
          CGF.Builder.CreateNUWAdd(NumOfRegularWithIterators, NumOfElements);
    }
    auto *OVE = new (C) OpaqueValueExpr(
        Loc, C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0),
        VK_PRValue);
    CodeGenFunction::OpaqueValueMapping OpaqueMap(CGF, OVE,
                                                  RValue::get(NumOfElements));
    KmpDependInfoArrayTy =
        C.getVariableArrayType(KmpDependInfoTy, OVE, ArrayType::Normal,
                               /*IndexTypeQuals=*/0, SourceRange(Loc, Loc));
    // CGF.EmitVariablyModifiedType(KmpDependInfoArrayTy);
    // Properly emit variable-sized array.
    auto *PD = ImplicitParamDecl::Create(C, KmpDependInfoArrayTy,
                                         ImplicitParamDecl::Other);
    CGF.EmitVarDecl(*PD);
    DependenciesArray = CGF.GetAddrOfLocalVar(PD);
    NumOfElements = CGF.Builder.CreateIntCast(NumOfElements, CGF.Int32Ty,
                                              /*isSigned=*/false);
  } else {
    KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies), nullptr,
        ArrayType::Normal, /*IndexTypeQuals=*/0);
    DependenciesArray =
        CGF.CreateMemTemp(KmpDependInfoArrayTy, ".dep.arr.addr");
    DependenciesArray = CGF.Builder.CreateConstArrayGEP(DependenciesArray, 0);
    NumOfElements = llvm::ConstantInt::get(CGM.Int32Ty, NumDependencies,
                                           /*isSigned=*/false);
  }
  unsigned Pos = 0;
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &Pos, Dependencies[I],
                   DependenciesArray);
  }
  // Copy regular dependencies with iterators.
  LValue PosLVal = CGF.MakeAddrLValue(
      CGF.CreateMemTemp(C.getSizeType(), "dep.counter.addr"), C.getSizeType());
  CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Pos), PosLVal);
  for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
    if (Dependencies[I].DepKind == OMPC_DEPEND_depobj ||
        !Dependencies[I].IteratorExpr)
      continue;
    emitDependData(CGF, KmpDependInfoTy, &PosLVal, Dependencies[I],
                   DependenciesArray);
  }
  // Copy final depobj arrays without iterators.
  if (HasDepobjDeps) {
    for (unsigned I = 0, End = Dependencies.size(); I < End; ++I) {
      if (Dependencies[I].DepKind != OMPC_DEPEND_depobj)
        continue;
      emitDepobjElements(CGF, KmpDependInfoTy, PosLVal, Dependencies[I],
                         DependenciesArray);
    }
  }
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      DependenciesArray, CGF.VoidPtrTy, CGF.Int8Ty);
  return std::make_pair(NumOfElements, DependenciesArray);
}

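// Lowers '#pragma omp depobj(obj) depend(...)': the dependence records are
// placed in heap storage obtained from __kmpc_alloc, with one extra record
// reserved in front whose base_addr field holds the record count (needed
// later by 'update' and 'destroy'). The returned address points one record
// past that header.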
Address CGOpenMPRuntime::emitDepobjDependClause(
    CodeGenFunction &CGF, const OMPTaskDataTy::DependData &Dependencies,
    SourceLocation Loc) {
  if (Dependencies.DepExprs.empty())
    return Address::invalid();
  // Process list of dependencies.
  ASTContext &C = CGM.getContext();
  Address DependenciesArray = Address::invalid();
  unsigned NumDependencies = Dependencies.DepExprs.size();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());

  llvm::Value *Size;
  // Define type kmp_depend_info[<Dependencies.size()>];
  // For depobj reserve one extra element to store the number of elements.
  // It is required to handle depobj(x) update(in) construct.
  // kmp_depend_info[<Dependencies.size()>] deps;
  llvm::Value *NumDepsVal;
  CharUnits Align = C.getTypeAlignInChars(KmpDependInfoTy);
  if (const auto *IE =
          cast_or_null<OMPIteratorExpr>(Dependencies.IteratorExpr)) {
    NumDepsVal = llvm::ConstantInt::get(CGF.SizeTy, 1);
    for (unsigned I = 0, E = IE->numOfIterators(); I < E; ++I) {
      llvm::Value *Sz = CGF.EmitScalarExpr(IE->getHelper(I).Upper);
      Sz = CGF.Builder.CreateIntCast(Sz, CGF.SizeTy, /*isSigned=*/false);
      NumDepsVal = CGF.Builder.CreateNUWMul(NumDepsVal, Sz);
    }
    Size = CGF.Builder.CreateNUWAdd(llvm::ConstantInt::get(CGF.SizeTy, 1),
                                    NumDepsVal);
    CharUnits SizeInBytes =
        C.getTypeSizeInChars(KmpDependInfoTy).alignTo(Align);
    llvm::Value *RecSize = CGM.getSize(SizeInBytes);
    Size = CGF.Builder.CreateNUWMul(Size, RecSize);
    NumDepsVal =
        CGF.Builder.CreateIntCast(NumDepsVal, CGF.IntPtrTy, /*isSigned=*/false);
  } else {
    QualType KmpDependInfoArrayTy = C.getConstantArrayType(
        KmpDependInfoTy, llvm::APInt(/*numBits=*/64, NumDependencies + 1),
        nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
    CharUnits Sz = C.getTypeSizeInChars(KmpDependInfoArrayTy);
    Size = CGM.getSize(Sz.alignTo(Align));
    NumDepsVal = llvm::ConstantInt::get(CGF.IntPtrTy, NumDependencies);
  }
  // Need to allocate on the dynamic memory.
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, Size, Allocator};

  llvm::Value *Addr =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_alloc),
                          Args, ".dep.arr.addr");
  llvm::Type *KmpDependInfoLlvmTy = CGF.ConvertTypeForMem(KmpDependInfoTy);
  Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Addr, KmpDependInfoLlvmTy->getPointerTo());
  DependenciesArray = Address(Addr, KmpDependInfoLlvmTy, Align);
  // Write number of elements in the first element of array for depobj.
  LValue Base = CGF.MakeAddrLValue(DependenciesArray, KmpDependInfoTy);
  // deps[i].base_addr = NumDependencies;
  LValue BaseAddrLVal = CGF.EmitLValueForField(
      Base,
      *std::next(KmpDependInfoRD->field_begin(),
                 static_cast<unsigned int>(RTLDependInfoFields::BaseAddr)));
  CGF.EmitStoreOfScalar(NumDepsVal, BaseAddrLVal);
  llvm::PointerUnion<unsigned *, LValue *> Pos;
  unsigned Idx = 1;
  LValue PosLVal;
  if (Dependencies.IteratorExpr) {
    PosLVal = CGF.MakeAddrLValue(
        CGF.CreateMemTemp(C.getSizeType(), "iterator.counter.addr"),
        C.getSizeType());
    CGF.EmitStoreOfScalar(llvm::ConstantInt::get(CGF.SizeTy, Idx), PosLVal,
                          /*IsInit=*/true);
    Pos = &PosLVal;
  } else {
    Pos = &Idx;
  }
  emitDependData(CGF, KmpDependInfoTy, Pos, Dependencies, DependenciesArray);
  DependenciesArray = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      CGF.Builder.CreateConstGEP(DependenciesArray, 1), CGF.VoidPtrTy,
      CGF.Int8Ty);
  return DependenciesArray;
}

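// Roughly, '#pragma omp depobj(obj) destroy' becomes (a sketch; the exact
// IR depends on the target and pointer representation):
//   %hdr = getelementptr %struct.kmp_depend_info, ptr %obj, i64 -1 ; header
//   call void @__kmpc_free(i32 %gtid, ptr %hdr, ptr null)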
void CGOpenMPRuntime::emitDestroyClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                        SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  LValue Base = CGF.EmitLoadOfPointerLValue(
      DepobjLVal.getAddress(CGF), C.VoidPtrTy.castAs<PointerType>());
  QualType KmpDependInfoPtrTy = C.getPointerType(KmpDependInfoTy);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      Base.getAddress(CGF), CGF.ConvertTypeForMem(KmpDependInfoPtrTy),
      CGF.ConvertTypeForMem(KmpDependInfoTy));
  llvm::Value *DepObjAddr = CGF.Builder.CreateGEP(
      Addr.getElementType(), Addr.getPointer(),
      llvm::ConstantInt::get(CGF.IntPtrTy, -1, /*isSigned=*/true));
  DepObjAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(DepObjAddr,
                                                               CGF.VoidPtrTy);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  // Use default allocator.
  llvm::Value *Allocator = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *Args[] = {ThreadID, DepObjAddr, Allocator};

  // _kmpc_free(gtid, addr, nullptr);
  (void)CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                CGM.getModule(), OMPRTL___kmpc_free),
                            Args);
}

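// Implements '#pragma omp depobj(obj) update(<kind>)': walks the dependence
// records of the depobj in a do-while loop and rewrites only their flags
// field with the new dependency kind.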
void CGOpenMPRuntime::emitUpdateClause(CodeGenFunction &CGF, LValue DepobjLVal,
                                       OpenMPDependClauseKind NewDepKind,
                                       SourceLocation Loc) {
  ASTContext &C = CGM.getContext();
  QualType FlagsTy;
  getDependTypes(C, KmpDependInfoTy, FlagsTy);
  RecordDecl *KmpDependInfoRD =
      cast<RecordDecl>(KmpDependInfoTy->getAsTagDecl());
  llvm::Type *LLVMFlagsTy = CGF.ConvertTypeForMem(FlagsTy);
  llvm::Value *NumDeps;
  LValue Base;
  std::tie(NumDeps, Base) = getDepobjElements(CGF, DepobjLVal, Loc);

  Address Begin = Base.getAddress(CGF);
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *End = CGF.Builder.CreateGEP(
      Begin.getElementType(), Begin.getPointer(), NumDeps);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.done");
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);
  llvm::PHINode *ElementPHI =
      CGF.Builder.CreatePHI(Begin.getType(), 2, "omp.elementPast");
  ElementPHI->addIncoming(Begin.getPointer(), EntryBB);
  Begin = Begin.withPointer(ElementPHI);
  Base = CGF.MakeAddrLValue(Begin, KmpDependInfoTy, Base.getBaseInfo(),
                            Base.getTBAAInfo());
  // deps[i].flags = NewDepKind;
  RTLDependenceKindTy DepKind = translateDependencyKind(NewDepKind);
  LValue FlagsLVal = CGF.EmitLValueForField(
      Base, *std::next(KmpDependInfoRD->field_begin(),
                       static_cast<unsigned int>(RTLDependInfoFields::Flags)));
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::get(LLVMFlagsTy, static_cast<unsigned int>(DepKind)),
      FlagsLVal);

  // Shift the address forward by one element.
  Address ElementNext =
      CGF.Builder.CreateConstGEP(Begin, /*Index=*/1, "omp.elementNext");
  ElementPHI->addIncoming(ElementNext.getPointer(),
                          CGF.Builder.GetInsertBlock());
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(ElementNext.getPointer(), End, "omp.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

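// Emits '#pragma omp task'. ThenCodeGen enqueues the new task via
// __kmpc_omp_task(_with_deps); ElseCodeGen handles a false if-clause by
// waiting on the dependences and running the task entry immediately between
// __kmpc_omp_task_begin_if0 and __kmpc_omp_task_complete_if0.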
void CGOpenMPRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                   const OMPExecutableDirective &D,
                                   llvm::Function *TaskFunction,
                                   QualType SharedsTy, Address Shareds,
                                   const Expr *IfCond,
                                   const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  llvm::Value *NewTask = Result.NewTask;
  llvm::Function *TaskEntry = Result.TaskEntry;
  llvm::Value *NewTaskNewTaskTTy = Result.NewTaskNewTaskTTy;
  LValue TDBase = Result.TDBase;
  const RecordDecl *KmpTaskTQTyRD = Result.KmpTaskTQTyRD;
  // Process list of dependences.
  Address DependenciesArray = Address::invalid();
  llvm::Value *NumOfElements;
  std::tie(NumOfElements, DependenciesArray) =
      emitDependClause(CGF, Data.Dependences, Loc);

  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Build kmp_int32 __kmpc_omp_task_with_deps(ident_t *, kmp_int32 gtid,
  // kmp_task_t *new_task, kmp_int32 ndeps, kmp_depend_info_t *dep_list,
  // kmp_int32 ndeps_noalias, kmp_depend_info_t *noalias_dep_list) if dependence
  // list is not empty
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *TaskArgs[] = { UpLoc, ThreadID, NewTask };
  llvm::Value *DepTaskArgs[7];
  if (!Data.Dependences.empty()) {
    DepTaskArgs[0] = UpLoc;
    DepTaskArgs[1] = ThreadID;
    DepTaskArgs[2] = NewTask;
    DepTaskArgs[3] = NumOfElements;
    DepTaskArgs[4] = DependenciesArray.getPointer();
    DepTaskArgs[5] = CGF.Builder.getInt32(0);
    DepTaskArgs[6] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &&ThenCodeGen = [this, &Data, TDBase, KmpTaskTQTyRD, &TaskArgs,
                        &DepTaskArgs](CodeGenFunction &CGF, PrePostActionTy &) {
    if (!Data.Tied) {
      auto PartIdFI = std::next(KmpTaskTQTyRD->field_begin(), KmpTaskTPartId);
      LValue PartIdLVal = CGF.EmitLValueForField(TDBase, *PartIdFI);
      CGF.EmitStoreOfScalar(CGF.Builder.getInt32(0), PartIdLVal);
    }
    if (!Data.Dependences.empty()) {
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_omp_task_with_deps),
          DepTaskArgs);
    } else {
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_omp_task),
                          TaskArgs);
    }
    // Check if parent region is untied and build return for untied task;
    if (auto *Region =
            dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
      Region->emitUntiedSwitch(CGF);
  };

  llvm::Value *DepWaitTaskArgs[6];
  if (!Data.Dependences.empty()) {
    DepWaitTaskArgs[0] = UpLoc;
    DepWaitTaskArgs[1] = ThreadID;
    DepWaitTaskArgs[2] = NumOfElements;
    DepWaitTaskArgs[3] = DependenciesArray.getPointer();
    DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
    DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  }
  auto &M = CGM.getModule();
  auto &&ElseCodeGen = [this, &M, &TaskArgs, ThreadID, NewTaskNewTaskTTy,
                        TaskEntry, &Data, &DepWaitTaskArgs,
                        Loc](CodeGenFunction &CGF, PrePostActionTy &) {
    CodeGenFunction::RunCleanupsScope LocalScope(CGF);
    // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
    // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
    // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
    // is specified.
    if (!Data.Dependences.empty())
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    // Call proxy_task_entry(gtid, new_task);
    auto &&CodeGen = [TaskEntry, ThreadID, NewTaskNewTaskTTy,
                      Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
      Action.Enter(CGF);
      llvm::Value *OutlinedFnArgs[] = {ThreadID, NewTaskNewTaskTTy};
      CGF.CGM.getOpenMPRuntime().emitOutlinedFunctionCall(CGF, Loc, TaskEntry,
                                                          OutlinedFnArgs);
    };

    // Build void __kmpc_omp_task_begin_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    // Build void __kmpc_omp_task_complete_if0(ident_t *, kmp_int32 gtid,
    // kmp_task_t *new_task);
    RegionCodeGenTy RCG(CodeGen);
    CommonActionTy Action(OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_begin_if0),
                          TaskArgs,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              M, OMPRTL___kmpc_omp_task_complete_if0),
                          TaskArgs);
    RCG.setAction(Action);
    RCG(CGF);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, ThenCodeGen, ElseCodeGen);
  } else {
    RegionCodeGenTy ThenRCG(ThenCodeGen);
    ThenRCG(CGF);
  }
}

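// Emits '#pragma omp taskloop' (and its combined forms): lower/upper bound,
// stride, nogroup, grainsize/num_tasks scheduling and the optional task-dup
// helper are all packed into a single __kmpc_taskloop call.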
void CGOpenMPRuntime::emitTaskLoopCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPLoopDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;
  TaskResultTy Result =
      emitTaskInit(CGF, Loc, D, TaskFunction, SharedsTy, Shareds, Data);
  // NOTE: routine and part_id fields are initialized by __kmpc_omp_task_alloc()
  // libcall.
  // Call to void __kmpc_taskloop(ident_t *loc, int gtid, kmp_task_t *task, int
  // if_val, kmp_uint64 *lb, kmp_uint64 *ub, kmp_int64 st, int nogroup, int
  // sched, kmp_uint64 grainsize, void *task_dup);
  llvm::Value *ThreadID = getThreadID(CGF, Loc);
  llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *IfVal;
  if (IfCond) {
    IfVal = CGF.Builder.CreateIntCast(CGF.EvaluateExprAsBool(IfCond), CGF.IntTy,
                                      /*isSigned=*/true);
  } else {
    IfVal = llvm::ConstantInt::getSigned(CGF.IntTy, /*V=*/1);
  }

  LValue LBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTLowerBound));
  const auto *LBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getLowerBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(LBVar->getInit(), LBLVal.getAddress(CGF),
                       LBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue UBLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTUpperBound));
  const auto *UBVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getUpperBoundVariable())->getDecl());
  CGF.EmitAnyExprToMem(UBVar->getInit(), UBLVal.getAddress(CGF),
                       UBLVal.getQuals(),
                       /*IsInitializer=*/true);
  LValue StLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTStride));
  const auto *StVar =
      cast<VarDecl>(cast<DeclRefExpr>(D.getStrideVariable())->getDecl());
  CGF.EmitAnyExprToMem(StVar->getInit(), StLVal.getAddress(CGF),
                       StLVal.getQuals(),
                       /*IsInitializer=*/true);
  // Store reductions address.
  LValue RedLVal = CGF.EmitLValueForField(
      Result.TDBase,
      *std::next(Result.KmpTaskTQTyRD->field_begin(), KmpTaskTReductions));
  if (Data.Reductions) {
    CGF.EmitStoreOfScalar(Data.Reductions, RedLVal);
  } else {
    CGF.EmitNullInitialization(RedLVal.getAddress(CGF),
                               CGF.getContext().VoidPtrTy);
  }
  enum { NoSchedule = 0, Grainsize = 1, NumTasks = 2 };
  llvm::Value *TaskArgs[] = {
      UpLoc,
      ThreadID,
      Result.NewTask,
      IfVal,
      LBLVal.getPointer(CGF),
      UBLVal.getPointer(CGF),
      CGF.EmitLoadOfScalar(StLVal, Loc),
      llvm::ConstantInt::getSigned(
          CGF.IntTy, 1), // Always 1 because taskgroup emitted by the compiler
      llvm::ConstantInt::getSigned(
          CGF.IntTy, Data.Schedule.getPointer()
                         ? Data.Schedule.getInt() ? NumTasks : Grainsize
                         : NoSchedule),
      Data.Schedule.getPointer()
          ? CGF.Builder.CreateIntCast(Data.Schedule.getPointer(), CGF.Int64Ty,
                                      /*isSigned=*/false)
          : llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/0),
      Result.TaskDupFn ? CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             Result.TaskDupFn, CGF.VoidPtrTy)
                       : llvm::ConstantPointerNull::get(CGF.VoidPtrTy)};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_taskloop),
                      TaskArgs);
}

/// Emit reduction operation for each element of array (required for
/// array sections) LHS op = RHS.
/// \param Type Type of array.
/// \param LHSVar Variable on the left side of the reduction operation
/// (references element of array in original variable).
/// \param RHSVar Variable on the right side of the reduction operation
/// (references element of array in original variable).
/// \param RedOpGen Generator of reduction operation with use of LHSVar and
/// RHSVar.
static void EmitOMPAggregateReduction(
    CodeGenFunction &CGF, QualType Type, const VarDecl *LHSVar,
    const VarDecl *RHSVar,
    const llvm::function_ref<void(CodeGenFunction &CGF, const Expr *,
                                  const Expr *, const Expr *)> &RedOpGen,
    const Expr *XExpr = nullptr, const Expr *EExpr = nullptr,
    const Expr *UpExpr = nullptr) {
  // Perform element-by-element initialization.
  QualType ElementTy;
  Address LHSAddr = CGF.GetAddrOfLocalVar(LHSVar);
  Address RHSAddr = CGF.GetAddrOfLocalVar(RHSVar);

  // Drill down to the base element type on both arrays.
  const ArrayType *ArrayTy = Type->getAsArrayTypeUnsafe();
  llvm::Value *NumElements = CGF.emitArrayLength(ArrayTy, ElementTy, LHSAddr);

  llvm::Value *RHSBegin = RHSAddr.getPointer();
  llvm::Value *LHSBegin = LHSAddr.getPointer();
  // Cast from pointer to array type to pointer to single element.
  llvm::Value *LHSEnd =
      CGF.Builder.CreateGEP(LHSAddr.getElementType(), LHSBegin, NumElements);
  // The basic structure here is a while-do loop.
  llvm::BasicBlock *BodyBB = CGF.createBasicBlock("omp.arraycpy.body");
  llvm::BasicBlock *DoneBB = CGF.createBasicBlock("omp.arraycpy.done");
  llvm::Value *IsEmpty =
      CGF.Builder.CreateICmpEQ(LHSBegin, LHSEnd, "omp.arraycpy.isempty");
  CGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);

  // Enter the loop body, making that address the current address.
  llvm::BasicBlock *EntryBB = CGF.Builder.GetInsertBlock();
  CGF.EmitBlock(BodyBB);

  CharUnits ElementSize = CGF.getContext().getTypeSizeInChars(ElementTy);

  llvm::PHINode *RHSElementPHI = CGF.Builder.CreatePHI(
      RHSBegin->getType(), 2, "omp.arraycpy.srcElementPast");
  RHSElementPHI->addIncoming(RHSBegin, EntryBB);
  Address RHSElementCurrent(
      RHSElementPHI, RHSAddr.getElementType(),
      RHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  llvm::PHINode *LHSElementPHI = CGF.Builder.CreatePHI(
      LHSBegin->getType(), 2, "omp.arraycpy.destElementPast");
  LHSElementPHI->addIncoming(LHSBegin, EntryBB);
  Address LHSElementCurrent(
      LHSElementPHI, LHSAddr.getElementType(),
      LHSAddr.getAlignment().alignmentOfArrayElement(ElementSize));

  // Emit copy.
  {
    CodeGenFunction::OMPPrivateScope Scope(CGF);
    Scope.addPrivate(LHSVar, LHSElementCurrent);
    Scope.addPrivate(RHSVar, RHSElementCurrent);
    Scope.Privatize();
    RedOpGen(CGF, XExpr, EExpr, UpExpr);
    Scope.ForceCleanup();
  }

  // Shift the address forward by one element.
  llvm::Value *LHSElementNext = CGF.Builder.CreateConstGEP1_32(
      LHSAddr.getElementType(), LHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.dest.element");
  llvm::Value *RHSElementNext = CGF.Builder.CreateConstGEP1_32(
      RHSAddr.getElementType(), RHSElementPHI, /*Idx0=*/1,
      "omp.arraycpy.src.element");
  // Check whether we've reached the end.
  llvm::Value *Done =
      CGF.Builder.CreateICmpEQ(LHSElementNext, LHSEnd, "omp.arraycpy.done");
  CGF.Builder.CreateCondBr(Done, DoneBB, BodyBB);
  LHSElementPHI->addIncoming(LHSElementNext, CGF.Builder.GetInsertBlock());
  RHSElementPHI->addIncoming(RHSElementNext, CGF.Builder.GetInsertBlock());

  // Done.
  CGF.EmitBlock(DoneBB, /*IsFinished=*/true);
}

/// Emit reduction combiner. If the combiner is a simple expression emit it as
/// is, otherwise consider it as combiner of UDR decl and emit it as a call of
/// UDR combiner function.
static void emitReductionCombiner(CodeGenFunction &CGF,
                                  const Expr *ReductionOp) {
  if (const auto *CE = dyn_cast<CallExpr>(ReductionOp))
    if (const auto *OVE = dyn_cast<OpaqueValueExpr>(CE->getCallee()))
      if (const auto *DRE =
              dyn_cast<DeclRefExpr>(OVE->getSourceExpr()->IgnoreImpCasts()))
        if (const auto *DRD =
                dyn_cast<OMPDeclareReductionDecl>(DRE->getDecl())) {
          std::pair<llvm::Function *, llvm::Function *> Reduction =
              CGF.CGM.getOpenMPRuntime().getUserDefinedReduction(DRD);
          RValue Func = RValue::get(Reduction.first);
          CodeGenFunction::OpaqueValueMapping Map(CGF, OVE, Func);
          CGF.EmitIgnoredExpr(ReductionOp);
          return;
        }
  CGF.EmitIgnoredExpr(ReductionOp);
}

llvm::Function *CGOpenMPRuntime::emitReductionFunction(
    SourceLocation Loc, llvm::Type *ArgsElemType,
    ArrayRef<const Expr *> Privates, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, ArrayRef<const Expr *> ReductionOps) {
  ASTContext &C = CGM.getContext();

  // void reduction_func(void *LHSArg, void *RHSArg);
  FunctionArgList Args;
  ImplicitParamDecl LHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  ImplicitParamDecl RHSArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                           ImplicitParamDecl::Other);
  Args.push_back(&LHSArg);
  Args.push_back(&RHSArg);
  const auto &CGFI =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  std::string Name = getName({"omp", "reduction", "reduction_func"});
  auto *Fn = llvm::Function::Create(CGM.getTypes().GetFunctionType(CGFI),
                                    llvm::GlobalValue::InternalLinkage, Name,
                                    &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, CGFI);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, CGFI, Args, Loc, Loc);

  // Dst = (void*[n])(LHSArg);
  // Src = (void*[n])(RHSArg);
  Address LHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&LHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());
  Address RHS(CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  CGF.Builder.CreateLoad(CGF.GetAddrOfLocalVar(&RHSArg)),
                  ArgsElemType->getPointerTo()),
              ArgsElemType, CGF.getPointerAlign());

  //  ...
  //  *(Type<i>*)lhs[i] = RedOp<i>(*(Type<i>*)lhs[i], *(Type<i>*)rhs[i]);
  //  ...
  CodeGenFunction::OMPPrivateScope Scope(CGF);
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = ReductionOps.size(); I < E; ++I, ++IPriv, ++Idx) {
    const auto *RHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(RHSExprs[I])->getDecl());
    Scope.addPrivate(RHSVar, emitAddrOfVarFromArray(CGF, RHS, Idx, RHSVar));
    const auto *LHSVar =
        cast<VarDecl>(cast<DeclRefExpr>(LHSExprs[I])->getDecl());
    Scope.addPrivate(LHSVar, emitAddrOfVarFromArray(CGF, LHS, Idx, LHSVar));
    QualType PrivTy = (*IPriv)->getType();
    if (PrivTy->isVariablyModifiedType()) {
      // Get array size and emit VLA type.
      ++Idx;
      Address Elem = CGF.Builder.CreateConstArrayGEP(LHS, Idx);
      llvm::Value *Ptr = CGF.Builder.CreateLoad(Elem);
      const VariableArrayType *VLA =
          CGF.getContext().getAsVariableArrayType(PrivTy);
      const auto *OVE = cast<OpaqueValueExpr>(VLA->getSizeExpr());
      CodeGenFunction::OpaqueValueMapping OpaqueMap(
          CGF, OVE, RValue::get(CGF.Builder.CreatePtrToInt(Ptr, CGF.SizeTy)));
      CGF.EmitVariablyModifiedType(PrivTy);
    }
  }
  Scope.Privatize();
  IPriv = Privates.begin();
  const auto *ILHS = LHSExprs.begin();
  const auto *IRHS = RHSExprs.begin();
  for (const Expr *E : ReductionOps) {
    if ((*IPriv)->getType()->isArrayType()) {
      // Emit reduction for array section.
      const auto *LHSVar = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
      const auto *RHSVar = cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
      EmitOMPAggregateReduction(
          CGF, (*IPriv)->getType(), LHSVar, RHSVar,
          [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
            emitReductionCombiner(CGF, E);
          });
    } else {
      // Emit reduction for array subscript or single variable.
      emitReductionCombiner(CGF, E);
    }
    ++IPriv;
    ++ILHS;
    ++IRHS;
  }
  Scope.ForceCleanup();
  CGF.FinishFunction();
  return Fn;
}

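// Emits one reduction combiner: array sections are combined element by
// element through EmitOMPAggregateReduction; scalars and array subscripts
// go straight to emitReductionCombiner.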
void CGOpenMPRuntime::emitSingleReductionCombiner(CodeGenFunction &CGF,
                                                  const Expr *ReductionOp,
                                                  const Expr *PrivateRef,
                                                  const DeclRefExpr *LHS,
                                                  const DeclRefExpr *RHS) {
  if (PrivateRef->getType()->isArrayType()) {
    // Emit reduction for array section.
    const auto *LHSVar = cast<VarDecl>(LHS->getDecl());
    const auto *RHSVar = cast<VarDecl>(RHS->getDecl());
    EmitOMPAggregateReduction(
        CGF, PrivateRef->getType(), LHSVar, RHSVar,
        [=](CodeGenFunction &CGF, const Expr *, const Expr *, const Expr *) {
          emitReductionCombiner(CGF, ReductionOp);
        });
  } else {
    // Emit reduction for array subscript or single variable.
    emitReductionCombiner(CGF, ReductionOp);
  }
}

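// For example, with '#pragma omp parallel for reduction(+:s)' each thread's
// private copy of 's' flows through the switch emitted below: case 1
// combines the copies under the runtime's reduction lock, case 2 combines
// them with atomics (or a named critical region as a fallback).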
void CGOpenMPRuntime::emitReduction(CodeGenFunction &CGF, SourceLocation Loc,
                                    ArrayRef<const Expr *> Privates,
                                    ArrayRef<const Expr *> LHSExprs,
                                    ArrayRef<const Expr *> RHSExprs,
                                    ArrayRef<const Expr *> ReductionOps,
                                    ReductionOptionsTy Options) {
  if (!CGF.HaveInsertPoint())
    return;

  bool WithNowait = Options.WithNowait;
  bool SimpleReduction = Options.SimpleReduction;

  // Next code should be emitted for reduction:
  //
  // static kmp_critical_name lock = { 0 };
  //
  // void reduce_func(void *lhs[<n>], void *rhs[<n>]) {
  //  *(Type0*)lhs[0] = ReductionOperation0(*(Type0*)lhs[0], *(Type0*)rhs[0]);
  //  ...
  //  *(Type<n>-1*)lhs[<n>-1] = ReductionOperation<n>-1(*(Type<n>-1*)lhs[<n>-1],
  //  *(Type<n>-1*)rhs[<n>-1]);
  // }
  //
  // ...
  // void *RedList[<n>] = {&<RHSExprs>[0], ..., &<RHSExprs>[<n>-1]};
  // switch (__kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>)) {
  // case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  //  __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  //  break;
  // case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  //  [__kmpc_end_reduce(<loc>, <gtid>, &<lock>);]
  //  break;
  // default:;
  // }
  //
  // if SimpleReduction is true, only the next code is generated:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...

  ASTContext &C = CGM.getContext();

  if (SimpleReduction) {
    CodeGenFunction::RunCleanupsScope Scope(CGF);
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                  cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
    return;
  }

  // 1. Build a list of reduction variables.
  // void *RedList[<n>] = {<ReductionVars>[0], ..., <ReductionVars>[<n>-1]};
  auto Size = RHSExprs.size();
  for (const Expr *E : Privates) {
    if (E->getType()->isVariablyModifiedType())
      // Reserve place for array size.
      ++Size;
  }
  llvm::APInt ArraySize(/*unsigned int numBits=*/32, Size);
  QualType ReductionArrayTy =
      C.getConstantArrayType(C.VoidPtrTy, ArraySize, nullptr, ArrayType::Normal,
                             /*IndexTypeQuals=*/0);
  Address ReductionList =
      CGF.CreateMemTemp(ReductionArrayTy, ".omp.reduction.red_list");
  const auto *IPriv = Privates.begin();
  unsigned Idx = 0;
  for (unsigned I = 0, E = RHSExprs.size(); I < E; ++I, ++IPriv, ++Idx) {
    Address Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
    CGF.Builder.CreateStore(
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            CGF.EmitLValue(RHSExprs[I]).getPointer(CGF), CGF.VoidPtrTy),
        Elem);
    if ((*IPriv)->getType()->isVariablyModifiedType()) {
      // Store array size.
      ++Idx;
      Elem = CGF.Builder.CreateConstArrayGEP(ReductionList, Idx);
      llvm::Value *Size = CGF.Builder.CreateIntCast(
          CGF.getVLASize(
                 CGF.getContext().getAsVariableArrayType((*IPriv)->getType()))
              .NumElts,
          CGF.SizeTy, /*isSigned=*/false);
      CGF.Builder.CreateStore(CGF.Builder.CreateIntToPtr(Size, CGF.VoidPtrTy),
                              Elem);
    }
  }

  // 2. Emit reduce_func().
  llvm::Function *ReductionFn =
      emitReductionFunction(Loc, CGF.ConvertTypeForMem(ReductionArrayTy),
                            Privates, LHSExprs, RHSExprs, ReductionOps);

  // 3. Create static kmp_critical_name lock = { 0 };
  std::string Name = getName({"reduction"});
  llvm::Value *Lock = getCriticalRegionLock(Name);

  // 4. Build res = __kmpc_reduce{_nowait}(<loc>, <gtid>, <n>, sizeof(RedList),
  // RedList, reduce_func, &<lock>);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc, OMP_ATOMIC_REDUCE);
  llvm::Value *ThreadId = getThreadID(CGF, Loc);
  llvm::Value *ReductionArrayTySize = CGF.getTypeSize(ReductionArrayTy);
  llvm::Value *RL = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      ReductionList.getPointer(), CGF.VoidPtrTy);
  llvm::Value *Args[] = {
      IdentTLoc,                             // ident_t *<loc>
      ThreadId,                              // i32 <gtid>
      CGF.Builder.getInt32(RHSExprs.size()), // i32 <n>
      ReductionArrayTySize,                  // size_type sizeof(RedList)
      RL,                                    // void *RedList
      ReductionFn, // void (*) (void *, void *) <reduce_func>
      Lock         // kmp_critical_name *&<lock>
  };
  llvm::Value *Res = CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(),
          WithNowait ? OMPRTL___kmpc_reduce_nowait : OMPRTL___kmpc_reduce),
      Args);

  // 5. Build switch(res)
  llvm::BasicBlock *DefaultBB = CGF.createBasicBlock(".omp.reduction.default");
  llvm::SwitchInst *SwInst =
      CGF.Builder.CreateSwitch(Res, DefaultBB, /*NumCases=*/2);

  // 6. Build case 1:
  //  ...
  //  <LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]);
  //  ...
  // __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  // break;
  llvm::BasicBlock *Case1BB = CGF.createBasicBlock(".omp.reduction.case1");
  SwInst->addCase(CGF.Builder.getInt32(1), Case1BB);
  CGF.EmitBlock(Case1BB);

  // Add emission of __kmpc_end_reduce{_nowait}(<loc>, <gtid>, &<lock>);
  llvm::Value *EndArgs[] = {
      IdentTLoc, // ident_t *<loc>
      ThreadId,  // i32 <gtid>
      Lock       // kmp_critical_name *&<lock>
  };
  auto &&CodeGen = [Privates, LHSExprs, RHSExprs, ReductionOps](
                       CodeGenFunction &CGF, PrePostActionTy &Action) {
    CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
    const auto *IPriv = Privates.begin();
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    for (const Expr *E : ReductionOps) {
      RT.emitSingleReductionCombiner(CGF, E, *IPriv, cast<DeclRefExpr>(*ILHS),
                                     cast<DeclRefExpr>(*IRHS));
      ++IPriv;
      ++ILHS;
      ++IRHS;
    }
  };
  RegionCodeGenTy RCG(CodeGen);
  CommonActionTy Action(
      nullptr, llvm::None,
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), WithNowait ? OMPRTL___kmpc_end_reduce_nowait
                                      : OMPRTL___kmpc_end_reduce),
      EndArgs);
  RCG.setAction(Action);
  RCG(CGF);

  CGF.EmitBranch(DefaultBB);

  // 7. Build case 2:
  //  ...
  //  Atomic(<LHSExprs>[i] = RedOp<i>(*<LHSExprs>[i], *<RHSExprs>[i]));
  //  ...
  // break;
  llvm::BasicBlock *Case2BB = CGF.createBasicBlock(".omp.reduction.case2");
  SwInst->addCase(CGF.Builder.getInt32(2), Case2BB);
  CGF.EmitBlock(Case2BB);

  auto &&AtomicCodeGen = [Loc, Privates, LHSExprs, RHSExprs, ReductionOps](
                             CodeGenFunction &CGF, PrePostActionTy &Action) {
    const auto *ILHS = LHSExprs.begin();
    const auto *IRHS = RHSExprs.begin();
    const auto *IPriv = Privates.begin();
    for (const Expr *E : ReductionOps) {
      const Expr *XExpr = nullptr;
      const Expr *EExpr = nullptr;
      const Expr *UpExpr = nullptr;
      BinaryOperatorKind BO = BO_Comma;
      if (const auto *BO = dyn_cast<BinaryOperator>(E)) {
        if (BO->getOpcode() == BO_Assign) {
          XExpr = BO->getLHS();
          UpExpr = BO->getRHS();
        }
      }
      // Try to emit update expression as a simple atomic.
      const Expr *RHSExpr = UpExpr;
      if (RHSExpr) {
        // Analyze RHS part of the whole expression.
        if (const auto *ACO = dyn_cast<AbstractConditionalOperator>(
                RHSExpr->IgnoreParenImpCasts())) {
          // If this is a conditional operator, analyze its condition for
          // min/max reduction operator.
          RHSExpr = ACO->getCond();
        }
        if (const auto *BORHS =
                dyn_cast<BinaryOperator>(RHSExpr->IgnoreParenImpCasts())) {
          EExpr = BORHS->getRHS();
          BO = BORHS->getOpcode();
        }
      }
      if (XExpr) {
        const auto *VD = cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
        auto &&AtomicRedGen = [BO, VD,
                               Loc](CodeGenFunction &CGF, const Expr *XExpr,
                                    const Expr *EExpr, const Expr *UpExpr) {
          LValue X = CGF.EmitLValue(XExpr);
          RValue E;
          if (EExpr)
            E = CGF.EmitAnyExpr(EExpr);
          CGF.EmitOMPAtomicSimpleUpdateExpr(
              X, E, BO, /*IsXLHSInRHSPart=*/true,
              llvm::AtomicOrdering::Monotonic, Loc,
              [&CGF, UpExpr, VD, Loc](RValue XRValue) {
                CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
                Address LHSTemp = CGF.CreateMemTemp(VD->getType());
                CGF.emitOMPSimpleStore(
                    CGF.MakeAddrLValue(LHSTemp, VD->getType()), XRValue,
                    VD->getType().getNonReferenceType(), Loc);
                PrivateScope.addPrivate(VD, LHSTemp);
                (void)PrivateScope.Privatize();
                return CGF.EmitAnyExpr(UpExpr);
              });
        };
        if ((*IPriv)->getType()->isArrayType()) {
          // Emit atomic reduction for array section.
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), VD, RHSVar,
                                    AtomicRedGen, XExpr, EExpr, UpExpr);
        } else {
          // Emit atomic reduction for array subscript or single variable.
          AtomicRedGen(CGF, XExpr, EExpr, UpExpr);
        }
      } else {
        // Emit as a critical region.
        auto &&CritRedGen = [E, Loc](CodeGenFunction &CGF, const Expr *,
                                     const Expr *, const Expr *) {
          CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
          std::string Name = RT.getName({"atomic_reduction"});
          RT.emitCriticalRegion(
              CGF, Name,
              [=](CodeGenFunction &CGF, PrePostActionTy &Action) {
                Action.Enter(CGF);
                emitReductionCombiner(CGF, E);
              },
              Loc);
        };
        if ((*IPriv)->getType()->isArrayType()) {
          const auto *LHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*ILHS)->getDecl());
          const auto *RHSVar =
              cast<VarDecl>(cast<DeclRefExpr>(*IRHS)->getDecl());
          EmitOMPAggregateReduction(CGF, (*IPriv)->getType(), LHSVar, RHSVar,
                                    CritRedGen);
        } else {
          CritRedGen(CGF, nullptr, nullptr, nullptr);
        }
      }
      ++ILHS;
      ++IRHS;
      ++IPriv;
    }
  };
  RegionCodeGenTy AtomicRCG(AtomicCodeGen);
  if (!WithNowait) {
    // Add emission of __kmpc_end_reduce(<loc>, <gtid>, &<lock>);
    llvm::Value *EndArgs[] = {
        IdentTLoc, // ident_t *<loc>
        ThreadId,  // i32 <gtid>
        Lock       // kmp_critical_name *&<lock>
    };
    CommonActionTy Action(nullptr, llvm::None,
                          OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_end_reduce),
                          EndArgs);
    AtomicRCG.setAction(Action);
    AtomicRCG(CGF);
  } else {
    AtomicRCG(CGF);
  }

  CGF.EmitBranch(DefaultBB);
  CGF.EmitBlock(DefaultBB, /*IsFinished=*/true);
}

/// Generates unique name for artificial threadprivate variables.
/// Format is: <Prefix> "." <Decl_mangled_name> "_" "<Decl_start_loc_raw_enc>"
static std::string generateUniqueName(CodeGenModule &CGM, StringRef Prefix,
                                      const Expr *Ref) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  const clang::DeclRefExpr *DE;
  const VarDecl *D = ::getBaseDecl(Ref, DE);
  if (!D)
    D = cast<VarDecl>(cast<DeclRefExpr>(Ref)->getDecl());
  D = D->getCanonicalDecl();
  std::string Name = CGM.getOpenMPRuntime().getName(
      {D->isLocalVarDeclOrParm() ? D->getName() : CGM.getMangledName(D)});
  Out << Prefix << Name << "_"
      << D->getCanonicalDecl()->getBeginLoc().getRawEncoding();
  return std::string(Out.str());
}

/// Emits reduction initializer function:
/// \code
/// void @.red_init(void* %arg, void* %orig) {
/// %0 = bitcast void* %arg to <type>*
/// store <type> <init>, <type>* %0
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceInitFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  ASTContext &C = CGM.getContext();
  QualType VoidPtrTy = C.VoidPtrTy;
  VoidPtrTy.addRestrict();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                          ImplicitParamDecl::Other);
  ImplicitParamDecl ParamOrig(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, VoidPtrTy,
                              ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  Args.emplace_back(&ParamOrig);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_init", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  QualType PrivateType = RCG.getPrivateType(N);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.Builder.CreateElementBitCast(
          CGF.GetAddrOfLocalVar(&Param),
          CGF.ConvertTypeForMem(PrivateType)->getPointerTo()),
      C.getPointerType(PrivateType)->castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  Address OrigAddr = Address::invalid();
  // If initializer uses initializer from declare reduction construct, emit a
  // pointer to the address of the original reduction item (required by
  // reduction initializer).
  if (RCG.usesReductionInitializer(N)) {
    Address SharedAddr = CGF.GetAddrOfLocalVar(&ParamOrig);
    OrigAddr = CGF.EmitLoadOfPointer(
        SharedAddr,
        CGM.getContext().VoidPtrTy.castAs<PointerType>()->getTypePtr());
  }
  // Emit the initializer:
  // %0 = bitcast void* %arg to <type>*
  // store <type> <init>, <type>* %0
  RCG.emitInitialization(CGF, N, PrivateAddr, OrigAddr,
                         [](CodeGenFunction &) { return false; });
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction combiner function:
/// \code
/// void @.red_comb(void* %arg0, void* %arg1) {
/// %lhs = bitcast void* %arg0 to <type>*
/// %rhs = bitcast void* %arg1 to <type>*
/// %2 = <ReductionOp>(<type>* %lhs, <type>* %rhs)
/// store <type> %2, <type>* %lhs
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceCombFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N,
                                           const Expr *ReductionOp,
                                           const Expr *LHS, const Expr *RHS,
                                           const Expr *PrivateRef) {
  ASTContext &C = CGM.getContext();
  const auto *LHSVD = cast<VarDecl>(cast<DeclRefExpr>(LHS)->getDecl());
  const auto *RHSVD = cast<VarDecl>(cast<DeclRefExpr>(RHS)->getDecl());
  FunctionArgList Args;
  ImplicitParamDecl ParamInOut(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                               C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl ParamIn(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  Args.emplace_back(&ParamInOut);
  Args.emplace_back(&ParamIn);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_comb", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Remap lhs and rhs variables to the addresses of the function arguments.
  // %lhs = bitcast void* %arg0 to <type>*
  // %rhs = bitcast void* %arg1 to <type>*
  CodeGenFunction::OMPPrivateScope PrivateScope(CGF);
  PrivateScope.addPrivate(
      LHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamInOut),
              CGF.ConvertTypeForMem(LHSVD->getType())->getPointerTo()),
          C.getPointerType(LHSVD->getType())->castAs<PointerType>()));
  PrivateScope.addPrivate(
      RHSVD,
      // Pull out the pointer to the variable.
      CGF.EmitLoadOfPointer(
          CGF.Builder.CreateElementBitCast(
              CGF.GetAddrOfLocalVar(&ParamIn),
              CGF.ConvertTypeForMem(RHSVD->getType())->getPointerTo()),
          C.getPointerType(RHSVD->getType())->castAs<PointerType>()));
  PrivateScope.Privatize();
  // Emit the combiner body:
  // %2 = <ReductionOp>(<type> *%lhs, <type> *%rhs)
  // store <type> %2, <type>* %lhs
  CGM.getOpenMPRuntime().emitSingleReductionCombiner(
      CGF, ReductionOp, PrivateRef, cast<DeclRefExpr>(LHS),
      cast<DeclRefExpr>(RHS));
  CGF.FinishFunction();
  return Fn;
}

/// Emits reduction finalizer function:
/// \code
/// void @.red_fini(void* %arg) {
/// %0 = bitcast void* %arg to <type>*
/// <destroy>(<type>* %0)
/// ret void
/// }
/// \endcode
static llvm::Value *emitReduceFiniFunction(CodeGenModule &CGM,
                                           SourceLocation Loc,
                                           ReductionCodeGen &RCG, unsigned N) {
  if (!RCG.needCleanups(N))
    return nullptr;
  ASTContext &C = CGM.getContext();
  FunctionArgList Args;
  ImplicitParamDecl Param(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                          ImplicitParamDecl::Other);
  Args.emplace_back(&Param);
  const auto &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  std::string Name = CGM.getOpenMPRuntime().getName({"red_fini", ""});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->setDoesNotRecurse();
  CodeGenFunction CGF(CGM);
  CGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  Address PrivateAddr = CGF.EmitLoadOfPointer(
      CGF.GetAddrOfLocalVar(&Param), C.VoidPtrTy.castAs<PointerType>());
  llvm::Value *Size = nullptr;
  // If the size of the reduction item is non-constant, load it from global
  // threadprivate variable.
  if (RCG.getSizes(N).second) {
    Address SizeAddr = CGM.getOpenMPRuntime().getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    Size = CGF.EmitLoadOfScalar(SizeAddr, /*Volatile=*/false,
                                CGM.getContext().getSizeType(), Loc);
  }
  RCG.emitAggregateType(CGF, N, Size);
  // Emit the finalizer body:
  // <destroy>(<type>* %0)
  RCG.emitCleanups(CGF, N, PrivateAddr);
  CGF.FinishFunction(Loc);
  return Fn;
}

llvm::Value *CGOpenMPRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint() || Data.ReductionVars.empty())
    return nullptr;

  // Build typedef struct:
  // kmp_taskred_input {
  //   void *reduce_shar; // shared reduction item
  //   void *reduce_orig; // original reduction item used for initialization
  //   size_t reduce_size; // size of data item
  //   void *reduce_init; // data initialization routine
  //   void *reduce_fini; // data finalization routine
  //   void *reduce_comb; // data combiner routine
  //   kmp_task_red_flags_t flags; // flags for additional info from compiler
  // } kmp_taskred_input_t;
  ASTContext &C = CGM.getContext();
  RecordDecl *RD = C.buildImplicitRecord("kmp_taskred_input_t");
  RD->startDefinition();
  const FieldDecl *SharedFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *OrigFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *SizeFD = addFieldToRecordDecl(C, RD, C.getSizeType());
  const FieldDecl *InitFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FiniFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *CombFD = addFieldToRecordDecl(C, RD, C.VoidPtrTy);
  const FieldDecl *FlagsFD = addFieldToRecordDecl(
      C, RD, C.getIntTypeForBitwidth(/*DestWidth=*/32, /*Signed=*/false));
  RD->completeDefinition();
  QualType RDType = C.getRecordType(RD);
  unsigned Size = Data.ReductionVars.size();
  llvm::APInt ArraySize(/*numBits=*/64, Size);
  QualType ArrayRDType = C.getConstantArrayType(
      RDType, ArraySize, nullptr, ArrayType::Normal, /*IndexTypeQuals=*/0);
  // kmp_task_red_input_t .rd_input.[Size];
  Address TaskRedInput = CGF.CreateMemTemp(ArrayRDType, ".rd_input.");
  ReductionCodeGen RCG(Data.ReductionVars, Data.ReductionOrigs,
                       Data.ReductionCopies, Data.ReductionOps);
  for (unsigned Cnt = 0; Cnt < Size; ++Cnt) {
    // kmp_task_red_input_t &ElemLVal = .rd_input.[Cnt];
    llvm::Value *Idxs[] = {llvm::ConstantInt::get(CGM.SizeTy, /*V=*/0),
                           llvm::ConstantInt::get(CGM.SizeTy, Cnt)};
    llvm::Value *GEP = CGF.EmitCheckedInBoundsGEP(
        TaskRedInput.getElementType(), TaskRedInput.getPointer(), Idxs,
        /*SignedIndices=*/false, /*IsSubtraction=*/false, Loc,
        ".rd_input.gep.");
    LValue ElemLVal = CGF.MakeNaturalAlignAddrLValue(GEP, RDType);
    // ElemLVal.reduce_shar = &Shareds[Cnt];
    LValue SharedLVal = CGF.EmitLValueForField(ElemLVal, SharedFD);
    RCG.emitSharedOrigLValue(CGF, Cnt);
    llvm::Value *CastedShared =
        CGF.EmitCastToVoidPtr(RCG.getSharedLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedShared, SharedLVal);
    // ElemLVal.reduce_orig = &Origs[Cnt];
    LValue OrigLVal = CGF.EmitLValueForField(ElemLVal, OrigFD);
    llvm::Value *CastedOrig =
        CGF.EmitCastToVoidPtr(RCG.getOrigLValue(Cnt).getPointer(CGF));
    CGF.EmitStoreOfScalar(CastedOrig, OrigLVal);
    RCG.emitAggregateType(CGF, Cnt);
    llvm::Value *SizeValInChars;
    llvm::Value *SizeVal;
    std::tie(SizeValInChars, SizeVal) = RCG.getSizes(Cnt);
    // We use delayed creation/initialization for VLAs and array sections. It is
    // required because runtime does not provide the way to pass the sizes of
    // VLAs/array sections to initializer/combiner/finalizer functions. Instead
    // threadprivate global variables are used to store these values and use
    // them in the functions.
    bool DelayedCreation = !!SizeVal;
    SizeValInChars = CGF.Builder.CreateIntCast(SizeValInChars, CGM.SizeTy,
                                               /*isSigned=*/false);
    LValue SizeLVal = CGF.EmitLValueForField(ElemLVal, SizeFD);
    CGF.EmitStoreOfScalar(SizeValInChars, SizeLVal);
    // ElemLVal.reduce_init = init;
    LValue InitLVal = CGF.EmitLValueForField(ElemLVal, InitFD);
    llvm::Value *InitAddr =
        CGF.EmitCastToVoidPtr(emitReduceInitFunction(CGM, Loc, RCG, Cnt));
    CGF.EmitStoreOfScalar(InitAddr, InitLVal);
    // ElemLVal.reduce_fini = fini;
    LValue FiniLVal = CGF.EmitLValueForField(ElemLVal, FiniFD);
    llvm::Value *Fini = emitReduceFiniFunction(CGM, Loc, RCG, Cnt);
    llvm::Value *FiniAddr = Fini
                                ? CGF.EmitCastToVoidPtr(Fini)
                                : llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
    CGF.EmitStoreOfScalar(FiniAddr, FiniLVal);
    // ElemLVal.reduce_comb = comb;
    LValue CombLVal = CGF.EmitLValueForField(ElemLVal, CombFD);
    llvm::Value *CombAddr = CGF.EmitCastToVoidPtr(emitReduceCombFunction(
        CGM, Loc, RCG, Cnt, Data.ReductionOps[Cnt], LHSExprs[Cnt],
        RHSExprs[Cnt], Data.ReductionCopies[Cnt]));
    CGF.EmitStoreOfScalar(CombAddr, CombLVal);
    // ElemLVal.flags = 0;
    LValue FlagsLVal = CGF.EmitLValueForField(ElemLVal, FlagsFD);
    if (DelayedCreation) {
      CGF.EmitStoreOfScalar(
          llvm::ConstantInt::get(CGM.Int32Ty, /*V=*/1, /*isSigned=*/true),
          FlagsLVal);
    } else
      CGF.EmitNullInitialization(FlagsLVal.getAddress(CGF),
                                 FlagsLVal.getType());
  }
  if (Data.IsReductionWithTaskMod) {
    // Build call void *__kmpc_taskred_modifier_init(ident_t *loc, int gtid, int
    // is_ws, int num, void *data);
    llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
    llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                  CGM.IntTy, /*isSigned=*/true);
    llvm::Value *Args[] = {
        IdentTLoc, GTid,
        llvm::ConstantInt::get(CGM.IntTy, Data.IsWorksharingReduction ? 1 : 0,
                               /*isSigned=*/true),
        llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
        CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            TaskRedInput.getPointer(), CGM.VoidPtrTy)};
    return CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___kmpc_taskred_modifier_init),
        Args);
  }
  // Build call void *__kmpc_taskred_init(int gtid, int num_data, void *data);
  llvm::Value *Args[] = {
      CGF.Builder.CreateIntCast(getThreadID(CGF, Loc), CGM.IntTy,
                                /*isSigned=*/true),
      llvm::ConstantInt::get(CGM.IntTy, Size, /*isSigned=*/true),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(TaskRedInput.getPointer(),
                                                      CGM.VoidPtrTy)};
  return CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                                 CGM.getModule(), OMPRTL___kmpc_taskred_init),
                             Args);
}

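// Finishes a modified task reduction (a reduction with the 'task' modifier)
// by calling __kmpc_task_reduction_modifier_fini.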
void CGOpenMPRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                            SourceLocation Loc,
                                            bool IsWorksharingReduction) {
  // Build call void __kmpc_task_reduction_modifier_fini(ident_t *loc, int
  // gtid, int is_ws);
  llvm::Value *IdentTLoc = emitUpdateLocation(CGF, Loc);
  llvm::Value *GTid = CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                CGM.IntTy, /*isSigned=*/true);
  llvm::Value *Args[] = {IdentTLoc, GTid,
                         llvm::ConstantInt::get(CGM.IntTy,
                                                IsWorksharingReduction ? 1 : 0,
                                                /*isSigned=*/true)};
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(
          CGM.getModule(), OMPRTL___kmpc_task_reduction_modifier_fini),
      Args);
}

void CGOpenMPRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              ReductionCodeGen &RCG,
                                              unsigned N) {
  auto Sizes = RCG.getSizes(N);
  // Emit threadprivate global variable if the type is non-constant
  // (Sizes.second = nullptr).
  if (Sizes.second) {
    llvm::Value *SizeVal = CGF.Builder.CreateIntCast(Sizes.second, CGM.SizeTy,
                                                     /*isSigned=*/false);
    Address SizeAddr = getAddrOfArtificialThreadPrivate(
        CGF, CGM.getContext().getSizeType(),
        generateUniqueName(CGM, "reduction_size", RCG.getRefExpr(N)));
    CGF.Builder.CreateStore(SizeVal, SizeAddr, /*IsVolatile=*/false);
  }
}

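// Returns the address of this thread's instance of a task reduction item,
// as provided by __kmpc_task_reduction_get_th_data.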
Address CGOpenMPRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              llvm::Value *ReductionsPtr,
                                              LValue SharedLVal) {
  // Build call void *__kmpc_task_reduction_get_th_data(int gtid, void *tg,
  // void *d);
  llvm::Value *Args[] = {CGF.Builder.CreateIntCast(getThreadID(CGF, Loc),
                                                   CGM.IntTy,
                                                   /*isSigned=*/true),
                         ReductionsPtr,
                         CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                             SharedLVal.getPointer(CGF), CGM.VoidPtrTy)};
  return Address(
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_task_reduction_get_th_data),
          Args),
      CGF.Int8Ty, SharedLVal.getAlignment());
}
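// Sketch (not from the original source): inside a participating task body,
// each reference to a reduction item 'x' is redirected to the address
// returned by __kmpc_task_reduction_get_th_data(gtid, tg, &x), where 'tg' is
// the descriptor produced by the init call above, so every thread works on
// its own private copy of the item.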
void CGOpenMPRuntime::emitTaskwaitCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPTaskDataTy &Data) {
  if (!CGF.HaveInsertPoint())
    return;

  if (CGF.CGM.getLangOpts().OpenMPIRBuilder && Data.Dependences.empty()) {
    // TODO: Need to support taskwait with dependences in the OpenMPIRBuilder.
    OMPBuilder.createTaskwait(CGF.Builder);
  } else {
    llvm::Value *ThreadID = getThreadID(CGF, Loc);
    llvm::Value *UpLoc = emitUpdateLocation(CGF, Loc);
    auto &M = CGM.getModule();
    Address DependenciesArray = Address::invalid();
    llvm::Value *NumOfElements;
    std::tie(NumOfElements, DependenciesArray) =
        emitDependClause(CGF, Data.Dependences, Loc);
    llvm::Value *DepWaitTaskArgs[6];
    if (!Data.Dependences.empty()) {
      DepWaitTaskArgs[0] = UpLoc;
      DepWaitTaskArgs[1] = ThreadID;
      DepWaitTaskArgs[2] = NumOfElements;
      DepWaitTaskArgs[3] = DependenciesArray.getPointer();
      DepWaitTaskArgs[4] = CGF.Builder.getInt32(0);
      DepWaitTaskArgs[5] = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);

      CodeGenFunction::RunCleanupsScope LocalScope(CGF);

      // Build void __kmpc_omp_wait_deps(ident_t *, kmp_int32 gtid,
      // kmp_int32 ndeps, kmp_depend_info_t *dep_list, kmp_int32
      // ndeps_noalias, kmp_depend_info_t *noalias_dep_list); if dependence info
      // is specified.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_wait_deps),
          DepWaitTaskArgs);
    } else {
      // Build call kmp_int32 __kmpc_omp_taskwait(ident_t *loc, kmp_int32
      // global_tid);
      llvm::Value *Args[] = {UpLoc, ThreadID};
      // Ignore return result until untied tasks are supported.
      CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_omp_taskwait),
          Args);
    }
  }

  if (auto *Region = dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
    Region->emitUntiedSwitch(CGF);
}
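// Illustrative lowering (assumed IR shape, not from the original source):
// with no depend clauses and the OpenMPIRBuilder path disabled,
//   #pragma omp taskwait
// becomes roughly
//   %gtid = call i32 @__kmpc_global_thread_num(ptr @loc)
//   call i32 @__kmpc_omp_taskwait(ptr @loc, i32 %gtid)
// with the result ignored, as noted above.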
void CGOpenMPRuntime::emitInlinedDirective(CodeGenFunction &CGF,
                                           OpenMPDirectiveKind InnerKind,
                                           const RegionCodeGenTy &CodeGen,
                                           bool HasCancel) {
  if (!CGF.HaveInsertPoint())
    return;
  InlinedOpenMPRegionRAII Region(CGF, CodeGen, InnerKind, HasCancel,
                                 InnerKind != OMPD_critical &&
                                     InnerKind != OMPD_master &&
                                     InnerKind != OMPD_masked);
  CGF.CapturedStmtInfo->EmitBody(CGF, /*S=*/nullptr);
}

namespace {
/// Values passed to the runtime as the 'cncl_kind' argument of the
/// cancellation entry points.
enum RTCancelKind {
  CancelNoreq = 0,
  CancelParallel = 1,
  CancelLoop = 2,
  CancelSections = 3,
  CancelTaskgroup = 4
};
} // anonymous namespace
static RTCancelKind getCancellationKind(OpenMPDirectiveKind CancelRegion) {
  RTCancelKind CancelKind = CancelNoreq;
  if (CancelRegion == OMPD_parallel)
    CancelKind = CancelParallel;
  else if (CancelRegion == OMPD_for)
    CancelKind = CancelLoop;
  else if (CancelRegion == OMPD_sections)
    CancelKind = CancelSections;
  else {
    assert(CancelRegion == OMPD_taskgroup);
    CancelKind = CancelTaskgroup;
  }
  return CancelKind;
}
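// For reference: these values mirror the cancellation kinds understood by the
// OpenMP runtime, so e.g. '#pragma omp cancel sections' reaches the runtime
// with cncl_kind == CancelSections. CancelNoreq is only the initial value;
// the assert above guarantees one of the four real kinds is returned.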
void CGOpenMPRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancellationpoint(ident_t *loc, kmp_int32
  // global_tid, kmp_int32 cncl_kind);
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    // For 'cancellation point taskgroup', the task region info may not have a
    // cancel. This may instead happen in another adjacent task.
    if (CancelRegion == OMPD_taskgroup || OMPRegionInfo->hasCancel()) {
      llvm::Value *Args[] = {
          emitUpdateLocation(CGF, Loc), getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___kmpc_cancellationpoint),
          Args);
      // if (__kmpc_cancellationpoint()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    }
  }
}
void CGOpenMPRuntime::emitCancelCall(CodeGenFunction &CGF, SourceLocation Loc,
                                     const Expr *IfCond,
                                     OpenMPDirectiveKind CancelRegion) {
  if (!CGF.HaveInsertPoint())
    return;
  // Build call kmp_int32 __kmpc_cancel(ident_t *loc, kmp_int32 global_tid,
  // kmp_int32 cncl_kind);
  auto &M = CGM.getModule();
  if (auto *OMPRegionInfo =
          dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo)) {
    auto &&ThenGen = [this, &M, Loc, CancelRegion,
                      OMPRegionInfo](CodeGenFunction &CGF, PrePostActionTy &) {
      CGOpenMPRuntime &RT = CGF.CGM.getOpenMPRuntime();
      llvm::Value *Args[] = {
          RT.emitUpdateLocation(CGF, Loc), RT.getThreadID(CGF, Loc),
          CGF.Builder.getInt32(getCancellationKind(CancelRegion))};
      // Ignore return result until untied tasks are supported.
      llvm::Value *Result = CGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(M, OMPRTL___kmpc_cancel), Args);
      // if (__kmpc_cancel()) {
      //   call i32 @__kmpc_cancel_barrier( // for parallel cancellation only
      //   exit from construct;
      // }
      llvm::BasicBlock *ExitBB = CGF.createBasicBlock(".cancel.exit");
      llvm::BasicBlock *ContBB = CGF.createBasicBlock(".cancel.continue");
      llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Result);
      CGF.Builder.CreateCondBr(Cmp, ExitBB, ContBB);
      CGF.EmitBlock(ExitBB);
      if (CancelRegion == OMPD_parallel)
        RT.emitBarrierCall(CGF, Loc, OMPD_unknown, /*EmitChecks=*/false);
      // exit from construct;
      CodeGenFunction::JumpDest CancelDest =
          CGF.getOMPCancelDestination(OMPRegionInfo->getDirectiveKind());
      CGF.EmitBranchThroughCleanup(CancelDest);
      CGF.EmitBlock(ContBB, /*IsFinished=*/true);
    };
    if (IfCond) {
      emitIfClause(CGF, IfCond, ThenGen,
                   [](CodeGenFunction &, PrePostActionTy &) {});
    } else {
      RegionCodeGenTy ThenRCG(ThenGen);
      ThenRCG(CGF);
    }
  }
}
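// Sketch of the emitted control flow (assumed, not from the original source):
// for '#pragma omp cancel for' inside a worksharing loop,
//   %res = call i32 @__kmpc_cancel(ptr @loc, i32 %gtid, i32 2) ; CancelLoop
//   br i1 %cancelled, label %.cancel.exit, label %.cancel.continue
// where the exit block branches through any pending cleanups to the
// cancellation destination of the enclosing construct.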
namespace {
/// Cleanup action for uses_allocators support.
class OMPUsesAllocatorsActionTy final : public PrePostActionTy {
  ArrayRef<std::pair<const Expr *, const Expr *>> Allocators;

public:
  OMPUsesAllocatorsActionTy(
      ArrayRef<std::pair<const Expr *, const Expr *>> Allocators)
      : Allocators(Allocators) {}
  void Enter(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsInit(
          CGF, AllocatorData.first, AllocatorData.second);
    }
  }
  void Exit(CodeGenFunction &CGF) override {
    if (!CGF.HaveInsertPoint())
      return;
    for (const auto &AllocatorData : Allocators) {
      CGF.CGM.getOpenMPRuntime().emitUsesAllocatorsFini(CGF,
                                                        AllocatorData.first);
    }
  }
};
} // namespace
void CGOpenMPRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  assert(!ParentName.empty() && "Invalid target entry parent name!");
  HasEmittedTargetRegion = true;
  SmallVector<std::pair<const Expr *, const Expr *>, 4> Allocators;
  for (const auto *C : D.getClausesOfKind<OMPUsesAllocatorsClause>()) {
    for (unsigned I = 0, E = C->getNumberOfAllocators(); I < E; ++I) {
      const OMPUsesAllocatorsClause::Data D = C->getAllocatorData(I);
      if (!D.AllocatorTraits)
        continue;
      Allocators.emplace_back(D.Allocator, D.AllocatorTraits);
    }
  }
  OMPUsesAllocatorsActionTy UsesAllocatorAction(Allocators);
  CodeGen.setAction(UsesAllocatorAction);
  emitTargetOutlinedFunctionHelper(D, ParentName, OutlinedFn, OutlinedFnID,
                                   IsOffloadEntry, CodeGen);
}
void CGOpenMPRuntime::emitUsesAllocatorsInit(CodeGenFunction &CGF,
                                             const Expr *Allocator,
                                             const Expr *AllocatorTraits) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  // Use default memspace handle.
  llvm::Value *MemSpaceHandle = llvm::ConstantPointerNull::get(CGF.VoidPtrTy);
  llvm::Value *NumTraits = llvm::ConstantInt::get(
      CGF.IntTy, cast<ConstantArrayType>(
                     AllocatorTraits->getType()->getAsArrayTypeUnsafe())
                     ->getSize()
                     .getLimitedValue());
  LValue AllocatorTraitsLVal = CGF.EmitLValue(AllocatorTraits);
  Address Addr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
      AllocatorTraitsLVal.getAddress(CGF), CGF.VoidPtrPtrTy, CGF.VoidPtrTy);
  AllocatorTraitsLVal = CGF.MakeAddrLValue(Addr, CGF.getContext().VoidPtrTy,
                                           AllocatorTraitsLVal.getBaseInfo(),
                                           AllocatorTraitsLVal.getTBAAInfo());
  llvm::Value *Traits =
      CGF.EmitLoadOfScalar(AllocatorTraitsLVal, AllocatorTraits->getExprLoc());

  llvm::Value *AllocatorVal =
      CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                              CGM.getModule(), OMPRTL___kmpc_init_allocator),
                          {ThreadId, MemSpaceHandle, NumTraits, Traits});
  // Store to allocator.
  CGF.EmitVarDecl(*cast<VarDecl>(
      cast<DeclRefExpr>(Allocator->IgnoreParenImpCasts())->getDecl()));
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  AllocatorVal =
      CGF.EmitScalarConversion(AllocatorVal, CGF.getContext().VoidPtrTy,
                               Allocator->getType(), Allocator->getExprLoc());
  CGF.EmitStoreOfScalar(AllocatorVal, AllocatorLVal);
}
void CGOpenMPRuntime::emitUsesAllocatorsFini(CodeGenFunction &CGF,
                                             const Expr *Allocator) {
  llvm::Value *ThreadId = getThreadID(CGF, Allocator->getExprLoc());
  ThreadId = CGF.Builder.CreateIntCast(ThreadId, CGF.IntTy, /*isSigned=*/true);
  LValue AllocatorLVal = CGF.EmitLValue(Allocator->IgnoreParenImpCasts());
  llvm::Value *AllocatorVal =
      CGF.EmitLoadOfScalar(AllocatorLVal, Allocator->getExprLoc());
  AllocatorVal = CGF.EmitScalarConversion(AllocatorVal, Allocator->getType(),
                                          CGF.getContext().VoidPtrTy,
                                          Allocator->getExprLoc());
  (void)CGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___kmpc_destroy_allocator),
      {ThreadId, AllocatorVal});
}
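// Illustrative example (hypothetical names, not from the original source): for
//   #pragma omp target uses_allocators(my_alloc(my_traits))
// the Enter action above initializes 'my_alloc' from the 'my_traits' array via
// __kmpc_init_allocator before the region runs, and the Exit action releases
// it via __kmpc_destroy_allocator afterwards.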
void CGOpenMPRuntime::emitTargetOutlinedFunctionHelper(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  // Create a unique name for the entry function using the source location
  // information of the current target region. The name will be something like:
  //
  // __omp_offloading_DD_FFFF_PP_lBB[_CC]
  //
  // where DD_FFFF is an ID unique to the file (device and file IDs), PP is the
  // mangled name of the function that encloses the target region and BB is the
  // line number of the target region. CC is a count added when more than one
  // region is located at the same location.

  const bool BuildOutlinedFn = CGM.getLangOpts().OpenMPIsDevice ||
                               !CGM.getLangOpts().OpenMPOffloadMandatory;
  auto EntryInfo =
      getTargetEntryUniqueInfo(CGM.getContext(), D.getBeginLoc(), ParentName);

  SmallString<64> EntryFnName;
  OffloadEntriesInfoManager.getTargetRegionEntryFnName(EntryFnName, EntryInfo);

  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);

  CodeGenFunction CGF(CGM, true);
  CGOpenMPTargetRegionInfo CGInfo(CS, CodeGen, EntryFnName);
  CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);

  if (BuildOutlinedFn)
    OutlinedFn = CGF.GenerateOpenMPCapturedStmtFunction(CS, D.getBeginLoc());

  // If this target outline function is not an offload entry, we don't need to
  // register it.
  if (!IsOffloadEntry)
    return;

  // The target region ID is used by the runtime library to identify the current
  // target region, so it only has to be unique and not necessarily point to
  // anything. It could be the pointer to the outlined function that implements
  // the target region, but we aren't using that so that the compiler doesn't
  // need to keep that, and could therefore inline the host function if proven
  // worthwhile during optimization. On the other hand, if emitting code for the
  // device, the ID has to be the function address so that it can be retrieved
  // from the offloading entry and launched by the runtime library. We also mark
  // the outlined function to have external linkage in case we are emitting code
  // for the device, because these functions will be entry points to the device.

  if (CGM.getLangOpts().OpenMPIsDevice) {
    OutlinedFnID = llvm::ConstantExpr::getBitCast(OutlinedFn, CGM.Int8PtrTy);
    OutlinedFn->setLinkage(llvm::GlobalValue::WeakODRLinkage);
    OutlinedFn->setDSOLocal(false);
    OutlinedFn->setVisibility(llvm::GlobalValue::ProtectedVisibility);
    if (CGM.getTriple().isAMDGCN())
      OutlinedFn->setCallingConv(llvm::CallingConv::AMDGPU_KERNEL);
  } else {
    std::string Name = getName({EntryFnName, "region_id"});
    OutlinedFnID = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::WeakAnyLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), Name);
  }

  // If we do not allow host fallback we still need a named address to use.
  llvm::Constant *TargetRegionEntryAddr = OutlinedFn;
  if (!BuildOutlinedFn) {
    assert(!CGM.getModule().getGlobalVariable(EntryFnName, true) &&
           "Named kernel already exists?");
    TargetRegionEntryAddr = new llvm::GlobalVariable(
        CGM.getModule(), CGM.Int8Ty, /*isConstant=*/true,
        llvm::GlobalValue::InternalLinkage,
        llvm::Constant::getNullValue(CGM.Int8Ty), EntryFnName);
  }

  // Register the information for the entry associated with this target region.
  OffloadEntriesInfoManager.registerTargetRegionEntryInfo(
      EntryInfo, TargetRegionEntryAddr, OutlinedFnID,
      llvm::OffloadEntriesInfoManager::OMPTargetRegionEntryTargetRegion,
      CGM.getLangOpts().OpenMPIsDevice);

  // Add NumTeams and ThreadLimit attributes to the outlined GPU function
  int32_t DefaultValTeams = -1;
  getNumTeamsExprForTargetDirective(CGF, D, DefaultValTeams);
  if (DefaultValTeams > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_num_teams",
                          std::to_string(DefaultValTeams));
  }
  int32_t DefaultValThreads = -1;
  getNumThreadsExprForTargetDirective(CGF, D, DefaultValThreads);
  if (DefaultValThreads > 0 && OutlinedFn) {
    OutlinedFn->addFnAttr("omp_target_thread_limit",
                          std::to_string(DefaultValThreads));
  }

  if (BuildOutlinedFn)
    CGM.getTargetCodeGenInfo().setTargetAttributes(nullptr, OutlinedFn, CGM);
}
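// Example (illustrative values, not from the original source): a target region
// at line 42 of function 'foo', in a file with device ID 0x1 and file ID 0x2,
// gets an offload entry named
//   __omp_offloading_1_2_foo_l42
// following the __omp_offloading_DD_FFFF_PP_lBB scheme described above.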
/// Checks if the expression is constant or does not have non-trivial function
/// calls.
static bool isTrivial(ASTContext &Ctx, const Expr *E) {
  // We can skip constant expressions.
  // We can skip expressions with trivial calls or simple expressions.
  return (E->isEvaluatable(Ctx, Expr::SE_AllowUndefinedBehavior) ||
          !E->hasNonTrivialCall(Ctx)) &&
         !E->HasSideEffects(Ctx, /*IncludePossibleEffects=*/true);
}
const Stmt *CGOpenMPRuntime::getSingleCompoundChild(ASTContext &Ctx,
                                                    const Stmt *Body) {
  const Stmt *Child = Body->IgnoreContainers();
  while (const auto *C = dyn_cast_or_null<CompoundStmt>(Child)) {
    Child = nullptr;
    for (const Stmt *S : C->body()) {
      if (const auto *E = dyn_cast<Expr>(S)) {
        if (isTrivial(Ctx, E))
          continue;
      }
      // Some of the statements can be ignored.
      if (isa<AsmStmt>(S) || isa<NullStmt>(S) || isa<OMPFlushDirective>(S) ||
          isa<OMPBarrierDirective>(S) || isa<OMPTaskyieldDirective>(S))
        continue;
      // Analyze declarations.
      if (const auto *DS = dyn_cast<DeclStmt>(S)) {
        if (llvm::all_of(DS->decls(), [](const Decl *D) {
              if (isa<EmptyDecl>(D) || isa<DeclContext>(D) ||
                  isa<TypeDecl>(D) || isa<PragmaCommentDecl>(D) ||
                  isa<PragmaDetectMismatchDecl>(D) || isa<UsingDecl>(D) ||
                  isa<UsingDirectiveDecl>(D) ||
                  isa<OMPDeclareReductionDecl>(D) ||
                  isa<OMPThreadPrivateDecl>(D) || isa<OMPAllocateDecl>(D))
                return true;
              const auto *VD = dyn_cast<VarDecl>(D);
              if (!VD)
                return false;
              return VD->hasGlobalStorage() || !VD->isUsed();
            }))
          continue;
      }
      // Found multiple children - cannot get the one child only.
      if (Child)
        return Body;
      Child = S;
    }
    if (Child)
      Child = Child->IgnoreContainers();
  }
  return Child;
}
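// Example (not from the original source): for a captured body such as
//   { int unused; ; #pragma omp teams ... }
// the unused local declaration and the null statement are skipped, so the
// teams directive is returned as the single child; a second non-trivial
// statement would make the function return the whole body instead.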
const Expr *CGOpenMPRuntime::getNumTeamsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {

  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  switch (DirectiveKind) {
  case OMPD_target: {
    const auto *CS = D.getInnermostCapturedStmt();
    const auto *Body =
        CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
    const Stmt *ChildStmt =
        CGOpenMPRuntime::getSingleCompoundChild(CGF.getContext(), Body);
    if (const auto *NestedDir =
            dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
      if (isOpenMPTeamsDirective(NestedDir->getDirectiveKind())) {
        if (NestedDir->hasClausesOfKind<OMPNumTeamsClause>()) {
          const Expr *NumTeams =
              NestedDir->getSingleClause<OMPNumTeamsClause>()->getNumTeams();
          if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
            if (auto Constant =
                    NumTeams->getIntegerConstantExpr(CGF.getContext()))
              DefaultVal = Constant->getExtValue();
          return NumTeams;
        }
        DefaultVal = 0;
        return nullptr;
      }
      if (isOpenMPParallelDirective(NestedDir->getDirectiveKind()) ||
          isOpenMPSimdDirective(NestedDir->getDirectiveKind())) {
        DefaultVal = 1;
        return nullptr;
      }
      DefaultVal = 1;
      return nullptr;
    }
    // A value of -1 is used to check if we need to emit no teams region
    DefaultVal = -1;
    return nullptr;
  }
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    if (D.hasClausesOfKind<OMPNumTeamsClause>()) {
      const Expr *NumTeams =
          D.getSingleClause<OMPNumTeamsClause>()->getNumTeams();
      if (NumTeams->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant = NumTeams->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return NumTeams;
    }
    DefaultVal = 0;
    return nullptr;
  }
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  default:
    break;
  }
  llvm_unreachable("Unexpected directive kind.");
}
llvm::Value *CGOpenMPRuntime::emitNumTeamsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  CGBuilderTy &Bld = CGF.Builder;
  int32_t DefaultNT = -1;
  const Expr *NumTeams = getNumTeamsExprForTargetDirective(CGF, D, DefaultNT);
  if (NumTeams != nullptr) {
    OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();

    switch (DirectiveKind) {
    case OMPD_target: {
      const auto *CS = D.getInnermostCapturedStmt();
      CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
      CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    case OMPD_target_teams:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd: {
      CodeGenFunction::RunCleanupsScope NumTeamsScope(CGF);
      llvm::Value *NumTeamsVal = CGF.EmitScalarExpr(NumTeams,
                                                    /*IgnoreResultAssign*/ true);
      return Bld.CreateIntCast(NumTeamsVal, CGF.Int32Ty,
                               /*isSigned=*/true);
    }
    default:
      break;
    }
  }

  return llvm::ConstantInt::get(CGF.Int32Ty, DefaultNT);
}
static llvm::Value *getNumThreads(CodeGenFunction &CGF, const CapturedStmt *CS,
                                  llvm::Value *DefaultThreadLimitVal) {
  const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
      CGF.getContext(), CS->getCapturedStmt());
  if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
    if (isOpenMPParallelDirective(Dir->getDirectiveKind())) {
      llvm::Value *NumThreads = nullptr;
      llvm::Value *CondVal = nullptr;
      // Handle if clause. If if clause present, the number of threads is
      // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
      if (Dir->hasClausesOfKind<OMPIfClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const OMPIfClause *IfClause = nullptr;
        for (const auto *C : Dir->getClausesOfKind<OMPIfClause>()) {
          if (C->getNameModifier() == OMPD_unknown ||
              C->getNameModifier() == OMPD_parallel) {
            IfClause = C;
            break;
          }
        }
        if (IfClause) {
          const Expr *Cond = IfClause->getCondition();
          bool Result;
          if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
            if (!Result)
              return CGF.Builder.getInt32(1);
          } else {
            CodeGenFunction::LexicalScope Scope(CGF, Cond->getSourceRange());
            if (const auto *PreInit =
                    cast_or_null<DeclStmt>(IfClause->getPreInitStmt())) {
              for (const auto *I : PreInit->decls()) {
                if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
                  CGF.EmitVarDecl(cast<VarDecl>(*I));
                } else {
                  CodeGenFunction::AutoVarEmission Emission =
                      CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
                  CGF.EmitAutoVarCleanups(Emission);
                }
              }
            }
            CondVal = CGF.EvaluateExprAsBool(Cond);
          }
        }
      }
      // Check the value of num_threads clause iff if clause was not specified
      // or is not evaluated to false.
      if (Dir->hasClausesOfKind<OMPNumThreadsClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *NumThreadsClause =
            Dir->getSingleClause<OMPNumThreadsClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, NumThreadsClause->getNumThreads()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(NumThreadsClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        NumThreads = CGF.EmitScalarExpr(NumThreadsClause->getNumThreads());
        NumThreads = CGF.Builder.CreateIntCast(NumThreads, CGF.Int32Ty,
                                               /*isSigned=*/false);
        if (DefaultThreadLimitVal)
          NumThreads = CGF.Builder.CreateSelect(
              CGF.Builder.CreateICmpULT(DefaultThreadLimitVal, NumThreads),
              DefaultThreadLimitVal, NumThreads);
      } else {
        NumThreads = DefaultThreadLimitVal ? DefaultThreadLimitVal
                                           : CGF.Builder.getInt32(0);
      }
      // Process condition of the if clause.
      if (CondVal)
        NumThreads = CGF.Builder.CreateSelect(CondVal, NumThreads,
                                              CGF.Builder.getInt32(1));
      return NumThreads;
    }
    if (isOpenMPSimdDirective(Dir->getDirectiveKind()))
      return CGF.Builder.getInt32(1);
    return DefaultThreadLimitVal;
  }
  return DefaultThreadLimitVal ? DefaultThreadLimitVal
                               : CGF.Builder.getInt32(0);
}
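// Worked example (not from the original source): for a nested
//   #pragma omp parallel if(c) num_threads(n)
// the value computed above follows the formula from the comment:
//   c ? (n ? min(n, thread_limit) : 0) : 1
// where 0 means no compile-time bound is known and 1 covers the serialized
// (if clause false) case.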
const Expr *CGOpenMPRuntime::getNumThreadsExprForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    int32_t &DefaultVal) {
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");

  switch (DirectiveKind) {
  case OMPD_target:
    // Teams have no thread_limit clause.
    return nullptr;
  case OMPD_target_teams:
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      const Expr *ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
      return ThreadLimit;
    }
    return nullptr;
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    Expr *ThreadLimit = nullptr;
    Expr *NumThreads = nullptr;
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      ThreadLimit = ThreadLimitClause->getThreadLimit();
      if (ThreadLimit->isIntegerConstantExpr(CGF.getContext()))
        if (auto Constant =
                ThreadLimit->getIntegerConstantExpr(CGF.getContext()))
          DefaultVal = Constant->getExtValue();
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      NumThreads = NumThreadsClause->getNumThreads();
      if (NumThreads->isIntegerConstantExpr(CGF.getContext())) {
        if (auto Constant =
                NumThreads->getIntegerConstantExpr(CGF.getContext())) {
          if (Constant->getExtValue() < DefaultVal) {
            DefaultVal = Constant->getExtValue();
            ThreadLimit = NumThreads;
          }
        }
      }
    }
    return ThreadLimit;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    DefaultVal = 1;
    return nullptr;
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
llvm::Value *CGOpenMPRuntime::emitNumThreadsForTargetDirective(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) {
  assert(!CGF.getLangOpts().OpenMPIsDevice &&
         "Clauses associated with the teams directive expected to be emitted "
         "only for the host!");
  OpenMPDirectiveKind DirectiveKind = D.getDirectiveKind();
  assert(isOpenMPTargetExecutionDirective(DirectiveKind) &&
         "Expected target-based executable directive.");
  CGBuilderTy &Bld = CGF.Builder;
  llvm::Value *ThreadLimitVal = nullptr;
  llvm::Value *NumThreadsVal = nullptr;
  switch (DirectiveKind) {
  case OMPD_target: {
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->hasClausesOfKind<OMPThreadLimitClause>()) {
        CGOpenMPInnerExprInfo CGInfo(CGF, *CS);
        CodeGenFunction::CGCapturedStmtRAII CapInfoRAII(CGF, &CGInfo);
        const auto *ThreadLimitClause =
            Dir->getSingleClause<OMPThreadLimitClause>();
        CodeGenFunction::LexicalScope Scope(
            CGF, ThreadLimitClause->getThreadLimit()->getSourceRange());
        if (const auto *PreInit =
                cast_or_null<DeclStmt>(ThreadLimitClause->getPreInitStmt())) {
          for (const auto *I : PreInit->decls()) {
            if (!I->hasAttr<OMPCaptureNoInitAttr>()) {
              CGF.EmitVarDecl(cast<VarDecl>(*I));
            } else {
              CodeGenFunction::AutoVarEmission Emission =
                  CGF.EmitAutoVarAlloca(cast<VarDecl>(*I));
              CGF.EmitAutoVarCleanups(Emission);
            }
          }
        }
        llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
            ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
        ThreadLimitVal =
            Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
      }
      if (isOpenMPTeamsDirective(Dir->getDirectiveKind()) &&
          !isOpenMPDistributeDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
            CGF.getContext(), CS->getCapturedStmt());
        Dir = dyn_cast_or_null<OMPExecutableDirective>(Child);
      }
      if (Dir && isOpenMPDistributeDirective(Dir->getDirectiveKind()) &&
          !isOpenMPSimdDirective(Dir->getDirectiveKind())) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
      if (Dir && isOpenMPSimdDirective(Dir->getDirectiveKind()))
        return Bld.getInt32(1);
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams: {
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    const CapturedStmt *CS = D.getInnermostCapturedStmt();
    if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
      return NumThreads;
    const Stmt *Child = CGOpenMPRuntime::getSingleCompoundChild(
        CGF.getContext(), CS->getCapturedStmt());
    if (const auto *Dir = dyn_cast_or_null<OMPExecutableDirective>(Child)) {
      if (Dir->getDirectiveKind() == OMPD_distribute) {
        CS = Dir->getInnermostCapturedStmt();
        if (llvm::Value *NumThreads = getNumThreads(CGF, CS, ThreadLimitVal))
          return NumThreads;
      }
    }
    return ThreadLimitVal ? ThreadLimitVal : Bld.getInt32(0);
  }
  case OMPD_target_teams_distribute:
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    return getNumThreads(CGF, D.getInnermostCapturedStmt(), ThreadLimitVal);
  case OMPD_target_parallel:
  case OMPD_target_parallel_for:
  case OMPD_target_parallel_for_simd:
  case OMPD_target_teams_distribute_parallel_for:
  case OMPD_target_teams_distribute_parallel_for_simd: {
    llvm::Value *CondVal = nullptr;
    // Handle if clause. If if clause present, the number of threads is
    // calculated as <cond> ? (<numthreads> ? <numthreads> : 0 ) : 1.
    if (D.hasClausesOfKind<OMPIfClause>()) {
      const OMPIfClause *IfClause = nullptr;
      for (const auto *C : D.getClausesOfKind<OMPIfClause>()) {
        if (C->getNameModifier() == OMPD_unknown ||
            C->getNameModifier() == OMPD_parallel) {
          IfClause = C;
          break;
        }
      }
      if (IfClause) {
        const Expr *Cond = IfClause->getCondition();
        bool Result;
        if (Cond->EvaluateAsBooleanCondition(Result, CGF.getContext())) {
          if (!Result)
            return Bld.getInt32(1);
        } else {
          CodeGenFunction::RunCleanupsScope Scope(CGF);
          CondVal = CGF.EvaluateExprAsBool(Cond);
        }
      }
    }
    if (D.hasClausesOfKind<OMPThreadLimitClause>()) {
      CodeGenFunction::RunCleanupsScope ThreadLimitScope(CGF);
      const auto *ThreadLimitClause = D.getSingleClause<OMPThreadLimitClause>();
      llvm::Value *ThreadLimit = CGF.EmitScalarExpr(
          ThreadLimitClause->getThreadLimit(), /*IgnoreResultAssign=*/true);
      ThreadLimitVal =
          Bld.CreateIntCast(ThreadLimit, CGF.Int32Ty, /*isSigned=*/false);
    }
    if (D.hasClausesOfKind<OMPNumThreadsClause>()) {
      CodeGenFunction::RunCleanupsScope NumThreadsScope(CGF);
      const auto *NumThreadsClause = D.getSingleClause<OMPNumThreadsClause>();
      llvm::Value *NumThreads = CGF.EmitScalarExpr(
          NumThreadsClause->getNumThreads(), /*IgnoreResultAssign=*/true);
      NumThreadsVal =
          Bld.CreateIntCast(NumThreads, CGF.Int32Ty, /*isSigned=*/false);
      ThreadLimitVal = ThreadLimitVal
                           ? Bld.CreateSelect(Bld.CreateICmpULT(NumThreadsVal,
                                                                ThreadLimitVal),
                                              NumThreadsVal, ThreadLimitVal)
                           : NumThreadsVal;
    }
    if (!ThreadLimitVal)
      ThreadLimitVal = Bld.getInt32(0);
    if (CondVal)
      return Bld.CreateSelect(CondVal, ThreadLimitVal, Bld.getInt32(1));
    return ThreadLimitVal;
  }
  case OMPD_target_teams_distribute_simd:
  case OMPD_target_simd:
    return Bld.getInt32(1);
  case OMPD_parallel_for:
  case OMPD_parallel_master:
  case OMPD_parallel_sections:
  case OMPD_parallel_for_simd:
  case OMPD_cancellation_point:
  case OMPD_threadprivate:
  case OMPD_taskyield:
  case OMPD_taskgroup:
  case OMPD_target_data:
  case OMPD_target_exit_data:
  case OMPD_target_enter_data:
  case OMPD_distribute:
  case OMPD_distribute_simd:
  case OMPD_distribute_parallel_for:
  case OMPD_distribute_parallel_for_simd:
  case OMPD_teams_distribute:
  case OMPD_teams_distribute_simd:
  case OMPD_teams_distribute_parallel_for:
  case OMPD_teams_distribute_parallel_for_simd:
  case OMPD_target_update:
  case OMPD_declare_simd:
  case OMPD_declare_variant:
  case OMPD_begin_declare_variant:
  case OMPD_end_declare_variant:
  case OMPD_declare_target:
  case OMPD_end_declare_target:
  case OMPD_declare_reduction:
  case OMPD_declare_mapper:
  case OMPD_taskloop_simd:
  case OMPD_master_taskloop:
  case OMPD_master_taskloop_simd:
  case OMPD_parallel_master_taskloop:
  case OMPD_parallel_master_taskloop_simd:
  case OMPD_metadirective:
  default:
    break;
  }
  llvm_unreachable("Unsupported directive kind.");
}
namespace {
LLVM_ENABLE_BITMASK_ENUMS_IN_NAMESPACE();

// Utility to handle information from clauses associated with a given
// construct that use mappable expressions (e.g. 'map' clause, 'to' clause).
// It provides a convenient interface to obtain the information and generate
// code for that information.
class MappableExprsHandler {
public:
  /// Values for bit flags used to specify the mapping type for
  /// offloading.
  enum OpenMPOffloadMappingFlags : uint64_t {
    /// No flags set.
    OMP_MAP_NONE = 0x0,
    /// Allocate memory on the device and move data from host to device.
    OMP_MAP_TO = 0x01,
    /// Allocate memory on the device and move data from device to host.
    OMP_MAP_FROM = 0x02,
    /// Always perform the requested mapping action on the element, even
    /// if it was already mapped before.
    OMP_MAP_ALWAYS = 0x04,
    /// Delete the element from the device environment, ignoring the
    /// current reference count associated with the element.
    OMP_MAP_DELETE = 0x08,
    /// The element being mapped is a pointer-pointee pair; both the
    /// pointer and the pointee should be mapped.
    OMP_MAP_PTR_AND_OBJ = 0x10,
    /// This flag signals that the base address of an entry should be
    /// passed to the target kernel as an argument.
    OMP_MAP_TARGET_PARAM = 0x20,
    /// Signal that the runtime library has to return the device pointer
    /// in the current position for the data being mapped. Used when we have the
    /// use_device_ptr or use_device_addr clause.
    OMP_MAP_RETURN_PARAM = 0x40,
    /// This flag signals that the reference being passed is a pointer to
    /// private data.
    OMP_MAP_PRIVATE = 0x80,
    /// Pass the element to the device by value.
    OMP_MAP_LITERAL = 0x100,
    /// Implicit map.
    OMP_MAP_IMPLICIT = 0x200,
    /// Close is a hint to the runtime to allocate memory close to
    /// the target device.
    OMP_MAP_CLOSE = 0x400,
    /// 0x800 is reserved for compatibility with XLC.
    /// Produce a runtime error if the data is not already allocated.
    OMP_MAP_PRESENT = 0x1000,
    // Increment and decrement a separate reference counter so that the data
    // cannot be unmapped within the associated region. Thus, this flag is
    // intended to be used on 'target' and 'target data' directives because they
    // are inherently structured. It is not intended to be used on 'target
    // enter data' and 'target exit data' directives because they are inherently
    // dynamic.
    // This is an OpenMP extension for the sake of OpenACC support.
    OMP_MAP_OMPX_HOLD = 0x2000,
    /// Signal that the runtime library should use args as an array of
    /// descriptor_dim pointers and use args_size as dims. Used when we have
    /// non-contiguous list items in target update directive
    OMP_MAP_NON_CONTIG = 0x100000000000,
    /// The 16 MSBs of the flags indicate whether the entry is member of some
    /// struct/class.
    OMP_MAP_MEMBER_OF = 0xffff000000000000,
    LLVM_MARK_AS_BITMASK_ENUM(/* LargestFlag = */ OMP_MAP_MEMBER_OF),
  };

  /// Get the offset of the OMP_MAP_MEMBER_OF field.
  static unsigned getFlagMemberOffset() {
    unsigned Offset = 0;
    for (uint64_t Remain = OMP_MAP_MEMBER_OF; !(Remain & 1);
         Remain = Remain >> 1)
      ++Offset;
    return Offset;
  }
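  // Example: OMP_MAP_MEMBER_OF is 0xffff000000000000, which has 48 trailing
  // zero bits, so getFlagMemberOffset() returns 48; a MEMBER_OF(n) annotation
  // is therefore encoded by shifting the member position into the top 16 bits
  // of the map-type flags.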
  /// Class that holds debugging information for a data mapping to be passed to
  /// the runtime library.
  class MappingExprInfo {
    /// The variable declaration used for the data mapping.
    const ValueDecl *MapDecl = nullptr;
    /// The original expression used in the map clause, or null if there is
    /// none.
    const Expr *MapExpr = nullptr;

  public:
    MappingExprInfo(const ValueDecl *MapDecl, const Expr *MapExpr = nullptr)
        : MapDecl(MapDecl), MapExpr(MapExpr) {}

    const ValueDecl *getMapDecl() const { return MapDecl; }
    const Expr *getMapExpr() const { return MapExpr; }
  };
  /// Class that associates information with a base pointer to be passed to the
  /// runtime library.
  class BasePointerInfo {
    /// The base pointer.
    llvm::Value *Ptr = nullptr;
    /// The base declaration that refers to this device pointer, or null if
    /// there is none.
    const ValueDecl *DevPtrDecl = nullptr;

  public:
    BasePointerInfo(llvm::Value *Ptr, const ValueDecl *DevPtrDecl = nullptr)
        : Ptr(Ptr), DevPtrDecl(DevPtrDecl) {}
    llvm::Value *operator*() const { return Ptr; }
    const ValueDecl *getDevicePtrDecl() const { return DevPtrDecl; }
    void setDevicePtrDecl(const ValueDecl *D) { DevPtrDecl = D; }
  };
= SmallVector
<MappingExprInfo
, 4>;
6957 using MapBaseValuesArrayTy
= SmallVector
<BasePointerInfo
, 4>;
6958 using MapValuesArrayTy
= SmallVector
<llvm::Value
*, 4>;
6959 using MapFlagsArrayTy
= SmallVector
<OpenMPOffloadMappingFlags
, 4>;
6960 using MapMappersArrayTy
= SmallVector
<const ValueDecl
*, 4>;
6961 using MapDimArrayTy
= SmallVector
<uint64_t, 4>;
6962 using MapNonContiguousArrayTy
= SmallVector
<MapValuesArrayTy
, 4>;
  /// This structure contains combined information generated for mappable
  /// clauses, including base pointers, pointers, sizes, map types, user-defined
  /// mappers, and non-contiguous information.
  struct MapCombinedInfoTy {
    struct StructNonContiguousInfo {
      bool IsNonContiguous = false;
      MapDimArrayTy Dims;
      MapNonContiguousArrayTy Offsets;
      MapNonContiguousArrayTy Counts;
      MapNonContiguousArrayTy Strides;
    };
    MapExprsArrayTy Exprs;
    MapBaseValuesArrayTy BasePointers;
    MapValuesArrayTy Pointers;
    MapValuesArrayTy Sizes;
    MapFlagsArrayTy Types;
    MapMappersArrayTy Mappers;
    StructNonContiguousInfo NonContigInfo;

    /// Append arrays in \a CurInfo.
    void append(MapCombinedInfoTy &CurInfo) {
      Exprs.append(CurInfo.Exprs.begin(), CurInfo.Exprs.end());
      BasePointers.append(CurInfo.BasePointers.begin(),
                          CurInfo.BasePointers.end());
      Pointers.append(CurInfo.Pointers.begin(), CurInfo.Pointers.end());
      Sizes.append(CurInfo.Sizes.begin(), CurInfo.Sizes.end());
      Types.append(CurInfo.Types.begin(), CurInfo.Types.end());
      Mappers.append(CurInfo.Mappers.begin(), CurInfo.Mappers.end());
      NonContigInfo.Dims.append(CurInfo.NonContigInfo.Dims.begin(),
                                CurInfo.NonContigInfo.Dims.end());
      NonContigInfo.Offsets.append(CurInfo.NonContigInfo.Offsets.begin(),
                                   CurInfo.NonContigInfo.Offsets.end());
      NonContigInfo.Counts.append(CurInfo.NonContigInfo.Counts.begin(),
                                  CurInfo.NonContigInfo.Counts.end());
      NonContigInfo.Strides.append(CurInfo.NonContigInfo.Strides.begin(),
                                   CurInfo.NonContigInfo.Strides.end());
    }
  };
  /// Map between a struct and its lowest & highest elements which have been
  /// mapped.
  /// [ValueDecl *] --> {LE(FieldIndex, Pointer),
  ///                    HE(FieldIndex, Pointer)}
  struct StructRangeInfoTy {
    MapCombinedInfoTy PreliminaryMapData;
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> LowestElem = {
        0, Address::invalid()};
    std::pair<unsigned /*FieldIndex*/, Address /*Pointer*/> HighestElem = {
        0, Address::invalid()};
    Address Base = Address::invalid();
    Address LB = Address::invalid();
    bool IsArraySection = false;
    bool HasCompleteRecord = false;
  };
private:
  /// Kind that defines how a device pointer has to be returned.
  struct MapInfo {
    OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
    OpenMPMapClauseKind MapType = OMPC_MAP_unknown;
    ArrayRef<OpenMPMapModifierKind> MapModifiers;
    ArrayRef<OpenMPMotionModifierKind> MotionModifiers;
    bool ReturnDevicePointer = false;
    bool IsImplicit = false;
    const ValueDecl *Mapper = nullptr;
    const Expr *VarRef = nullptr;
    bool ForDeviceAddr = false;

    MapInfo() = default;
    MapInfo(
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components,
        OpenMPMapClauseKind MapType,
        ArrayRef<OpenMPMapModifierKind> MapModifiers,
        ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
        bool ReturnDevicePointer, bool IsImplicit,
        const ValueDecl *Mapper = nullptr, const Expr *VarRef = nullptr,
        bool ForDeviceAddr = false)
        : Components(Components), MapType(MapType), MapModifiers(MapModifiers),
          MotionModifiers(MotionModifiers),
          ReturnDevicePointer(ReturnDevicePointer), IsImplicit(IsImplicit),
          Mapper(Mapper), VarRef(VarRef), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// If use_device_ptr or use_device_addr is used on a decl which is a struct
  /// member and there is no map information about it, then emission of that
  /// entry is deferred until the whole struct has been processed.
  struct DeferredDevicePtrEntryTy {
    const Expr *IE = nullptr;
    const ValueDecl *VD = nullptr;
    bool ForDeviceAddr = false;

    DeferredDevicePtrEntryTy(const Expr *IE, const ValueDecl *VD,
                             bool ForDeviceAddr)
        : IE(IE), VD(VD), ForDeviceAddr(ForDeviceAddr) {}
  };
  /// The target directive from where the mappable clauses were extracted. It
  /// is either an executable directive or a user-defined mapper directive.
  llvm::PointerUnion<const OMPExecutableDirective *,
                     const OMPDeclareMapperDecl *>
      CurDir;

  /// Function the directive is being generated for.
  CodeGenFunction &CGF;

  /// Set of all first private variables in the current directive.
  /// bool data is set to true if the variable is implicitly marked as
  /// firstprivate, false otherwise.
  llvm::DenseMap<CanonicalDeclPtr<const VarDecl>, bool> FirstPrivateDecls;

  /// Map between device pointer declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      DevPointersMap;

  /// Map between device addr declarations and their expression components.
  /// The key value for declarations in 'this' is null.
  llvm::DenseMap<
      const ValueDecl *,
      SmallVector<OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>>
      HasDevAddrsMap;

  /// Map between lambda declarations and their map type.
  llvm::DenseMap<const ValueDecl *, const OMPMapClause *> LambdasMap;
  llvm::Value *getExprTypeSize(const Expr *E) const {
    QualType ExprTy = E->getType().getCanonicalType();

    // Calculate the size for array shaping expression.
    if (const auto *OAE = dyn_cast<OMPArrayShapingExpr>(E)) {
      llvm::Value *Size =
          CGF.getTypeSize(OAE->getBase()->getType()->getPointeeType());
      for (const Expr *SE : OAE->getDimensions()) {
        llvm::Value *Sz = CGF.EmitScalarExpr(SE);
        Sz = CGF.EmitScalarConversion(Sz, SE->getType(),
                                      CGF.getContext().getSizeType(),
                                      SE->getExprLoc());
        Size = CGF.Builder.CreateNUWMul(Size, Sz);
      }
      return Size;
    }

    // Reference types are ignored for mapping purposes.
    if (const auto *RefTy = ExprTy->getAs<ReferenceType>())
      ExprTy = RefTy->getPointeeType().getCanonicalType();

    // Given that an array section is considered a built-in type, we need to
    // do the calculation based on the length of the section instead of relying
    // on CGF.getTypeSize(E->getType()).
    if (const auto *OAE = dyn_cast<OMPArraySectionExpr>(E)) {
      QualType BaseTy = OMPArraySectionExpr::getBaseOriginalType(
                            OAE->getBase()->IgnoreParenImpCasts())
                            .getCanonicalType();

      // If there is no length associated with the expression and the lower
      // bound is not specified either, that means we are using the whole
      // length of the base.
      if (!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
          !OAE->getLowerBound())
        return CGF.getTypeSize(BaseTy);

      llvm::Value *ElemSize;
      if (const auto *PTy = BaseTy->getAs<PointerType>()) {
        ElemSize = CGF.getTypeSize(PTy->getPointeeType().getCanonicalType());
      } else {
        const auto *ATy = cast<ArrayType>(BaseTy.getTypePtr());
        assert(ATy && "Expecting array type if not a pointer type.");
        ElemSize = CGF.getTypeSize(ATy->getElementType().getCanonicalType());
      }

      // If we don't have a length at this point, that is because we have an
      // array section with a single element.
      if (!OAE->getLength() && OAE->getColonLocFirst().isInvalid())
        return ElemSize;

      if (const Expr *LenExpr = OAE->getLength()) {
        llvm::Value *LengthVal = CGF.EmitScalarExpr(LenExpr);
        LengthVal = CGF.EmitScalarConversion(LengthVal, LenExpr->getType(),
                                             CGF.getContext().getSizeType(),
                                             LenExpr->getExprLoc());
        return CGF.Builder.CreateNUWMul(LengthVal, ElemSize);
      }
      assert(!OAE->getLength() && OAE->getColonLocFirst().isValid() &&
             OAE->getLowerBound() && "expected array_section[lb:].");
      // Size = sizetype - lb * elemtype;
      llvm::Value *LengthVal = CGF.getTypeSize(BaseTy);
      llvm::Value *LBVal = CGF.EmitScalarExpr(OAE->getLowerBound());
      LBVal = CGF.EmitScalarConversion(LBVal, OAE->getLowerBound()->getType(),
                                       CGF.getContext().getSizeType(),
                                       OAE->getLowerBound()->getExprLoc());
      LBVal = CGF.Builder.CreateNUWMul(LBVal, ElemSize);
      llvm::Value *Cmp = CGF.Builder.CreateICmpUGT(LengthVal, LBVal);
      llvm::Value *TrueVal = CGF.Builder.CreateNUWSub(LengthVal, LBVal);
      LengthVal = CGF.Builder.CreateSelect(
          Cmp, TrueVal, llvm::ConstantInt::get(CGF.SizeTy, 0));
      return LengthVal;
    }
    return CGF.getTypeSize(ExprTy);
  }
  /// Return the corresponding bits for a given map clause modifier. Add
  /// a flag marking the map as a pointer if requested. Add a flag marking the
  /// map as the first one of a series of maps that relate to the same map
  /// type.
  OpenMPOffloadMappingFlags getMapTypeBits(
      OpenMPMapClauseKind MapType, ArrayRef<OpenMPMapModifierKind> MapModifiers,
      ArrayRef<OpenMPMotionModifierKind> MotionModifiers, bool IsImplicit,
      bool AddPtrFlag, bool AddIsTargetParamFlag, bool IsNonContiguous) const {
    OpenMPOffloadMappingFlags Bits =
        IsImplicit ? OMP_MAP_IMPLICIT : OMP_MAP_NONE;
    switch (MapType) {
    case OMPC_MAP_alloc:
    case OMPC_MAP_release:
      // alloc and release is the default behavior in the runtime library, i.e.
      // if we don't pass any bits alloc/release that is what the runtime is
      // going to do. Therefore, we don't need to signal anything for these two
      // type modifiers.
      break;
    case OMPC_MAP_to:
      Bits |= OMP_MAP_TO;
      break;
    case OMPC_MAP_from:
      Bits |= OMP_MAP_FROM;
      break;
    case OMPC_MAP_tofrom:
      Bits |= OMP_MAP_TO | OMP_MAP_FROM;
      break;
    case OMPC_MAP_delete:
      Bits |= OMP_MAP_DELETE;
      break;
    case OMPC_MAP_unknown:
      llvm_unreachable("Unexpected map type!");
    }
    if (AddPtrFlag)
      Bits |= OMP_MAP_PTR_AND_OBJ;
    if (AddIsTargetParamFlag)
      Bits |= OMP_MAP_TARGET_PARAM;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_always))
      Bits |= OMP_MAP_ALWAYS;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_close))
      Bits |= OMP_MAP_CLOSE;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_present) ||
        llvm::is_contained(MotionModifiers, OMPC_MOTION_MODIFIER_present))
      Bits |= OMP_MAP_PRESENT;
    if (llvm::is_contained(MapModifiers, OMPC_MAP_MODIFIER_ompx_hold))
      Bits |= OMP_MAP_OMPX_HOLD;
    if (IsNonContiguous)
      Bits |= OMP_MAP_NON_CONTIG;
    return Bits;
  }
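  // Example (not from the original source): an entry for 'map(always, to: x)'
  // where x is passed to the kernel gets
  //   OMP_MAP_TO | OMP_MAP_ALWAYS | OMP_MAP_TARGET_PARAM (0x01 | 0x04 | 0x20),
  // and an implicitly mapped variable additionally carries OMP_MAP_IMPLICIT.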
  /// Return true if the provided expression is a final array section. A
  /// final array section is one whose length can't be proved to be one.
  bool isFinalArraySectionExpression(const Expr *E) const {
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(E);

    // It is not an array section and therefore not a unity-size one.
    if (!OASE)
      return false;

    // An array section with no colon always refers to a single element.
    if (OASE->getColonLocFirst().isInvalid())
      return false;

    const Expr *Length = OASE->getLength();

    // If we don't have a length we have to check if the array has size 1
    // for this dimension. Also, we should always expect a length if the
    // base type is pointer.
    if (!Length) {
      QualType BaseQTy = OMPArraySectionExpr::getBaseOriginalType(
                             OASE->getBase()->IgnoreParenImpCasts())
                             .getCanonicalType();
      if (const auto *ATy = dyn_cast<ConstantArrayType>(BaseQTy.getTypePtr()))
        return ATy->getSize().getSExtValue() != 1;
      // If we don't have a constant dimension length, we have to consider
      // the current section as having any size, so it is not necessarily
      // unitary. If it happens to be unity size, that's user fault.
      return true;
    }

    // Check if the length evaluates to 1.
    Expr::EvalResult Result;
    if (!Length->EvaluateAsInt(Result, CGF.getContext()))
      return true; // Can have more than size 1.

    llvm::APSInt ConstLength = Result.Val.getInt();
    return ConstLength.getSExtValue() != 1;
  }
7256 /// Generate the base pointers, section pointers, sizes, map type bits, and
7257 /// user-defined mappers (all included in \a CombinedInfo) for the provided
7258 /// map type, map or motion modifiers, and expression components.
7259 /// \a IsFirstComponent should be set to true if the provided set of
7260 /// components is the first associated with a capture.
7261 void generateInfoForComponentList(
7262 OpenMPMapClauseKind MapType
, ArrayRef
<OpenMPMapModifierKind
> MapModifiers
,
7263 ArrayRef
<OpenMPMotionModifierKind
> MotionModifiers
,
7264 OMPClauseMappableExprCommon::MappableExprComponentListRef Components
,
7265 MapCombinedInfoTy
&CombinedInfo
, StructRangeInfoTy
&PartialStruct
,
7266 bool IsFirstComponentList
, bool IsImplicit
,
7267 const ValueDecl
*Mapper
= nullptr, bool ForDeviceAddr
= false,
7268 const ValueDecl
*BaseDecl
= nullptr, const Expr
*MapExpr
= nullptr,
7269 ArrayRef
<OMPClauseMappableExprCommon::MappableExprComponentListRef
>
7270 OverlappedElements
= llvm::None
) const {
    // The following summarizes what has to be generated for each map and the
    // types below. The generated information is expressed in this order:
    // base pointer, section pointer, size, flags
    // (to add to the ones that come from the map type and modifier).
    //
    // double d;
    // int i[100];
    // float *p;
    //
    // struct S1 {
    //   int i;
    //   float f[50];
    // }
    // struct S2 {
    //   int i;
    //   float f[50];
    //   S1 s;
    //   double *p;
    //   struct S2 *ps;
    //   int &ref;
    // }
    // S2 s;
    // S2 *ps;
    //
    // map(d)
    // &d, &d, sizeof(double), TARGET_PARAM | TO | FROM
    //
    // map(i)
    // &i, &i, 100*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(i[1:23])
    // &i(=&i[0]), &i[1], 23*sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(p)
    // &p, &p, sizeof(float*), TARGET_PARAM | TO | FROM
    //
    // map(p[1:24])
    // &p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM | PTR_AND_OBJ
    // in unified shared memory mode or for local pointers
    // p, &p[1], 24*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s)
    // &s, &s, sizeof(S2), TARGET_PARAM | TO | FROM
    //
    // map(s.i)
    // &s, &(s.i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(s.s.f)
    // &s, &(s.s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(s.p)
    // &s, &(s.p), sizeof(double*), TARGET_PARAM | TO | FROM
    //
    // map(to: s.p[:22])
    // &s, &(s.p), sizeof(double*), TARGET_PARAM (*)
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1) (**)
    // &(s.p), &(s.p[0]), 22*sizeof(double),
    //   MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(to: s.ref)
    // &s, &(s.ref), sizeof(int*), TARGET_PARAM (*)
    // &s, &(s.ref), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | TO (***)
    // (*) alloc space for struct members, only this is a target parameter
    // (**) map the pointer (nothing to be mapped in this example) (the compiler
    //      optimizes this entry out, same in the examples below)
    // (***) map the pointee (map: to)
    //
    // map(s.ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: s.ps->s.i)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(to: s.ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(s.ps->ps->ps)
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: s.ps->ps->s.f[:22])
    // &s, &(s.ps), sizeof(S2*), TARGET_PARAM
    // &s, &(s.ps), sizeof(S2*), MEMBER_OF(1)
    // &(s.ps), &(s.ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(s.ps->ps), &(s.ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(ps)
    // &ps, &ps, sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(ps->i)
    // ps, &(ps->i), sizeof(int), TARGET_PARAM | TO | FROM
    //
    // map(ps->s.f)
    // ps, &(ps->s.f[0]), 50*sizeof(float), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->p)
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM | FROM
    //
    // map(to: ps->p[:22])
    // ps, &(ps->p), sizeof(double*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(1)
    // &(ps->p), &(ps->p[0]), 22*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | TO
    //
    // map(ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM | TO | FROM
    //
    // map(from: ps->ps->s.i)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->s.i), sizeof(int), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(from: ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    //
    // map(ps->ps->ps->ps)
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->ps), sizeof(S2*), PTR_AND_OBJ | TO | FROM
    //
    // map(to: ps->ps->ps->s.f[:22])
    // ps, &(ps->ps), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->ps), sizeof(S2*), MEMBER_OF(1)
    // &(ps->ps), &(ps->ps->ps), sizeof(S2*), MEMBER_OF(1) | PTR_AND_OBJ
    // &(ps->ps->ps), &(ps->ps->ps->s.f[0]), 22*sizeof(float), PTR_AND_OBJ | TO
    //
    // map(to: s.f[:22]) map(from: s.p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1) +
    //     sizeof(double*) (*), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | TO
    // &s, &(s.p), sizeof(double*), MEMBER_OF(1)
    // &(s.p), &(s.p[0]), 33*sizeof(double), MEMBER_OF(1) | PTR_AND_OBJ | FROM
    // (*) allocate contiguous space needed to fit all mapped members even if
    //     we allocate space for members not mapped (in this example,
    //     s.f[22..49] and s.s are not mapped, yet we must allocate space for
    //     them as well because they fall between &s.f[0] and &s.p)
    //
    // map(from: s.f[:22]) map(to: ps->p[:33])
    // &s, &(s.f[0]), 22*sizeof(float), TARGET_PARAM | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(2) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(2) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 2nd element in the list of
    //     arguments, hence MEMBER_OF(2)
    //
    // map(from: s.f[:22], s.s) map(to: ps->p[:33])
    // &s, &(s.f[0]), 50*sizeof(float) + sizeof(struct S1), TARGET_PARAM
    // &s, &(s.f[0]), 22*sizeof(float), MEMBER_OF(1) | FROM
    // &s, &(s.s), sizeof(struct S1), MEMBER_OF(1) | FROM
    // ps, &(ps->p), sizeof(S2*), TARGET_PARAM
    // ps, &(ps->p), sizeof(double*), MEMBER_OF(4) (*)
    // &(ps->p), &(ps->p[0]), 33*sizeof(double), MEMBER_OF(4) | PTR_AND_OBJ | TO
    // (*) the struct this entry pertains to is the 4th element in the list
    //     of arguments, hence MEMBER_OF(4)
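    //
    // A minimal sketch of a directive that exercises several of the cases
    // above (reusing the illustrative declarations from this comment; not a
    // construct taken from a test):
    //
    //   S2 s;
    //   S2 *ps = &s;
    //   #pragma omp target map(to: s.p[:22]) map(from: ps->ps->s.i)
    //   { s.p[0] = ps->ps->s.i; }
    //
    // For 's.p[:22]' this routine emits the TARGET_PARAM struct entry plus
    // the MEMBER_OF(1) | PTR_AND_OBJ | TO pointee entry; for 'ps->ps->s.i'
    // it emits the chain of PTR_AND_OBJ entries listed above.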
    // Track if the map information being generated is the first for a capture.
    bool IsCaptureFirstInfo = IsFirstComponentList;
    // When the variable is on a declare target link or in a to clause with
    // unified memory, a reference is needed to hold the host/device address
    // of the variable.
    bool RequiresReference = false;
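    // A minimal sketch of a case that sets RequiresReference (hypothetical
    // global 'Gbl'; illustrative only):
    //
    //   int Gbl;
    //   #pragma omp declare target link(Gbl)
    //   ...
    //   #pragma omp target map(tofrom: Gbl) // BP becomes the address of the
    //                                       // runtime-managed link reference.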
    // Scan the components from the base to the complete expression.
    auto CI = Components.rbegin();
    auto CE = Components.rend();
    auto I = CI;

    // Track if the map information being generated is the first for a list of
    // components.
    bool IsExpressionFirstInfo = true;
    bool FirstPointerInComplexData = false;
    Address BP = Address::invalid();
    const Expr *AssocExpr = I->getAssociatedExpression();
    const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr);
    const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);
    const auto *OAShE = dyn_cast<OMPArrayShapingExpr>(AssocExpr);

    if (isa<MemberExpr>(AssocExpr)) {
      // The base is the 'this' pointer. The content of the pointer is going
      // to be the base of the field being mapped.
      BP = CGF.LoadCXXThisAddress();
    } else if ((AE && isa<CXXThisExpr>(AE->getBase()->IgnoreParenImpCasts())) ||
               (OASE &&
                isa<CXXThisExpr>(OASE->getBase()->IgnoreParenImpCasts()))) {
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
    } else if (OAShE &&
               isa<CXXThisExpr>(OAShE->getBase()->IgnoreParenCasts())) {
      BP = Address(
          CGF.EmitScalarExpr(OAShE->getBase()),
          CGF.ConvertTypeForMem(OAShE->getBase()->getType()->getPointeeType()),
          CGF.getContext().getTypeAlignInChars(OAShE->getBase()->getType()));
    } else {
      // The base is the reference to the variable.
      BP = CGF.EmitOMPSharedLValue(AssocExpr).getAddress(CGF);
      if (const auto *VD =
              dyn_cast_or_null<VarDecl>(I->getAssociatedDeclaration())) {
        if (llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
                OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD)) {
          if ((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory())) {
            RequiresReference = true;
            BP = CGF.CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
          }
        }
      }

      // If the variable is a pointer and is being dereferenced (i.e. is not
      // the last component), the base has to be the pointer itself, not its
      // reference. References are ignored for mapping purposes.
      QualType Ty =
          I->getAssociatedDeclaration()->getType().getNonReferenceType();
      if (Ty->isAnyPointerType() && std::next(I) != CE) {
        // No need to generate individual map information for the pointer, it
        // can be associated with the combined storage if shared memory mode is
        // active or the base declaration is not global variable.
        const auto *VD = dyn_cast<VarDecl>(I->getAssociatedDeclaration());
        if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
            !VD || VD->hasLocalStorage())
          BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        else
          FirstPointerInComplexData = true;
      }
    }
    // Track whether a component of the list should be marked as MEMBER_OF some
    // combined entry (for partial structs). Only the first PTR_AND_OBJ entry
    // in a component list should be marked as MEMBER_OF, all subsequent entries
    // do not belong to the base struct. E.g.
    // struct S2 s;
    // s.ps->ps->ps->f[:]
    //    (1) (2) (3) (4)
    // ps(1) is a member pointer, ps(2) is a pointee of ps(1), so it is a
    // PTR_AND_OBJ entry; the PTR is ps(1), so MEMBER_OF the base struct. ps(3)
    // is the pointee of ps(2) which is not member of struct s, so it should not
    // be marked as such (it is still PTR_AND_OBJ).
    // The variable is initialized to false so that PTR_AND_OBJ entries which
    // are not struct members are not considered (e.g. array of pointers to
    // data).
    bool ShouldBeMemberOf = false;
    // Variable keeping track of whether or not we have encountered a component
    // in the component list which is a member expression. Useful when we have a
    // pointer or a final array section, in which case it is the previous
    // component in the list which tells us whether we have a member expression.
    // E.g. X.f[:]
    // While processing the final array section "[:]" it is "f" which tells us
    // whether we are dealing with a member of a declared struct.
    const MemberExpr *EncounteredME = nullptr;
    // Track the total number of dimensions. Start from one for the dummy
    // dimension.
    uint64_t DimSize = 1;

    bool IsNonContiguous = CombinedInfo.NonContigInfo.IsNonContiguous;
    bool IsPrevMemberReference = false;
    for (; I != CE; ++I) {
      // If the current component is member of a struct (parent struct) mark it.
      if (!EncounteredME) {
        EncounteredME = dyn_cast<MemberExpr>(I->getAssociatedExpression());
        // If we encounter a PTR_AND_OBJ entry from now on it should be marked
        // as MEMBER_OF the parent struct.
        if (EncounteredME) {
          ShouldBeMemberOf = true;
          // Do not emit as complex pointer if this is actually not array-like
          // expression.
          if (FirstPointerInComplexData) {
            QualType Ty = std::prev(I)
                              ->getAssociatedDeclaration()
                              ->getType()
                              .getNonReferenceType();
            BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
            FirstPointerInComplexData = false;
          }
        }
      }

      auto Next = std::next(I);
      // We need to generate the addresses and sizes if this is the last
      // component, if the component is a pointer or if it is an array section
      // whose length can't be proved to be one. If this is a pointer, it
      // becomes the base address for the following components.
      //
      // A final array section is one whose length can't be proved to be one.
      // If the map item is non-contiguous then we don't treat any array section
      // as final array section.
      bool IsFinalArraySection =
          !IsNonContiguous &&
          isFinalArraySectionExpression(I->getAssociatedExpression());
      // If we have a declaration for the mapping use that, otherwise use
      // the base declaration of the map clause.
      const ValueDecl *MapDecl = (I->getAssociatedDeclaration())
                                     ? I->getAssociatedDeclaration()
                                     : BaseDecl;
      MapExpr = (I->getAssociatedExpression()) ? I->getAssociatedExpression()
                                               : MapExpr;
      // Get information on whether the element is a pointer. Have to do a
      // special treatment for array sections given that they are built-in
      // types.
      const auto *OASE =
          dyn_cast<OMPArraySectionExpr>(I->getAssociatedExpression());
      const auto *OAShE =
          dyn_cast<OMPArrayShapingExpr>(I->getAssociatedExpression());
      const auto *UO = dyn_cast<UnaryOperator>(I->getAssociatedExpression());
      const auto *BO = dyn_cast<BinaryOperator>(I->getAssociatedExpression());
      bool IsPointer =
          OAShE ||
          (OASE && OMPArraySectionExpr::getBaseOriginalType(OASE)
                       .getCanonicalType()
                       ->isAnyPointerType()) ||
          I->getAssociatedExpression()->getType()->isAnyPointerType();
      bool IsMemberReference = isa<MemberExpr>(I->getAssociatedExpression()) &&
                               MapDecl &&
                               MapDecl->getType()->isLValueReferenceType();
      bool IsNonDerefPointer = IsPointer && !UO && !BO && !IsNonContiguous;

      if (OASE)
        ++DimSize;
      if (Next == CE || IsMemberReference || IsNonDerefPointer ||
          IsFinalArraySection) {
        // If this is not the last component, we expect the pointer to be
        // associated with an array expression or member expression.
        assert((Next == CE ||
                isa<MemberExpr>(Next->getAssociatedExpression()) ||
                isa<ArraySubscriptExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArraySectionExpr>(Next->getAssociatedExpression()) ||
                isa<OMPArrayShapingExpr>(Next->getAssociatedExpression()) ||
                isa<UnaryOperator>(Next->getAssociatedExpression()) ||
                isa<BinaryOperator>(Next->getAssociatedExpression())) &&
               "Unexpected expression");
        Address LB = Address::invalid();
        Address LowestElem = Address::invalid();
        auto &&EmitMemberExprBase = [](CodeGenFunction &CGF,
                                       const MemberExpr *E) {
          const Expr *BaseExpr = E->getBase();
          // If this is s.x, emit s as an lvalue. If it is s->x, emit s as a
          // scalar.
          LValue BaseLV;
          if (E->isArrow()) {
            LValueBaseInfo BaseInfo;
            TBAAAccessInfo TBAAInfo;
            Address Addr =
                CGF.EmitPointerWithAlignment(BaseExpr, &BaseInfo, &TBAAInfo);
            QualType PtrTy = BaseExpr->getType()->getPointeeType();
            BaseLV = CGF.MakeAddrLValue(Addr, PtrTy, BaseInfo, TBAAInfo);
          } else {
            BaseLV = CGF.EmitOMPSharedLValue(BaseExpr);
          }
          return BaseLV;
        };
        if (OAShE) {
          LowestElem = LB =
              Address(CGF.EmitScalarExpr(OAShE->getBase()),
                      CGF.ConvertTypeForMem(
                          OAShE->getBase()->getType()->getPointeeType()),
                      CGF.getContext().getTypeAlignInChars(
                          OAShE->getBase()->getType()));
        } else if (IsMemberReference) {
          const auto *ME = cast<MemberExpr>(I->getAssociatedExpression());
          LValue BaseLVal = EmitMemberExprBase(CGF, ME);
          LowestElem = CGF.EmitLValueForFieldInitialization(
                              BaseLVal, cast<FieldDecl>(MapDecl))
                           .getAddress(CGF);
          LB = CGF.EmitLoadOfReferenceLValue(LowestElem, MapDecl->getType())
                   .getAddress(CGF);
        } else {
          LowestElem = LB =
              CGF.EmitOMPSharedLValue(I->getAssociatedExpression())
                  .getAddress(CGF);
        }
        // If this component is a pointer inside the base struct then we don't
        // need to create any entry for it - it will be combined with the object
        // it is pointing to into a single PTR_AND_OBJ entry.
        bool IsMemberPointerOrAddr =
            EncounteredME &&
            (((IsPointer || ForDeviceAddr) &&
              I->getAssociatedExpression() == EncounteredME) ||
             (IsPrevMemberReference && !IsPointer) ||
             (IsMemberReference && Next != CE &&
              !Next->getAssociatedExpression()->getType()->isPointerType()));
        if (!OverlappedElements.empty() && Next == CE) {
          // Handle base element with the info for overlapped elements.
          assert(!PartialStruct.Base.isValid() && "The base element is set.");
          assert(!IsPointer &&
                 "Unexpected base element with the pointer type.");
          // Mark the whole struct as the struct that requires allocation on
          // the device.
          PartialStruct.LowestElem = {0, LowestElem};
          CharUnits TypeSize = CGF.getContext().getTypeSizeInChars(
              I->getAssociatedExpression()->getType());
          Address HB = CGF.Builder.CreateConstGEP(
              CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                  LowestElem, CGF.VoidPtrTy, CGF.Int8Ty),
              TypeSize.getQuantity() - 1);
          PartialStruct.HighestElem = {
              std::numeric_limits<decltype(
                  PartialStruct.HighestElem.first)>::max(),
              HB};
          PartialStruct.Base = BP;
          PartialStruct.LB = LB;
          assert(
              PartialStruct.PreliminaryMapData.BasePointers.empty() &&
              "Overlapped elements must be used only once for the variable.");
          std::swap(PartialStruct.PreliminaryMapData, CombinedInfo);
          // Emit data for non-overlapped data.
          OpenMPOffloadMappingFlags Flags =
              OMP_MAP_MEMBER_OF |
              getMapTypeBits(MapType, MapModifiers, MotionModifiers, IsImplicit,
                             /*AddPtrFlag=*/false,
                             /*AddIsTargetParamFlag=*/false, IsNonContiguous);
          llvm::Value *Size = nullptr;
          // Do bitcopy of all non-overlapped structure elements.
          for (OMPClauseMappableExprCommon::MappableExprComponentListRef
                   Component : OverlappedElements) {
            Address ComponentLB = Address::invalid();
            for (const OMPClauseMappableExprCommon::MappableComponent &MC :
                 Component) {
              if (const ValueDecl *VD = MC.getAssociatedDeclaration()) {
                const auto *FD = dyn_cast<FieldDecl>(VD);
                if (FD && FD->getType()->isLValueReferenceType()) {
                  const auto *ME =
                      cast<MemberExpr>(MC.getAssociatedExpression());
                  LValue BaseLVal = EmitMemberExprBase(CGF, ME);
                  ComponentLB =
                      CGF.EmitLValueForFieldInitialization(BaseLVal, FD)
                          .getAddress(CGF);
                } else {
                  ComponentLB =
                      CGF.EmitOMPSharedLValue(MC.getAssociatedExpression())
                          .getAddress(CGF);
                }
                Size = CGF.Builder.CreatePtrDiff(
                    CGF.Int8Ty, CGF.EmitCastToVoidPtr(ComponentLB.getPointer()),
                    CGF.EmitCastToVoidPtr(LB.getPointer()));
                break;
              }
            }
            assert(Size && "Failed to determine structure size");
            CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
            CombinedInfo.BasePointers.push_back(BP.getPointer());
            CombinedInfo.Pointers.push_back(LB.getPointer());
            CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
                Size, CGF.Int64Ty, /*isSigned=*/true));
            CombinedInfo.Types.push_back(Flags);
            CombinedInfo.Mappers.push_back(nullptr);
            CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                      : 1);
            LB = CGF.Builder.CreateConstGEP(ComponentLB, 1);
          }
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          Size = CGF.Builder.CreatePtrDiff(
              CGF.Int8Ty, CGF.Builder.CreateConstGEP(HB, 1).getPointer(),
              CGF.EmitCastToVoidPtr(LB.getPointer()));
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.Types.push_back(Flags);
          CombinedInfo.Mappers.push_back(nullptr);
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);
          break;
        }
        llvm::Value *Size = getExprTypeSize(I->getAssociatedExpression());
        if (!IsMemberPointerOrAddr ||
            (Next == CE && MapType != OMPC_MAP_unknown)) {
          CombinedInfo.Exprs.emplace_back(MapDecl, MapExpr);
          CombinedInfo.BasePointers.push_back(BP.getPointer());
          CombinedInfo.Pointers.push_back(LB.getPointer());
          CombinedInfo.Sizes.push_back(
              CGF.Builder.CreateIntCast(Size, CGF.Int64Ty, /*isSigned=*/true));
          CombinedInfo.NonContigInfo.Dims.push_back(IsNonContiguous ? DimSize
                                                                    : 1);

          // If Mapper is valid, the last component inherits the mapper.
          bool HasMapper = Mapper && Next == CE;
          CombinedInfo.Mappers.push_back(HasMapper ? Mapper : nullptr);
          // We need to add a pointer flag for each map that comes from the
          // same expression except for the first one. We also need to signal
          // this map is the first one that relates with the current capture
          // (there is a set of entries for each capture).
          OpenMPOffloadMappingFlags Flags = getMapTypeBits(
              MapType, MapModifiers, MotionModifiers, IsImplicit,
              !IsExpressionFirstInfo || RequiresReference ||
                  FirstPointerInComplexData || IsMemberReference,
              IsCaptureFirstInfo && !RequiresReference, IsNonContiguous);

          if (!IsExpressionFirstInfo || IsMemberReference) {
            // If we have a PTR_AND_OBJ pair where the OBJ is a pointer as
            // well, then we reset the TO/FROM/ALWAYS/DELETE/CLOSE flags.
            if (IsPointer || (IsMemberReference && Next != CE))
              Flags &= ~(OMP_MAP_TO | OMP_MAP_FROM | OMP_MAP_ALWAYS |
                         OMP_MAP_DELETE | OMP_MAP_CLOSE);

            if (ShouldBeMemberOf) {
              // Set placeholder value MEMBER_OF=FFFF to indicate that the flag
              // should be later updated with the correct value of MEMBER_OF.
              Flags |= OMP_MAP_MEMBER_OF;
              // From now on, all subsequent PTR_AND_OBJ entries should not be
              // marked as MEMBER_OF.
              ShouldBeMemberOf = false;
            }
          }

          CombinedInfo.Types.push_back(Flags);
        }
        // If we have encountered a member expression so far, keep track of the
        // mapped member. If the parent is "*this", then the value declaration
        // is nullptr.
        if (EncounteredME) {
          const auto *FD = cast<FieldDecl>(EncounteredME->getMemberDecl());
          unsigned FieldIndex = FD->getFieldIndex();

          // Update info about the lowest and highest elements for this struct
          // if necessary.
          if (!PartialStruct.Base.isValid()) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
            if (IsFinalArraySection) {
              Address HB =
                  CGF.EmitOMPArraySectionExpr(OASE, /*IsLowerBound=*/false)
                      .getAddress(CGF);
              PartialStruct.HighestElem = {FieldIndex, HB};
            } else {
              PartialStruct.HighestElem = {FieldIndex, LowestElem};
            }
            PartialStruct.Base = BP;
            PartialStruct.LB = BP;
          } else if (FieldIndex < PartialStruct.LowestElem.first) {
            PartialStruct.LowestElem = {FieldIndex, LowestElem};
          } else if (FieldIndex > PartialStruct.HighestElem.first) {
            PartialStruct.HighestElem = {FieldIndex, LowestElem};
          }
        }
        // Need to emit combined struct for array sections.
        if (IsFinalArraySection || IsNonContiguous)
          PartialStruct.IsArraySection = true;

        // If we have a final array section, we are done with this expression.
        if (IsFinalArraySection)
          break;

        // The pointer becomes the base for the next element.
        if (Next != CE)
          BP = IsMemberReference ? LowestElem : LB;

        IsExpressionFirstInfo = false;
        IsCaptureFirstInfo = false;
        FirstPointerInComplexData = false;
        IsPrevMemberReference = IsMemberReference;
      } else if (FirstPointerInComplexData) {
        QualType Ty = Components.rbegin()
                          ->getAssociatedDeclaration()
                          ->getType()
                          .getNonReferenceType();
        BP = CGF.EmitLoadOfPointer(BP, Ty->castAs<PointerType>());
        FirstPointerInComplexData = false;
      }
    }
    // If we ran into the whole component - allocate the space for the whole
    // record.
    if (!EncounteredME)
      PartialStruct.HasCompleteRecord = true;

    if (!IsNonContiguous)
      return;
    const ASTContext &Context = CGF.getContext();

    // For supporting stride in array sections, we need to initialize the
    // first dimension size as 1, the first offset as 0, and the first count
    // as 1.
    MapValuesArrayTy CurOffsets = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 0)};
    MapValuesArrayTy CurCounts = {llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    MapValuesArrayTy CurStrides;
    MapValuesArrayTy DimSizes{llvm::ConstantInt::get(CGF.CGM.Int64Ty, 1)};
    uint64_t ElementTypeSize;
    // Collect Size information for each dimension and get the element size as
    // the first Stride. For example, for `int arr[10][10]`, the DimSizes
    // should be [10, 10] and the first stride is 4 bytes.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();
      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      QualType Ty = OMPArraySectionExpr::getBaseOriginalType(OASE->getBase());
      auto *CAT = Context.getAsConstantArrayType(Ty);
      auto *VAT = Context.getAsVariableArrayType(Ty);

      // We need all the dimension size except for the last dimension.
      assert((VAT || CAT || &Component == &*Components.begin()) &&
             "Should be either ConstantArray or VariableArray if not the "
             "first Component");

      // Get element size if CurStrides is empty.
      if (CurStrides.empty()) {
        const Type *ElementType = nullptr;
        if (CAT)
          ElementType = CAT->getElementType().getTypePtr();
        else if (VAT)
          ElementType = VAT->getElementType().getTypePtr();
        else
          assert(&Component == &*Components.begin() &&
                 "Only expect pointer (non CAT or VAT) when this is the "
                 "first Component");
        // If ElementType is null, then it means the base is a pointer
        // (neither CAT nor VAT) and we'll attempt to get ElementType again
        // for next iteration.
        if (ElementType) {
          // For the case that having pointer as base, we need to remove one
          // level of indirection.
          if (&Component != &*Components.begin())
            ElementType = ElementType->getPointeeOrArrayElementType();
          ElementTypeSize =
              Context.getTypeSizeInChars(ElementType).getQuantity();
          CurStrides.push_back(
              llvm::ConstantInt::get(CGF.Int64Ty, ElementTypeSize));
        }
      }
      // Get dimension value except for the last dimension since we don't need
      // it.
      if (DimSizes.size() < Components.size() - 1) {
        if (CAT)
          DimSizes.push_back(llvm::ConstantInt::get(
              CGF.Int64Ty, CAT->getSize().getZExtValue()));
        else if (VAT)
          DimSizes.push_back(CGF.Builder.CreateIntCast(
              CGF.EmitScalarExpr(VAT->getSizeExpr()), CGF.Int64Ty,
              /*IsSigned=*/false));
      }
    }
    // Skip the dummy dimension since we already have its information.
    auto *DI = DimSizes.begin() + 1;
    // Product of dimension.
    llvm::Value *DimProd =
        llvm::ConstantInt::get(CGF.CGM.Int64Ty, ElementTypeSize);
    // Collect info for non-contiguous data. Notice that offset, count, and
    // stride are only meaningful for an array section, so we insert a null
    // for anything other than an array section.
    // Also, the size of offset, count, and stride is not the same as that of
    // pointers, base_pointers, sizes, or dims. Instead, the size of offset,
    // count, and stride equals the number of non-contiguous declarations in
    // the target update to/from clause.
    for (const OMPClauseMappableExprCommon::MappableComponent &Component :
         Components) {
      const Expr *AssocExpr = Component.getAssociatedExpression();

      if (const auto *AE = dyn_cast<ArraySubscriptExpr>(AssocExpr)) {
        llvm::Value *Offset = CGF.Builder.CreateIntCast(
            CGF.EmitScalarExpr(AE->getIdx()), CGF.Int64Ty,
            /*isSigned=*/false);
        CurOffsets.push_back(Offset);
        CurCounts.push_back(llvm::ConstantInt::get(CGF.Int64Ty, /*V=*/1));
        CurStrides.push_back(CurStrides.back());
        continue;
      }

      const auto *OASE = dyn_cast<OMPArraySectionExpr>(AssocExpr);

      if (!OASE)
        continue;

      // Offset
      const Expr *OffsetExpr = OASE->getLowerBound();
      llvm::Value *Offset = nullptr;
      if (!OffsetExpr) {
        // If offset is absent, then we just set it to zero.
        Offset = llvm::ConstantInt::get(CGF.Int64Ty, 0);
      } else {
        Offset = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(OffsetExpr),
                                           CGF.Int64Ty,
                                           /*isSigned=*/false);
      }
      CurOffsets.push_back(Offset);

      // Count
      const Expr *CountExpr = OASE->getLength();
      llvm::Value *Count = nullptr;
      if (!CountExpr) {
        // In Clang, once a higher dimension is an array section, all the
        // lower dimensions are constructed as array sections too. However,
        // for a case like arr[0:2][2], Clang constructs the inner dimension
        // as an array section even though, according to the spec, it is not
        // actually in array-section form.
        if (!OASE->getColonLocFirst().isValid() &&
            !OASE->getColonLocSecond().isValid()) {
          Count = llvm::ConstantInt::get(CGF.Int64Ty, 1);
        } else {
          // OpenMP 5.0, 2.1.5 Array Sections, Description.
          // When the length is absent it defaults to ⌈(size −
          // lower-bound)/stride⌉, where size is the size of the array
          // dimension.
          const Expr *StrideExpr = OASE->getStride();
          llvm::Value *Stride =
              StrideExpr
                  ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                              CGF.Int64Ty, /*isSigned=*/false)
                  : nullptr;
          if (Stride)
            Count = CGF.Builder.CreateUDiv(
                CGF.Builder.CreateNUWSub(*DI, Offset), Stride);
          else
            Count = CGF.Builder.CreateNUWSub(*DI, Offset);
        }
      } else {
        Count = CGF.EmitScalarExpr(CountExpr);
      }
      Count = CGF.Builder.CreateIntCast(Count, CGF.Int64Ty, /*isSigned=*/false);
      CurCounts.push_back(Count);

      // Stride_n' = Stride_n * (D_0 * D_1 ... * D_n-1) * Unit size
      // Take `int arr[5][5][5]` and `arr[0:2:2][1:2:1][0:2:2]` as an example:
      //          Offset  Count  Stride
      //    D0      0       1      4    (int)    <- dummy dimension
      //    D1      0       2      8    (2 * (1) * 4)
      //    D2      1       2      20   (1 * (1 * 5) * 4)
      //    D3      0       2      200  (2 * (1 * 5 * 4) * 4)
      const Expr *StrideExpr = OASE->getStride();
      llvm::Value *Stride =
          StrideExpr
              ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(StrideExpr),
                                          CGF.Int64Ty, /*isSigned=*/false)
              : nullptr;
      DimProd = CGF.Builder.CreateNUWMul(DimProd, *(DI - 1));
      if (Stride)
        CurStrides.push_back(CGF.Builder.CreateNUWMul(DimProd, Stride));
      else
        CurStrides.push_back(DimProd);
      if (DI != DimSizes.end())
        ++DI;
    }
.NonContigInfo
.Offsets
.push_back(CurOffsets
);
8030 CombinedInfo
.NonContigInfo
.Counts
.push_back(CurCounts
);
8031 CombinedInfo
.NonContigInfo
.Strides
.push_back(CurStrides
);
  /// Return the adjusted map modifiers if the declaration a capture refers to
  /// appears in a first-private clause. This is expected to be used only with
  /// directives that start with 'target'.
  MappableExprsHandler::OpenMPOffloadMappingFlags
  getMapModifiersForPrivateClauses(const CapturedStmt::Capture &Cap) const {
    assert(Cap.capturesVariable() && "Expected capture by reference only!");

    // A first private variable captured by reference will use only the
    // 'private ptr' and 'map to' flag. Return the right flags if the captured
    // declaration is known as first-private in this handler.
    if (FirstPrivateDecls.count(Cap.getCapturedVar())) {
      if (Cap.getCapturedVar()->getType()->isAnyPointerType())
        return MappableExprsHandler::OMP_MAP_TO |
               MappableExprsHandler::OMP_MAP_PTR_AND_OBJ;
      return MappableExprsHandler::OMP_MAP_PRIVATE |
             MappableExprsHandler::OMP_MAP_TO;
    }
    auto I = LambdasMap.find(Cap.getCapturedVar()->getCanonicalDecl());
    if (I != LambdasMap.end())
      // for map(to: lambda): using user specified map type.
      return getMapTypeBits(
          I->getSecond()->getMapType(), I->getSecond()->getMapTypeModifiers(),
          /*MotionModifiers=*/llvm::None, I->getSecond()->isImplicit(),
          /*AddPtrFlag=*/false,
          /*AddIsTargetParamFlag=*/false,
          /*isNonContiguous=*/false);
    return MappableExprsHandler::OMP_MAP_TO |
           MappableExprsHandler::OMP_MAP_FROM;
  }
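  // For illustration (hypothetical snippet; only captures that reach this
  // handler by reference take these flags):
  //
  //   int *p; S2 s;
  //   #pragma omp target firstprivate(p, s)
  //
  // 'p' gets OMP_MAP_TO | OMP_MAP_PTR_AND_OBJ, while 's' gets
  // OMP_MAP_PRIVATE | OMP_MAP_TO.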
  static OpenMPOffloadMappingFlags getMemberOfFlag(unsigned Position) {
    // Rotate by getFlagMemberOffset() bits.
    return static_cast<OpenMPOffloadMappingFlags>(((uint64_t)Position + 1)
                                                  << getFlagMemberOffset());
  }
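  // For example, assuming getFlagMemberOffset() is 48 (i.e. the MEMBER_OF
  // field occupies the high 16 bits of the 64-bit flags):
  //   getMemberOfFlag(0) == 0x0001000000000000  // MEMBER_OF(1)
  //   getMemberOfFlag(3) == 0x0004000000000000  // MEMBER_OF(4)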
  static void setCorrectMemberOfFlag(OpenMPOffloadMappingFlags &Flags,
                                     OpenMPOffloadMappingFlags MemberOfFlag) {
    // If the entry is PTR_AND_OBJ but has not been marked with the special
    // placeholder value 0xFFFF in the MEMBER_OF field, then it should not be
    // marked as MEMBER_OF.
    if ((Flags & OMP_MAP_PTR_AND_OBJ) &&
        ((Flags & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF))
      return;

    // Reset the placeholder value to prepare the flag for the assignment of
    // the proper MEMBER_OF value.
    Flags &= ~OMP_MAP_MEMBER_OF;
    Flags |= MemberOfFlag;
  }
  void getPlainLayout(const CXXRecordDecl *RD,
                      llvm::SmallVectorImpl<const FieldDecl *> &Layout,
                      bool AsBase) const {
    const CGRecordLayout &RL = CGF.getTypes().getCGRecordLayout(RD);

    llvm::StructType *St =
        AsBase ? RL.getBaseSubobjectLLVMType() : RL.getLLVMType();

    unsigned NumElements = St->getNumElements();

    llvm::SmallVector<
        llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>, 4>
        RecordLayout(NumElements);

    // Fill in non-virtual bases.
    for (const auto &I : RD->bases()) {
      if (I.isVirtual())
        continue;
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty() || CGF.getContext()
                                 .getASTRecordLayout(Base)
                                 .getNonVirtualSize()
                                 .isZero())
        continue;

      unsigned FieldIndex = RL.getNonVirtualBaseLLVMFieldNo(Base);
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in virtual bases.
    for (const auto &I : RD->vbases()) {
      const auto *Base = I.getType()->getAsCXXRecordDecl();
      // Ignore empty bases.
      if (Base->isEmpty())
        continue;
      unsigned FieldIndex = RL.getVirtualBaseIndex(Base);
      if (RecordLayout[FieldIndex])
        continue;
      RecordLayout[FieldIndex] = Base;
    }
    // Fill in all the fields.
    assert(!RD->isUnion() && "Unexpected union.");
    for (const auto *Field : RD->fields()) {
      // Fill in non-bitfields. (Bitfields always use a zero pattern, which we
      // will fill in later.)
      if (!Field->isBitField() && !Field->isZeroSize(CGF.getContext())) {
        unsigned FieldIndex = RL.getLLVMFieldNo(Field);
        RecordLayout[FieldIndex] = Field;
      }
    }
    for (const llvm::PointerUnion<const CXXRecordDecl *, const FieldDecl *>
             &Data : RecordLayout) {
      if (Data.isNull())
        continue;
      if (const auto *Base = Data.dyn_cast<const CXXRecordDecl *>())
        getPlainLayout(Base, Layout, /*AsBase=*/true);
      else
        Layout.push_back(Data.get<const FieldDecl *>());
    }
  }
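  // For a hypothetical hierarchy (illustrative, not from the original
  // comments):
  //
  //   struct A { int a; };
  //   struct B : A { int b; };
  //
  // getPlainLayout(B, Layout, /*AsBase=*/false) appends the FieldDecls for
  // 'a' (recursing into the base subobject) and then 'b', i.e. the fields in
  // memory order with base-class fields flattened in.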
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfoForClauses(
      ArrayRef<const OMPClause *> Clauses, MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    // We have to process the component lists that relate with the same
    // declaration in a single chunk so that we can generate the map flags
    // correctly. Therefore, we organize all lists in a map.
    enum MapKind { Present, Allocs, Other, Total };
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<SmallVector<MapInfo, 8>, 4>>
        Info;
    // Helper function to fill the information map for the different supported
    // clauses.
    auto &&InfoGen =
        [&Info, &SkipVarSet](
            const ValueDecl *D, MapKind Kind,
            OMPClauseMappableExprCommon::MappableExprComponentListRef L,
            OpenMPMapClauseKind MapType,
            ArrayRef<OpenMPMapModifierKind> MapModifiers,
            ArrayRef<OpenMPMotionModifierKind> MotionModifiers,
            bool ReturnDevicePointer, bool IsImplicit, const ValueDecl *Mapper,
            const Expr *VarRef = nullptr, bool ForDeviceAddr = false) {
          if (SkipVarSet.contains(D))
            return;
          auto It = Info.find(D);
          if (It == Info.end())
            It = Info
                     .insert(std::make_pair(
                         D, SmallVector<SmallVector<MapInfo, 8>, 4>(Total)))
                     .first;
          It->second[Kind].emplace_back(
              L, MapType, MapModifiers, MotionModifiers, ReturnDevicePointer,
              IsImplicit, Mapper, VarRef, ForDeviceAddr);
        };
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPMapClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMapTypeModifiers(),
                             OMPC_MAP_MODIFIER_present))
        Kind = Present;
      else if (C->getMapType() == OMPC_MAP_alloc)
        Kind = Allocs;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), C->getMapType(),
                C->getMapTypeModifiers(), llvm::None,
                /*ReturnDevicePointer=*/false, C->isImplicit(), std::get<2>(L),
                E);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPToClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_to, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPFromClause>(Cl);
      if (!C)
        continue;
      MapKind Kind = Other;
      if (llvm::is_contained(C->getMotionModifiers(),
                             OMPC_MOTION_MODIFIER_present))
        Kind = Present;
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->component_lists()) {
        InfoGen(std::get<0>(L), Kind, std::get<1>(L), OMPC_MAP_from, llvm::None,
                C->getMotionModifiers(), /*ReturnDevicePointer=*/false,
                C->isImplicit(), std::get<2>(L), *EI);
        ++EI;
      }
    }
    // Look at the use_device_ptr and use_device_addr clauses information and
    // mark the existing map entries as such. If there is no map information for
    // an entry in the use_device_ptr and use_device_addr list, we create one
    // with map type 'alloc' and zero size section. It is the user's fault if
    // that was not mapped before. If there is no map information and the
    // pointer is a struct member, then we defer the emission of that entry
    // until the whole struct has been processed.
    llvm::MapVector<CanonicalDeclPtr<const Decl>,
                    SmallVector<DeferredDevicePtrEntryTy, 4>>
        DeferredInfo;
    MapCombinedInfoTy UseDeviceDataCombinedInfo;

    auto &&UseDeviceDataCombinedInfoGen =
        [&UseDeviceDataCombinedInfo](const ValueDecl *VD, llvm::Value *Ptr,
                                     CodeGenFunction &CGF) {
          UseDeviceDataCombinedInfo.Exprs.push_back(VD);
          UseDeviceDataCombinedInfo.BasePointers.emplace_back(Ptr, VD);
          UseDeviceDataCombinedInfo.Pointers.push_back(Ptr);
          UseDeviceDataCombinedInfo.Sizes.push_back(
              llvm::Constant::getNullValue(CGF.Int64Ty));
          UseDeviceDataCombinedInfo.Types.push_back(OMP_MAP_RETURN_PARAM);
          UseDeviceDataCombinedInfo.Mappers.push_back(nullptr);
        };
    auto &&MapInfoGen =
        [&DeferredInfo, &UseDeviceDataCombinedInfoGen,
         &InfoGen](CodeGenFunction &CGF, const Expr *IE, const ValueDecl *VD,
                   OMPClauseMappableExprCommon::MappableExprComponentListRef
                       Components,
                   bool IsImplicit, bool IsDevAddr) {
          // We didn't find any match in our map information - generate a zero
          // size array section - if the pointer is a struct member we defer
          // this action until the whole struct has been processed.
          if (isa<MemberExpr>(IE)) {
            // Insert the pointer into Info to be processed by
            // generateInfoForComponentList. Because it is a member pointer
            // without a pointee, no entry will be generated for it, therefore
            // we need to generate one after the whole struct has been
            // processed. Nonetheless, generateInfoForComponentList must be
            // called to take the pointer into account for the calculation of
            // the range of the partial struct.
            InfoGen(nullptr, Other, Components, OMPC_MAP_unknown, llvm::None,
                    llvm::None, /*ReturnDevicePointer=*/false, IsImplicit,
                    nullptr, nullptr, IsDevAddr);
            DeferredInfo[nullptr].emplace_back(IE, VD, IsDevAddr);
          } else {
            llvm::Value *Ptr;
            if (IsDevAddr) {
              if (IE->isGLValue())
                Ptr = CGF.EmitLValue(IE).getPointer(CGF);
              else
                Ptr = CGF.EmitScalarExpr(IE);
            } else {
              Ptr = CGF.EmitLoadOfScalar(CGF.EmitLValue(IE), IE->getExprLoc());
            }
            UseDeviceDataCombinedInfoGen(VD, Ptr, CGF);
          }
        };
    auto &&IsMapInfoExist = [&Info](CodeGenFunction &CGF, const ValueDecl *VD,
                                    const Expr *IE, bool IsDevAddr) -> bool {
      // We potentially have map information for this declaration already.
      // Look for the first set of components that refer to it. If found,
      // return true.
      // If the first component is a member expression, we have to look into
      // 'this', which maps to null in the map of map information. Otherwise
      // look directly for the information.
      auto It = Info.find(isa<MemberExpr>(IE) ? nullptr : VD);
      if (It != Info.end()) {
        bool Found = false;
        for (auto &Data : It->second) {
          auto *CI = llvm::find_if(Data, [VD](const MapInfo &MI) {
            return MI.Components.back().getAssociatedDeclaration() == VD;
          });
          // If we found a map entry, signal that the pointer has to be
          // returned and move on to the next declaration. Exclude cases where
          // the base pointer is mapped as array subscript, array section or
          // array shaping. The base address is passed as a pointer to base in
          // this case and cannot be used as a base for use_device_ptr list
          // item.
          if (CI != Data.end()) {
            if (IsDevAddr) {
              CI->ReturnDevicePointer = true;
              Found = true;
              break;
            } else {
              auto PrevCI = std::next(CI->Components.rbegin());
              const auto *VarD = dyn_cast<VarDecl>(VD);
              if (CGF.CGM.getOpenMPRuntime().hasRequiresUnifiedSharedMemory() ||
                  isa<MemberExpr>(IE) ||
                  !VD->getType().getNonReferenceType()->isPointerType() ||
                  PrevCI == CI->Components.rend() ||
                  isa<MemberExpr>(PrevCI->getAssociatedExpression()) || !VarD ||
                  VarD->hasLocalStorage()) {
                CI->ReturnDevicePointer = true;
                Found = true;
                break;
              }
            }
          }
        }
        return Found;
      }
      return false;
    };
    // Look at the use_device_ptr clause information and mark the existing map
    // entries as such. If there is no map information for an entry in the
    // use_device_ptr list, we create one with map type 'alloc' and zero size
    // section. It is the user's fault if that was not mapped before. If there
    // is no map information and the pointer is a struct member, then we defer
    // the emission of that entry until the whole struct has been processed.
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDevicePtrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!Components.empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = Components.back().getAssociatedDeclaration();
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = Components.back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/false))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/false);
      }
    }
    llvm::SmallDenseSet<CanonicalDeclPtr<const Decl>, 4> Processed;
    for (const auto *Cl : Clauses) {
      const auto *C = dyn_cast<OMPUseDeviceAddrClause>(Cl);
      if (!C)
        continue;
      for (const auto L : C->component_lists()) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components =
            std::get<1>(L);
        assert(!std::get<1>(L).empty() &&
               "Not expecting empty list of components!");
        const ValueDecl *VD = std::get<1>(L).back().getAssociatedDeclaration();
        if (!Processed.insert(VD).second)
          continue;
        VD = cast<ValueDecl>(VD->getCanonicalDecl());
        const Expr *IE = std::get<1>(L).back().getAssociatedExpression();
        if (IsMapInfoExist(CGF, VD, IE, /*IsDevAddr=*/true))
          continue;
        MapInfoGen(CGF, IE, VD, Components, C->isImplicit(),
                   /*IsDevAddr=*/true);
      }
    }
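    // A minimal sketch of the clauses handled above (hypothetical example;
    // 'kernel_launch' is an arbitrary user function):
    //
    //   double *p;  // mapped earlier, e.g. #pragma omp target data map(p[:N])
    //   #pragma omp target data use_device_ptr(p)
    //   { kernel_launch(p); }  // 'p' holds the device address here
    //
    // If 'p' was mapped before, its existing entry is tagged RETURN_PARAM;
    // otherwise a zero-size 'alloc' entry is created (or deferred when 'p' is
    // a struct member).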
    for (const auto &Data : Info) {
      StructRangeInfoTy PartialStruct;
      // Temporary generated information.
      MapCombinedInfoTy CurInfo;
      const Decl *D = Data.first;
      const ValueDecl *VD = cast_or_null<ValueDecl>(D);
      for (const auto &M : Data.second) {
        for (const MapInfo &L : M) {
          assert(!L.Components.empty() &&
                 "Not expecting declaration with no component lists.");

          // Remember the current base pointer index.
          unsigned CurrentBasePointersIdx = CurInfo.BasePointers.size();
          CurInfo.NonContigInfo.IsNonContiguous =
              L.Components.back().isNonContiguous();
          generateInfoForComponentList(
              L.MapType, L.MapModifiers, L.MotionModifiers, L.Components,
              CurInfo, PartialStruct, /*IsFirstComponentList=*/false,
              L.IsImplicit, L.Mapper, L.ForDeviceAddr, VD, L.VarRef);

          // If this entry relates with a device pointer, set the relevant
          // declaration and add the 'return pointer' flag.
          if (L.ReturnDevicePointer) {
            assert(CurInfo.BasePointers.size() > CurrentBasePointersIdx &&
                   "Unexpected number of mapped base pointers.");

            const ValueDecl *RelevantVD =
                L.Components.back().getAssociatedDeclaration();
            assert(RelevantVD &&
                   "No relevant declaration related with device pointer??");

            CurInfo.BasePointers[CurrentBasePointersIdx].setDevicePtrDecl(
                RelevantVD);
            CurInfo.Types[CurrentBasePointersIdx] |= OMP_MAP_RETURN_PARAM;
          }
        }
      }
      // Append any pending zero-length pointers which are struct members and
      // used with use_device_ptr or use_device_addr.
      auto CI = DeferredInfo.find(Data.first);
      if (CI != DeferredInfo.end()) {
        for (const DeferredDevicePtrEntryTy &L : CI->second) {
          llvm::Value *BasePtr;
          llvm::Value *Ptr;
          if (L.ForDeviceAddr) {
            if (L.IE->isGLValue())
              Ptr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            else
              Ptr = this->CGF.EmitScalarExpr(L.IE);
            BasePtr = Ptr;
            // Entry is RETURN_PARAM. Also, set the placeholder value
            // MEMBER_OF=FFFF so that the entry is later updated with the
            // correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_RETURN_PARAM | OMP_MAP_MEMBER_OF);
          } else {
            BasePtr = this->CGF.EmitLValue(L.IE).getPointer(CGF);
            Ptr = this->CGF.EmitLoadOfScalar(this->CGF.EmitLValue(L.IE),
                                             L.IE->getExprLoc());
            // Entry is PTR_AND_OBJ and RETURN_PARAM. Also, set the
            // placeholder value MEMBER_OF=FFFF so that the entry is later
            // updated with the correct value of MEMBER_OF.
            CurInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_RETURN_PARAM |
                                    OMP_MAP_MEMBER_OF);
          }
          CurInfo.Exprs.push_back(L.VD);
          CurInfo.BasePointers.emplace_back(BasePtr, L.VD);
          CurInfo.Pointers.push_back(Ptr);
          CurInfo.Sizes.push_back(
              llvm::Constant::getNullValue(this->CGF.Int64Ty));
          CurInfo.Mappers.push_back(nullptr);
        }
      }
      // If there is an entry in PartialStruct it means we have a struct with
      // individual members mapped. Emit an extra combined entry.
      if (PartialStruct.Base.isValid()) {
        CurInfo.NonContigInfo.Dims.push_back(0);
        emitCombinedEntry(CombinedInfo, CurInfo.Types, PartialStruct, VD);
      }

      // We need to append the results of this capture to what we already
      // have.
      CombinedInfo.append(CurInfo);
    }
    // Append data for use_device_ptr clauses.
    CombinedInfo.append(UseDeviceDataCombinedInfo);
  }
public:
  MappableExprsHandler(const OMPExecutableDirective &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {
    // Extract firstprivate clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPFirstprivateClause>())
      for (const auto *D : C->varlists())
        FirstPrivateDecls.try_emplace(
            cast<VarDecl>(cast<DeclRefExpr>(D)->getDecl()), C->isImplicit());
8489 for (const auto *C
: Dir
.getClausesOfKind
<OMPUsesAllocatorsClause
>()) {
8490 for (unsigned I
= 0, E
= C
->getNumberOfAllocators(); I
< E
; ++I
) {
8491 OMPUsesAllocatorsClause::Data D
= C
->getAllocatorData(I
);
8492 if (const auto *DRE
= dyn_cast_or_null
<DeclRefExpr
>(D
.AllocatorTraits
))
8493 FirstPrivateDecls
.try_emplace(cast
<VarDecl
>(DRE
->getDecl()),
8495 else if (const auto *VD
= dyn_cast
<VarDecl
>(
8496 cast
<DeclRefExpr
>(D
.Allocator
->IgnoreParenImpCasts())
8498 FirstPrivateDecls
.try_emplace(VD
, /*Implicit=*/true);
    // Extract device pointer clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPIsDevicePtrClause>())
      for (auto L : C->component_lists())
        DevPointersMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract device addr clause information.
    for (const auto *C : Dir.getClausesOfKind<OMPHasDeviceAddrClause>())
      for (auto L : C->component_lists())
        HasDevAddrsMap[std::get<0>(L)].push_back(std::get<1>(L));
    // Extract map information.
    for (const auto *C : Dir.getClausesOfKind<OMPMapClause>()) {
      if (C->getMapType() != OMPC_MAP_to)
        continue;
      for (auto L : C->component_lists()) {
        const ValueDecl *VD = std::get<0>(L);
        const auto *RD = VD ? VD->getType()
                                  .getCanonicalType()
                                  .getNonReferenceType()
                                  ->getAsCXXRecordDecl()
                            : nullptr;
        if (RD && RD->isLambda())
          LambdasMap.try_emplace(std::get<0>(L), C);
      }
    }
  }
  /// Constructor for the declare mapper directive.
  MappableExprsHandler(const OMPDeclareMapperDecl &Dir, CodeGenFunction &CGF)
      : CurDir(&Dir), CGF(CGF) {}
  /// Generate code for the combined entry if we have a partially mapped struct
  /// and take care of the mapping flags of the arguments corresponding to
  /// individual struct members.
  void emitCombinedEntry(MapCombinedInfoTy &CombinedInfo,
                         MapFlagsArrayTy &CurTypes,
                         const StructRangeInfoTy &PartialStruct,
                         const ValueDecl *VD = nullptr,
                         bool NotTargetParams = true) const {
    if (CurTypes.size() == 1 &&
        ((CurTypes.back() & OMP_MAP_MEMBER_OF) != OMP_MAP_MEMBER_OF) &&
        !PartialStruct.IsArraySection)
      return;
    Address LBAddr = PartialStruct.LowestElem.second;
    Address HBAddr = PartialStruct.HighestElem.second;
    if (PartialStruct.HasCompleteRecord) {
      LBAddr = PartialStruct.LB;
      HBAddr = PartialStruct.LB;
    }
    CombinedInfo.Exprs.push_back(VD);
    // Base is the base of the struct
    CombinedInfo.BasePointers.push_back(PartialStruct.Base.getPointer());
    // Pointer is the address of the lowest element
    llvm::Value *LB = LBAddr.getPointer();
    CombinedInfo.Pointers.push_back(LB);
    // There should not be a mapper for a combined entry.
    CombinedInfo.Mappers.push_back(nullptr);
    // Size is (addr of {highest+1} element) - (addr of lowest element)
    llvm::Value *HB = HBAddr.getPointer();
    llvm::Value *HAddr =
        CGF.Builder.CreateConstGEP1_32(HBAddr.getElementType(), HB, /*Idx0=*/1);
    llvm::Value *CLAddr = CGF.Builder.CreatePointerCast(LB, CGF.VoidPtrTy);
    llvm::Value *CHAddr = CGF.Builder.CreatePointerCast(HAddr, CGF.VoidPtrTy);
    llvm::Value *Diff = CGF.Builder.CreatePtrDiff(CGF.Int8Ty, CHAddr, CLAddr);
    llvm::Value *Size = CGF.Builder.CreateIntCast(Diff, CGF.Int64Ty,
                                                  /*isSigned=*/false);
    CombinedInfo.Sizes.push_back(Size);
    // The map type is always TARGET_PARAM when generating info for captures.
    CombinedInfo.Types.push_back(NotTargetParams ? OMP_MAP_NONE
                                                 : OMP_MAP_TARGET_PARAM);
    // If any element has the present modifier, then make sure the runtime
    // doesn't attempt to allocate the struct.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_PRESENT;
        }))
      CombinedInfo.Types.back() |= OMP_MAP_PRESENT;
    // Remove TARGET_PARAM flag from the first element
    (*CurTypes.begin()) &= ~OMP_MAP_TARGET_PARAM;
    // If any element has the ompx_hold modifier, then make sure the runtime
    // uses the hold reference count for the struct as a whole so that it won't
    // be unmapped by an extra dynamic reference count decrement. Add it to all
    // elements as well so the runtime knows which reference count to check
    // when determining whether it's time for device-to-host transfers of
    // individual elements.
    if (CurTypes.end() !=
        llvm::find_if(CurTypes, [](OpenMPOffloadMappingFlags Type) {
          return Type & OMP_MAP_OMPX_HOLD;
        })) {
      CombinedInfo.Types.back() |= OMP_MAP_OMPX_HOLD;
      for (auto &M : CurTypes)
        M |= OMP_MAP_OMPX_HOLD;
    }

    // All other current entries will be MEMBER_OF the combined entry
    // (except for PTR_AND_OBJ entries which do not have a placeholder value
    // 0xFFFF in the MEMBER_OF field).
    OpenMPOffloadMappingFlags MemberOfFlag =
        getMemberOfFlag(CombinedInfo.BasePointers.size() - 1);
    for (auto &M : CurTypes)
      setCorrectMemberOfFlag(M, MemberOfFlag);
  }
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted mappable expressions (all included in \a
  /// CombinedInfo). Also, for each item that relates with a device pointer, a
  /// pair of the relevant declaration and index where it occurs is appended to
  /// the device pointers info array.
  void generateAllInfo(
      MapCombinedInfoTy &CombinedInfo,
      const llvm::DenseSet<CanonicalDeclPtr<const Decl>> &SkipVarSet =
          llvm::DenseSet<CanonicalDeclPtr<const Decl>>()) const {
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    generateAllInfoForClauses(CurExecDir->clauses(), CombinedInfo, SkipVarSet);
  }
  /// Generate all the base pointers, section pointers, sizes, map types, and
  /// mappers for the extracted map clauses of user-defined mapper (all included
  /// in \a CombinedInfo).
  void generateAllInfoForMapper(MapCombinedInfoTy &CombinedInfo) const {
    assert(CurDir.is<const OMPDeclareMapperDecl *>() &&
           "Expect a declare mapper directive");
    const auto *CurMapperDir = CurDir.get<const OMPDeclareMapperDecl *>();
    generateAllInfoForClauses(CurMapperDir->clauses(), CombinedInfo);
  }
  /// Emit capture info for lambdas for variables captured by reference.
  void generateInfoForLambdaCaptures(
      const ValueDecl *VD, llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
      llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers) const {
    QualType VDType = VD->getType().getCanonicalType().getNonReferenceType();
    const auto *RD = VDType->getAsCXXRecordDecl();
    if (!RD || !RD->isLambda())
      return;
    Address VDAddr(Arg, CGF.ConvertTypeForMem(VDType),
                   CGF.getContext().getDeclAlign(VD));
    LValue VDLVal = CGF.MakeAddrLValue(VDAddr, VDType);
    llvm::DenseMap<const ValueDecl *, FieldDecl *> Captures;
    FieldDecl *ThisCapture = nullptr;
    RD->getCaptureFields(Captures, ThisCapture);
    if (ThisCapture) {
      LValue ThisLVal =
          CGF.EmitLValueForFieldInitialization(VDLVal, ThisCapture);
      LValue ThisLValVal = CGF.EmitLValueForField(VDLVal, ThisCapture);
      LambdaPointers.try_emplace(ThisLVal.getPointer(CGF),
                                 VDLVal.getPointer(CGF));
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.push_back(ThisLVal.getPointer(CGF));
      CombinedInfo.Pointers.push_back(ThisLValVal.getPointer(CGF));
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(CGF.getContext().VoidPtrTy),
                                    CGF.Int64Ty, /*isSigned=*/true));
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
    for (const LambdaCapture &LC : RD->captures()) {
      if (!LC.capturesVariable())
        continue;
      const VarDecl *VD = cast<VarDecl>(LC.getCapturedVar());
      if (LC.getCaptureKind() != LCK_ByRef && !VD->getType()->isPointerType())
        continue;
      auto It = Captures.find(VD);
      assert(It != Captures.end() && "Found lambda capture without field.");
      LValue VarLVal = CGF.EmitLValueForFieldInitialization(VDLVal, It->second);
      if (LC.getCaptureKind() == LCK_ByRef) {
        LValue VarLValVal = CGF.EmitLValueForField(VDLVal, It->second);
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarLValVal.getPointer(CGF));
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(
                VD->getType().getCanonicalType().getNonReferenceType()),
            CGF.Int64Ty, /*isSigned=*/true));
      } else {
        RValue VarRVal = CGF.EmitLoadOfLValue(VarLVal, RD->getLocation());
        LambdaPointers.try_emplace(VarLVal.getPointer(CGF),
                                   VDLVal.getPointer(CGF));
        CombinedInfo.Exprs.push_back(VD);
        CombinedInfo.BasePointers.push_back(VarLVal.getPointer(CGF));
        CombinedInfo.Pointers.push_back(VarRVal.getScalarVal());
        CombinedInfo.Sizes.push_back(llvm::ConstantInt::get(CGF.Int64Ty, 0));
      }
      CombinedInfo.Types.push_back(OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                                   OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT);
      CombinedInfo.Mappers.push_back(nullptr);
    }
  }
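  // For a hypothetical capture such as:
  //
  //   int x;
  //   auto L = [&x]() { return x; };
  //   #pragma omp target map(to: L)
  //   { L(); }
  //
  // the entry for 'x' is emitted as PTR_AND_OBJ | LITERAL | MEMBER_OF(FFFF) |
  // IMPLICIT, with the placeholder later fixed up by
  // adjustMemberOfForLambdaCaptures.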
  /// Set correct indices for lambdas captures.
  void adjustMemberOfForLambdaCaptures(
      const llvm::DenseMap<llvm::Value *, llvm::Value *> &LambdaPointers,
      MapBaseValuesArrayTy &BasePointers, MapValuesArrayTy &Pointers,
      MapFlagsArrayTy &Types) const {
    for (unsigned I = 0, E = Types.size(); I < E; ++I) {
      // Set correct member_of idx for all implicit lambda captures.
      if (Types[I] != (OMP_MAP_PTR_AND_OBJ | OMP_MAP_LITERAL |
                       OMP_MAP_MEMBER_OF | OMP_MAP_IMPLICIT))
        continue;
      llvm::Value *BasePtr = LambdaPointers.lookup(*BasePointers[I]);
      assert(BasePtr && "Unable to find base lambda address.");
      int TgtIdx = -1;
      for (unsigned J = I; J > 0; --J) {
        unsigned Idx = J - 1;
        if (Pointers[Idx] != BasePtr)
          continue;
        TgtIdx = Idx;
        break;
      }
      assert(TgtIdx != -1 && "Unable to find parent lambda.");
      // All other current entries will be MEMBER_OF the combined entry
      // (except for PTR_AND_OBJ entries which do not have a placeholder value
      // 0xFFFF in the MEMBER_OF field).
      OpenMPOffloadMappingFlags MemberOfFlag = getMemberOfFlag(TgtIdx);
      setCorrectMemberOfFlag(Types[I], MemberOfFlag);
    }
  }
  /// Generate the base pointers, section pointers, sizes, map types, and
  /// mappers associated to a given capture (all included in \a CombinedInfo).
  void generateInfoForCapture(const CapturedStmt::Capture *Cap,
                              llvm::Value *Arg, MapCombinedInfoTy &CombinedInfo,
                              StructRangeInfoTy &PartialStruct) const {
    assert(!Cap->capturesVariableArrayType() &&
           "Not expecting to generate map info for a variable array type!");

    // We need to know when we are generating information for the first
    // component associated with a capture.
    const ValueDecl *VD = Cap->capturesThis()
                              ? nullptr
                              : Cap->getCapturedVar()->getCanonicalDecl();

    // for map(to: lambda): skip here, processing it in
    // generateDefaultMapInfo
    if (LambdasMap.count(VD))
      return;

    // If this declaration appears in a is_device_ptr clause we just have to
    // pass the pointer by value. If it is a reference to a declaration, we just
    // pass its value.
    if (VD && (DevPointersMap.count(VD) || HasDevAddrsMap.count(VD))) {
      CombinedInfo.Exprs.push_back(VD);
      CombinedInfo.BasePointers.emplace_back(Arg, VD);
      CombinedInfo.Pointers.push_back(Arg);
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(CGF.getContext().VoidPtrTy), CGF.Int64Ty,
          /*isSigned=*/true));
      CombinedInfo.Types.push_back(
          (Cap->capturesVariable() ? OMP_MAP_TO : OMP_MAP_LITERAL) |
          OMP_MAP_TARGET_PARAM);
      CombinedInfo.Mappers.push_back(nullptr);
      return;
    }
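    // A minimal sketch of the early-return path above (hypothetical):
    //
    //   void f(double *p) {
    //   #pragma omp target is_device_ptr(p)
    //     { p[0] = 1.0; }
    //   }
    //
    // 'p' is forwarded by value as OMP_MAP_TO | OMP_MAP_TARGET_PARAM
    // (OMP_MAP_LITERAL is used instead of TO for the 'this' capture).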
    using MapData =
        std::tuple<OMPClauseMappableExprCommon::MappableExprComponentListRef,
                   OpenMPMapClauseKind, ArrayRef<OpenMPMapModifierKind>, bool,
                   const ValueDecl *, const Expr *>;
    SmallVector<MapData, 4> DeclComponentLists;
    // For member fields listed in is_device_ptr, store them in
    // DeclComponentLists for generating components info.
    static const OpenMPMapModifierKind Unknown = OMPC_MAP_MODIFIER_unknown;
    auto It = DevPointersMap.find(VD);
    if (It != DevPointersMap.end())
      for (const auto &MCL : It->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_to, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    auto I = HasDevAddrsMap.find(VD);
    if (I != HasDevAddrsMap.end())
      for (const auto &MCL : I->second)
        DeclComponentLists.emplace_back(MCL, OMPC_MAP_tofrom, Unknown,
                                        /*IsImplicit=*/true, nullptr,
                                        nullptr);
    assert(CurDir.is<const OMPExecutableDirective *>() &&
           "Expect an executable directive");
    const auto *CurExecDir = CurDir.get<const OMPExecutableDirective *>();
    for (const auto *C : CurExecDir->getClausesOfKind<OMPMapClause>()) {
      const auto *EI = C->getVarRefs().begin();
      for (const auto L : C->decl_component_lists(VD)) {
        const ValueDecl *VDecl, *Mapper;
        // The Expression is not correct if the mapping is implicit
        const Expr *E = (C->getMapLoc().isValid()) ? *EI : nullptr;
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
        std::tie(VDecl, Components, Mapper) = L;
        assert(VDecl == VD && "We got information for the wrong declaration??");
        assert(!Components.empty() &&
               "Not expecting declaration with no component lists.");
        DeclComponentLists.emplace_back(Components, C->getMapType(),
                                        C->getMapTypeModifiers(),
                                        C->isImplicit(), Mapper, E);
        ++EI;
      }
    }
    llvm::stable_sort(DeclComponentLists, [](const MapData &LHS,
                                             const MapData &RHS) {
      ArrayRef<OpenMPMapModifierKind> MapModifiers = std::get<2>(LHS);
      OpenMPMapClauseKind MapType = std::get<1>(RHS);
      bool HasPresent =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocs = MapType == OMPC_MAP_alloc;
      MapModifiers = std::get<2>(RHS);
      MapType = std::get<1>(LHS);
      bool HasPresentR =
          llvm::is_contained(MapModifiers, clang::OMPC_MAP_MODIFIER_present);
      bool HasAllocsR = MapType == OMPC_MAP_alloc;
      return (HasPresent && !HasPresentR) || (HasAllocs && !HasAllocsR);
    });
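    // Ordering note (illustration): lists whose map type carries the
    // 'present' modifier, or whose map type is 'alloc', sort ahead of the
    // rest, so e.g. map(present, to: s.a) is emitted before a plain
    // map(tofrom: s.b) for the same base declaration.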
    // Find overlapping elements (including the offset from the base element).
    llvm::SmallDenseMap<
        const MapData *,
        llvm::SmallVector<
            OMPClauseMappableExprCommon::MappableExprComponentListRef, 4>,
        4>
        OverlappedData;
    size_t Count = 0;
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ++Count;
      for (const MapData &L1 : makeArrayRef(DeclComponentLists).slice(Count)) {
        OMPClauseMappableExprCommon::MappableExprComponentListRef Components1;
        std::tie(Components1, MapType, MapModifiers, IsImplicit, Mapper,
                 VarRef) = L1;
        auto CI = Components.rbegin();
        auto CE = Components.rend();
        auto SI = Components1.rbegin();
        auto SE = Components1.rend();
        for (; CI != CE && SI != SE; ++CI, ++SI) {
          if (CI->getAssociatedExpression()->getStmtClass() !=
              SI->getAssociatedExpression()->getStmtClass())
            break;
          // Are we dealing with different variables/fields?
          if (CI->getAssociatedDeclaration() != SI->getAssociatedDeclaration())
            break;
        }
        // Found overlapping if, at least for one component, reached the head
        // of the components list.
        if (CI == CE || SI == SE) {
          // Ignore it if it is the same component.
          if (CI == CE && SI == SE)
            continue;
          const auto It = (SI == SE) ? CI : SI;
          // If one component is a pointer and another one is a kind of
          // dereference of this pointer (array subscript, section, dereference,
          // etc.), it is not an overlapping.
          // Same, if one component is a base and another component is a
          // dereferenced pointer memberexpr with the same base.
          if (!isa<MemberExpr>(It->getAssociatedExpression()) ||
              (std::prev(It)->getAssociatedDeclaration() &&
               std::prev(It)
                   ->getAssociatedDeclaration()
                   ->getType()
                   ->isPointerType()) ||
              (It->getAssociatedDeclaration() &&
               It->getAssociatedDeclaration()->getType()->isPointerType() &&
               std::next(It) != CE && std::next(It) != SE))
            continue;
          const MapData &BaseData = CI == CE ? L : L1;
          OMPClauseMappableExprCommon::MappableExprComponentListRef SubData =
              SI == SE ? Components : Components1;
          auto &OverlappedElements = OverlappedData.FindAndConstruct(&BaseData);
          OverlappedElements.getSecond().push_back(SubData);
        }
      }
    }
    // Sort the overlapped elements for each item.
    llvm::SmallVector<const FieldDecl *, 4> Layout;
    if (!OverlappedData.empty()) {
      const Type *BaseType = VD->getType().getCanonicalType().getTypePtr();
      const Type *OrigType = BaseType->getPointeeOrArrayElementType();
      while (BaseType != OrigType) {
        BaseType = OrigType->getCanonicalTypeInternal().getTypePtr();
        OrigType = BaseType->getPointeeOrArrayElementType();
      }

      if (const auto *CRD = BaseType->getAsCXXRecordDecl())
        getPlainLayout(CRD, Layout, /*AsBase=*/false);
      else {
        const auto *RD = BaseType->getAsRecordDecl();
        Layout.append(RD->field_begin(), RD->field_end());
      }
    }
    for (auto &Pair : OverlappedData) {
      llvm::stable_sort(
          Pair.getSecond(),
          [&Layout](
              OMPClauseMappableExprCommon::MappableExprComponentListRef First,
              OMPClauseMappableExprCommon::MappableExprComponentListRef
                  Second) {
            auto CI = First.rbegin();
            auto CE = First.rend();
            auto SI = Second.rbegin();
            auto SE = Second.rend();
            for (; CI != CE && SI != SE; ++CI, ++SI) {
              if (CI->getAssociatedExpression()->getStmtClass() !=
                  SI->getAssociatedExpression()->getStmtClass())
                break;
              // Are we dealing with different variables/fields?
              if (CI->getAssociatedDeclaration() !=
                  SI->getAssociatedDeclaration())
                break;
            }

            // Lists contain the same elements.
            if (CI == CE && SI == SE)
              return false;

            // A list with fewer elements is less than a list with more
            // elements.
            if (CI == CE || SI == SE)
              return CI == CE;

            const auto *FD1 = cast<FieldDecl>(CI->getAssociatedDeclaration());
            const auto *FD2 = cast<FieldDecl>(SI->getAssociatedDeclaration());
            if (FD1->getParent() == FD2->getParent())
              return FD1->getFieldIndex() < FD2->getFieldIndex();
            const auto *It =
                llvm::find_if(Layout, [FD1, FD2](const FieldDecl *FD) {
                  return FD == FD1 || FD == FD2;
                });
            return It == Layout.end() || *It == FD1;
          });
    }
    // Emit the map information associated with a capture, because the mapping
    // flags depend on it.
    // Go through all of the elements with the overlapped elements first.
    bool IsFirstComponentList = true;
    for (const auto &Pair : OverlappedData) {
      const MapData &L = *Pair.getFirst();
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      ArrayRef<OMPClauseMappableExprCommon::MappableExprComponentListRef>
          OverlappedComponents = Pair.getSecond();
      generateInfoForComponentList(
          MapType, MapModifiers, llvm::None, Components, CombinedInfo,
          PartialStruct, IsFirstComponentList, IsImplicit, Mapper,
          /*ForDeviceAddr=*/false, VD, VarRef, OverlappedComponents);
      IsFirstComponentList = false;
    }
    // Go through other elements without overlapped elements.
    for (const MapData &L : DeclComponentLists) {
      OMPClauseMappableExprCommon::MappableExprComponentListRef Components;
      OpenMPMapClauseKind MapType;
      ArrayRef<OpenMPMapModifierKind> MapModifiers;
      bool IsImplicit;
      const ValueDecl *Mapper;
      const Expr *VarRef;
      std::tie(Components, MapType, MapModifiers, IsImplicit, Mapper, VarRef) =
          L;
      auto It = OverlappedData.find(&L);
      if (It == OverlappedData.end())
        generateInfoForComponentList(MapType, MapModifiers, llvm::None,
                                     Components, CombinedInfo, PartialStruct,
                                     IsFirstComponentList, IsImplicit, Mapper,
                                     /*ForDeviceAddr=*/false, VD, VarRef);
      IsFirstComponentList = false;
    }
  }
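  // Worked example (hypothetical user code): given
  //
  //   struct S { int *p; int a; } s;
  //   #pragma omp target map(tofrom: s) map(to: s.p[0:n])
  //
  // the component lists for 's' and 's.p[0:n]' share the base 's', so the
  // list for the pointer section is recorded as overlapping the whole-struct
  // entry above and is emitted together with it rather than as an independent
  // mapping.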
  /// Generate the default map information for a given capture \a CI,
  /// record field declaration \a RI and captured value \a CV.
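  /// For instance (an illustrative sketch, not normative): a captured 'this'
  /// defaults to 'tofrom', a non-pointer capture by copy is passed as a
  /// literal, and a capture by reference defaults to 'to' for scalars and
  /// 'tofrom' for aggregates, as computed below.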
  void generateDefaultMapInfo(const CapturedStmt::Capture &CI,
                              const FieldDecl &RI, llvm::Value *CV,
                              MapCombinedInfoTy &CombinedInfo) const {
    bool IsImplicit = true;
    // Do the default mapping.
    if (CI.capturesThis()) {
      CombinedInfo.Exprs.push_back(nullptr);
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      const auto *PtrTy = cast<PointerType>(RI.getType().getTypePtr());
      CombinedInfo.Sizes.push_back(
          CGF.Builder.CreateIntCast(CGF.getTypeSize(PtrTy->getPointeeType()),
                                    CGF.Int64Ty, /*isSigned=*/true));
      // Default map type.
      CombinedInfo.Types.push_back(OMP_MAP_TO | OMP_MAP_FROM);
    } else if (CI.capturesVariableByCopy()) {
      const VarDecl *VD = CI.getCapturedVar();
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      CombinedInfo.Pointers.push_back(CV);
      if (!RI.getType()->isAnyPointerType()) {
        // We have to signal to the runtime captures passed by value that are
        // not pointers.
        CombinedInfo.Types.push_back(OMP_MAP_LITERAL);
        CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
            CGF.getTypeSize(RI.getType()), CGF.Int64Ty, /*isSigned=*/true));
      } else {
        // Pointers are implicitly mapped with a zero size and no flags
        // (other than first map that is added for all implicit maps).
        CombinedInfo.Types.push_back(OMP_MAP_NONE);
        CombinedInfo.Sizes.push_back(llvm::Constant::getNullValue(CGF.Int64Ty));
      }
      auto I = FirstPrivateDecls.find(VD);
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    } else {
      assert(CI.capturesVariable() && "Expected captured reference.");
      const auto *PtrTy = cast<ReferenceType>(RI.getType().getTypePtr());
      QualType ElementType = PtrTy->getPointeeType();
      CombinedInfo.Sizes.push_back(CGF.Builder.CreateIntCast(
          CGF.getTypeSize(ElementType), CGF.Int64Ty, /*isSigned=*/true));
      // The default map type for a scalar/complex type is 'to' because by
      // default the value doesn't have to be retrieved. For an aggregate
      // type, the default is 'tofrom'.
      CombinedInfo.Types.push_back(getMapModifiersForPrivateClauses(CI));
      const VarDecl *VD = CI.getCapturedVar();
      auto I = FirstPrivateDecls.find(VD);
      CombinedInfo.Exprs.push_back(VD->getCanonicalDecl());
      CombinedInfo.BasePointers.push_back(CV);
      if (I != FirstPrivateDecls.end() && ElementType->isAnyPointerType()) {
        Address PtrAddr = CGF.EmitLoadOfReference(CGF.MakeAddrLValue(
            CV, ElementType, CGF.getContext().getDeclAlign(VD),
            AlignmentSource::Decl));
        CombinedInfo.Pointers.push_back(PtrAddr.getPointer());
      } else {
        CombinedInfo.Pointers.push_back(CV);
      }
      if (I != FirstPrivateDecls.end())
        IsImplicit = I->getSecond();
    }
    // Every default map produces a single argument which is a target parameter.
    CombinedInfo.Types.back() |= OMP_MAP_TARGET_PARAM;

    // Add flag stating this is an implicit map.
    if (IsImplicit)
      CombinedInfo.Types.back() |= OMP_MAP_IMPLICIT;

    // No user-defined mapper for default mapping.
    CombinedInfo.Mappers.push_back(nullptr);
  }
};
} // anonymous namespace
static void emitNonContiguousDescriptor(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  CodeGenModule &CGM = CGF.CGM;
  MappableExprsHandler::MapCombinedInfoTy::StructNonContiguousInfo
      &NonContigInfo = CombinedInfo.NonContigInfo;

  // Build an array of struct descriptor_dim and then assign it to
  // offload_args.
  //
  // struct descriptor_dim {
  //   uint64_t offset;
  //   uint64_t count;
  //   uint64_t stride;
  // };
  ASTContext &C = CGF.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/0);
  RecordDecl *RD = C.buildImplicitRecord("descriptor_dim");
  RD->startDefinition();
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  addFieldToRecordDecl(C, RD, Int64Ty);
  RD->completeDefinition();
  QualType DimTy = C.getRecordType(RD);

  enum { OffsetFD = 0, CountFD, StrideFD };
  // We need two index variables here since the size of "Dims" is the same as
  // the size of Components; however, the size of offset, count, and stride is
  // equal to the size of the base declaration that is non-contiguous.
  for (unsigned I = 0, L = 0, E = NonContigInfo.Dims.size(); I < E; ++I) {
    // Skip emitting IR if dimension size is 1 since it cannot be
    // non-contiguous.
    if (NonContigInfo.Dims[I] == 1)
      continue;
    llvm::APInt Size(/*numBits=*/32, NonContigInfo.Dims[I]);
    QualType ArrayTy =
        C.getConstantArrayType(DimTy, Size, nullptr, ArrayType::Normal, 0);
    Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
    for (unsigned II = 0, EE = NonContigInfo.Dims[I]; II < EE; ++II) {
      unsigned RevIdx = EE - II - 1;
      LValue DimsLVal = CGF.MakeAddrLValue(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, II), DimTy);
      // Offset
      LValue OffsetLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), OffsetFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Offsets[L][RevIdx], OffsetLVal);
      // Count
      LValue CountLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), CountFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Counts[L][RevIdx], CountLVal);
      // Stride
      LValue StrideLVal = CGF.EmitLValueForField(
          DimsLVal, *std::next(RD->field_begin(), StrideFD));
      CGF.EmitStoreOfScalar(NonContigInfo.Strides[L][RevIdx], StrideLVal);
    }
    // args[I] = &dims
    Address DAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        DimsAddr, CGM.Int8PtrTy, CGM.Int8Ty);
    llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
        llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
        Info.RTArgs.PointersArray, 0, I);
    Address PAddr(P, CGM.VoidPtrTy, CGF.getPointerAlign());
    CGF.Builder.CreateStore(DAddr.getPointer(), PAddr);
    ++L;
  }
}
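// Illustration (hypothetical user code): for a non-contiguous update such as
//
//   double a[4][8];
//   #pragma omp target update to(a[0:2][0:4])
//
// each mapped dimension gets one descriptor_dim record; the inner dimension
// would carry a count of 4 and a stride of one element, while the outer
// dimension carries a count of 2 and the stride of a full row. The exact
// values are computed when the map information is generated, not here.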
// Try to extract the base declaration from a `this->x` expression if possible.
static ValueDecl *getDeclFromThisExpr(const Expr *E) {
  if (!E)
    return nullptr;

  if (const auto *OASE = dyn_cast<OMPArraySectionExpr>(E->IgnoreParenCasts()))
    if (const MemberExpr *ME =
            dyn_cast<MemberExpr>(OASE->getBase()->IgnoreParenImpCasts()))
      return ME->getMemberDecl();
  return nullptr;
}
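// For example, for an expression like 'this->data[0:len]' appearing in a map
// clause, this returns the declaration of the member 'data'; for anything
// that is not an array section over a member expression it returns nullptr.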
/// Emit a string constant containing the names of the values mapped to the
/// offloading runtime library.
static llvm::Constant *
emitMappingInformation(CodeGenFunction &CGF, llvm::OpenMPIRBuilder &OMPBuilder,
                       MappableExprsHandler::MappingExprInfo &MapExprs) {
  uint32_t SrcLocStrSize;
  if (!MapExprs.getMapDecl() && !MapExprs.getMapExpr())
    return OMPBuilder.getOrCreateDefaultSrcLocStr(SrcLocStrSize);

  SourceLocation Loc;
  if (!MapExprs.getMapDecl() && MapExprs.getMapExpr()) {
    if (const ValueDecl *VD = getDeclFromThisExpr(MapExprs.getMapExpr()))
      Loc = VD->getLocation();
    else
      Loc = MapExprs.getMapExpr()->getExprLoc();
  } else {
    Loc = MapExprs.getMapDecl()->getLocation();
  }

  std::string ExprName;
  if (MapExprs.getMapExpr()) {
    PrintingPolicy P(CGF.getContext().getLangOpts());
    llvm::raw_string_ostream OS(ExprName);
    MapExprs.getMapExpr()->printPretty(OS, nullptr, P);
    OS.flush();
  } else {
    ExprName = MapExprs.getMapDecl()->getNameAsString();
  }

  PresumedLoc PLoc = CGF.getContext().getSourceManager().getPresumedLoc(Loc);
  return OMPBuilder.getOrCreateSrcLocStr(PLoc.getFilename(), ExprName,
                                         PLoc.getLine(), PLoc.getColumn(),
                                         SrcLocStrSize);
}
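// Note: the resulting constant follows the OpenMPIRBuilder source-location
// string convention (roughly ";filename;name;line;column;;"), so the
// offloading runtime can report which variable a map entry refers to; the
// exact layout is owned by getOrCreateSrcLocStr.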
/// Emit the arrays used to pass the captures and map information to the
/// offloading runtime library. If there is no map or capture information,
/// return nullptr by reference.
static void emitOffloadingArrays(
    CodeGenFunction &CGF, MappableExprsHandler::MapCombinedInfoTy &CombinedInfo,
    CGOpenMPRuntime::TargetDataInfo &Info, llvm::OpenMPIRBuilder &OMPBuilder,
    bool IsNonContiguous = false) {
  CodeGenModule &CGM = CGF.CGM;
  ASTContext &Ctx = CGF.getContext();

  // Reset the array information.
  Info.clearArrayInfo();
  Info.NumberOfPtrs = CombinedInfo.BasePointers.size();

  if (Info.NumberOfPtrs) {
    // Detect if we have any capture size requiring runtime evaluation of the
    // size so that a constant array could be eventually used.

    llvm::APInt PointerNumAP(32, Info.NumberOfPtrs, /*isSigned=*/true);
    QualType PointerArrayType = Ctx.getConstantArrayType(
        Ctx.VoidPtrTy, PointerNumAP, nullptr, ArrayType::Normal,
        /*IndexTypeQuals=*/0);

    Info.RTArgs.BasePointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_baseptrs").getPointer();
    Info.RTArgs.PointersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_ptrs").getPointer();
    Address MappersArray =
        CGF.CreateMemTemp(PointerArrayType, ".offload_mappers");
    Info.RTArgs.MappersArray = MappersArray.getPointer();
    // If we don't have any VLA types or other types that require runtime
    // evaluation, we can use a constant array for the map sizes, otherwise we
    // need to fill up the arrays as we do for the pointers.
    QualType Int64Ty =
        Ctx.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
    SmallVector<llvm::Constant *> ConstSizes(
        CombinedInfo.Sizes.size(), llvm::ConstantInt::get(CGF.Int64Ty, 0));
    llvm::SmallBitVector RuntimeSizes(CombinedInfo.Sizes.size());
    for (unsigned I = 0, E = CombinedInfo.Sizes.size(); I < E; ++I) {
      if (auto *CI = dyn_cast<llvm::Constant>(CombinedInfo.Sizes[I])) {
        if (!isa<llvm::ConstantExpr>(CI) && !isa<llvm::GlobalValue>(CI)) {
          if (IsNonContiguous && (CombinedInfo.Types[I] &
                                  MappableExprsHandler::OMP_MAP_NON_CONTIG))
            ConstSizes[I] = llvm::ConstantInt::get(
                CGF.Int64Ty, CombinedInfo.NonContigInfo.Dims[I]);
          else
            ConstSizes[I] = CI;
          continue;
        }
      }
      RuntimeSizes.set(I);
    }

    if (RuntimeSizes.all()) {
      QualType SizeArrayType = Ctx.getConstantArrayType(
          Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
          /*IndexTypeQuals=*/0);
      Info.RTArgs.SizesArray =
          CGF.CreateMemTemp(SizeArrayType, ".offload_sizes").getPointer();
    } else {
      auto *SizesArrayInit = llvm::ConstantArray::get(
          llvm::ArrayType::get(CGM.Int64Ty, ConstSizes.size()), ConstSizes);
      std::string Name = CGM.getOpenMPRuntime().getName({"offload_sizes"});
      auto *SizesArrayGbl = new llvm::GlobalVariable(
          CGM.getModule(), SizesArrayInit->getType(), /*isConstant=*/true,
          llvm::GlobalValue::PrivateLinkage, SizesArrayInit, Name);
      SizesArrayGbl->setUnnamedAddr(llvm::GlobalValue::UnnamedAddr::Global);
      if (RuntimeSizes.any()) {
        QualType SizeArrayType = Ctx.getConstantArrayType(
            Int64Ty, PointerNumAP, nullptr, ArrayType::Normal,
            /*IndexTypeQuals=*/0);
        Address Buffer = CGF.CreateMemTemp(SizeArrayType, ".offload_sizes");
        llvm::Value *GblConstPtr =
            CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
                SizesArrayGbl, CGM.Int64Ty->getPointerTo());
        CGF.Builder.CreateMemCpy(
            Buffer,
            Address(GblConstPtr, CGM.Int64Ty,
                    CGM.getNaturalTypeAlignment(Ctx.getIntTypeForBitwidth(
                        /*DestWidth=*/64, /*Signed=*/false))),
            CGF.getTypeSize(SizeArrayType));
        Info.RTArgs.SizesArray = Buffer.getPointer();
      } else {
        Info.RTArgs.SizesArray = SizesArrayGbl;
      }
    }
    // The map types are always constant so we don't need to generate code to
    // fill arrays. Instead, we create an array constant.
    SmallVector<uint64_t, 4> Mapping(CombinedInfo.Types.size(), 0);
    llvm::copy(CombinedInfo.Types, Mapping.begin());
    std::string MaptypesName =
        CGM.getOpenMPRuntime().getName({"offload_maptypes"});
    auto *MapTypesArrayGbl =
        OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
    Info.RTArgs.MapTypesArray = MapTypesArrayGbl;

    // The information types are only built if there is debug information
    // requested.
    if (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo) {
      Info.RTArgs.MapNamesArray = llvm::Constant::getNullValue(
          llvm::Type::getInt8Ty(CGF.Builder.getContext())->getPointerTo());
    } else {
      auto fillInfoMap = [&](MappableExprsHandler::MappingExprInfo &MapExpr) {
        return emitMappingInformation(CGF, OMPBuilder, MapExpr);
      };
      SmallVector<llvm::Constant *, 4> InfoMap(CombinedInfo.Exprs.size());
      llvm::transform(CombinedInfo.Exprs, InfoMap.begin(), fillInfoMap);
      std::string MapnamesName =
          CGM.getOpenMPRuntime().getName({"offload_mapnames"});
      auto *MapNamesArrayGbl =
          OMPBuilder.createOffloadMapnames(InfoMap, MapnamesName);
      Info.RTArgs.MapNamesArray = MapNamesArrayGbl;
    }

    // If there's a present map type modifier, it must not be applied to the end
    // of a region, so generate a separate map type array in that case.
    if (Info.separateBeginEndCalls()) {
      bool EndMapTypesDiffer = false;
      for (uint64_t &Type : Mapping) {
        if (Type & MappableExprsHandler::OMP_MAP_PRESENT) {
          Type &= ~MappableExprsHandler::OMP_MAP_PRESENT;
          EndMapTypesDiffer = true;
        }
      }
      if (EndMapTypesDiffer) {
        MapTypesArrayGbl =
            OMPBuilder.createOffloadMaptypes(Mapping, MaptypesName);
        Info.RTArgs.MapTypesArrayEnd = MapTypesArrayGbl;
      }
    }
    for (unsigned I = 0; I < Info.NumberOfPtrs; ++I) {
      llvm::Value *BPVal = *CombinedInfo.BasePointers[I];
      llvm::Value *BP = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.BasePointersArray, 0, I);
      BP = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          BP, BPVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address BPAddr(BP, BPVal->getType(),
                     Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(BPVal, BPAddr);

      if (Info.requiresDevicePointerInfo())
        if (const ValueDecl *DevVD =
                CombinedInfo.BasePointers[I].getDevicePtrDecl())
          Info.CaptureDeviceAddrMap.try_emplace(DevVD, BPAddr);

      llvm::Value *PVal = CombinedInfo.Pointers[I];
      llvm::Value *P = CGF.Builder.CreateConstInBoundsGEP2_32(
          llvm::ArrayType::get(CGM.VoidPtrTy, Info.NumberOfPtrs),
          Info.RTArgs.PointersArray, 0, I);
      P = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          P, PVal->getType()->getPointerTo(/*AddrSpace=*/0));
      Address PAddr(P, PVal->getType(), Ctx.getTypeAlignInChars(Ctx.VoidPtrTy));
      CGF.Builder.CreateStore(PVal, PAddr);

      if (RuntimeSizes.test(I)) {
        llvm::Value *S = CGF.Builder.CreateConstInBoundsGEP2_32(
            llvm::ArrayType::get(CGM.Int64Ty, Info.NumberOfPtrs),
            Info.RTArgs.SizesArray,
            /*Idx0=*/0,
            /*Idx1=*/I);
        Address SAddr(S, CGM.Int64Ty, Ctx.getTypeAlignInChars(Int64Ty));
        CGF.Builder.CreateStore(CGF.Builder.CreateIntCast(CombinedInfo.Sizes[I],
                                                          CGM.Int64Ty,
                                                          /*isSigned=*/true),
                                SAddr);
      }

      // Fill up the mapper array.
      llvm::Value *MFunc = llvm::ConstantPointerNull::get(CGM.VoidPtrTy);
      if (CombinedInfo.Mappers[I]) {
        MFunc = CGM.getOpenMPRuntime().getOrCreateUserDefinedMapperFunc(
            cast<OMPDeclareMapperDecl>(CombinedInfo.Mappers[I]));
        MFunc = CGF.Builder.CreatePointerCast(MFunc, CGM.VoidPtrTy);
        Info.HasMapper = true;
      }
      Address MAddr = CGF.Builder.CreateConstArrayGEP(MappersArray, I);
      CGF.Builder.CreateStore(MFunc, MAddr);
    }
  }

  if (!IsNonContiguous || CombinedInfo.NonContigInfo.Offsets.empty() ||
      Info.NumberOfPtrs == 0)
    return;

  emitNonContiguousDescriptor(CGF, CombinedInfo, Info);
}
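// For orientation (a sketch of typical output, not guaranteed IR): a simple
// 'map(tofrom: x)' of a 4-byte scalar yields stack arrays .offload_baseptrs
// and .offload_ptrs holding &x, a constant @.offload_sizes entry of 4, and a
// constant @.offload_maptypes entry with the TO, FROM, and TARGET_PARAM bits
// set.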
/// Check for inner distribute directive.
static const OMPExecutableDirective *
getNestedDistributeDirective(ASTContext &Ctx, const OMPExecutableDirective &D) {
  const auto *CS = D.getInnermostCapturedStmt();
  const auto *Body =
      CS->getCapturedStmt()->IgnoreContainers(/*IgnoreCaptured=*/true);
  const Stmt *ChildStmt =
      CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);

  if (const auto *NestedDir =
          dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
    OpenMPDirectiveKind DKind = NestedDir->getDirectiveKind();
    switch (D.getDirectiveKind()) {
    case OMPD_target:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      if (DKind == OMPD_teams) {
        Body = NestedDir->getInnermostCapturedStmt()->IgnoreContainers(
            /*IgnoreCaptured=*/true);
        if (!Body)
          return nullptr;
        ChildStmt = CGOpenMPSIMDRuntime::getSingleCompoundChild(Ctx, Body);
        if (const auto *NND =
                dyn_cast_or_null<OMPExecutableDirective>(ChildStmt)) {
          DKind = NND->getDirectiveKind();
          if (isOpenMPDistributeDirective(DKind))
            return NND;
        }
      }
      return nullptr;
    case OMPD_target_teams:
      if (isOpenMPDistributeDirective(DKind))
        return NestedDir;
      return nullptr;
    case OMPD_target_parallel:
    case OMPD_target_simd:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
      return nullptr;
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_parallel:
    case OMPD_for:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancel:
    case OMPD_cancellation_point:
    case OMPD_ordered:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_task:
    case OMPD_simd:
    case OMPD_tile:
    case OMPD_unroll:
    case OMPD_sections:
    case OMPD_section:
    case OMPD_single:
    case OMPD_master:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_barrier:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_atomic:
    case OMPD_flush:
    case OMPD_depobj:
    case OMPD_scan:
    case OMPD_teams:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    case OMPD_unknown:
    default:
      llvm_unreachable("Unexpected directive.");
    }
  }

  return nullptr;
}
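// Example (illustrative): for
//
//   #pragma omp target
//   #pragma omp teams distribute parallel for
//   for (int i = 0; i < n; ++i) ...
//
// the search above finds the nested 'teams distribute parallel for' and
// returns it, so the caller can compute the loop trip count for the kernel.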
/// Emit the user-defined mapper function. The code generation follows the
/// pattern in the example below.
/// \code
/// void .omp_mapper.<type_name>.<mapper_id>.(void *rt_mapper_handle,
///                                           void *base, void *begin,
///                                           int64_t size, int64_t type,
///                                           void *name = nullptr) {
///   // Allocate space for an array section first or add a base/begin for
///   // pointer dereference.
///   if ((size > 1 || (base != begin && maptype.IsPtrAndObj)) &&
///       !maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
///   // Map members.
///   for (unsigned i = 0; i < size; i++) {
///     // For each component specified by this mapper:
///     for (auto c : begin[i]->all_components) {
///       if (c.hasMapper())
///         (*c.Mapper())(rt_mapper_handle, c.arg_base, c.arg_begin, c.arg_size,
///                       c.arg_type, c.arg_name);
///       else
///         __tgt_push_mapper_component(rt_mapper_handle, c.arg_base,
///                                     c.arg_begin, c.arg_size, c.arg_type,
///                                     c.arg_name);
///     }
///   }
///   // Delete the array section.
///   if (size > 1 && maptype.IsDelete)
///     __tgt_push_mapper_component(rt_mapper_handle, base, begin,
///                                 size*sizeof(Ty), clearToFromMember(type));
/// }
/// \endcode
void CGOpenMPRuntime::emitUserDefinedMapper(const OMPDeclareMapperDecl *D,
                                            CodeGenFunction *CGF) {
  if (UDMMap.count(D) > 0)
    return;
  ASTContext &C = CGM.getContext();
  QualType Ty = D->getType();
  QualType PtrTy = C.getPointerType(Ty).withRestrict();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  auto *MapperVarDecl =
      cast<VarDecl>(cast<DeclRefExpr>(D->getMapperVarRef())->getDecl());
  SourceLocation Loc = D->getLocation();
  CharUnits ElementSize = C.getTypeSizeInChars(Ty);
  llvm::Type *ElemTy = CGM.getTypes().ConvertTypeForMem(Ty);

  // Prepare mapper function arguments and attributes.
  ImplicitParamDecl HandleArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                              C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl BaseArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl BeginArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr,
                             C.VoidPtrTy, ImplicitParamDecl::Other);
  ImplicitParamDecl SizeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl TypeArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, Int64Ty,
                            ImplicitParamDecl::Other);
  ImplicitParamDecl NameArg(C, /*DC=*/nullptr, Loc, /*Id=*/nullptr, C.VoidPtrTy,
                            ImplicitParamDecl::Other);
  FunctionArgList Args;
  Args.push_back(&HandleArg);
  Args.push_back(&BaseArg);
  Args.push_back(&BeginArg);
  Args.push_back(&SizeArg);
  Args.push_back(&TypeArg);
  Args.push_back(&NameArg);
  const CGFunctionInfo &FnInfo =
      CGM.getTypes().arrangeBuiltinFunctionDeclaration(C.VoidTy, Args);
  llvm::FunctionType *FnTy = CGM.getTypes().GetFunctionType(FnInfo);
  SmallString<64> TyStr;
  llvm::raw_svector_ostream Out(TyStr);
  CGM.getCXXABI().getMangleContext().mangleTypeName(Ty, Out);
  std::string Name = getName({"omp_mapper", TyStr, D->getName()});
  auto *Fn = llvm::Function::Create(FnTy, llvm::GlobalValue::InternalLinkage,
                                    Name, &CGM.getModule());
  CGM.SetInternalFunctionAttributes(GlobalDecl(), Fn, FnInfo);
  Fn->removeFnAttr(llvm::Attribute::OptimizeNone);
  // Start the mapper function code generation.
  CodeGenFunction MapperCGF(CGM);
  MapperCGF.StartFunction(GlobalDecl(), C.VoidTy, Fn, FnInfo, Args, Loc, Loc);
  // Compute the starting and end addresses of array elements.
  llvm::Value *Size = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&SizeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  // Prepare common arguments for array initiation and deletion.
  llvm::Value *Handle = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&HandleArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BaseIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BaseArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  llvm::Value *BeginIn = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&BeginArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);
  // Convert the size in bytes into the number of array elements.
  Size = MapperCGF.Builder.CreateExactUDiv(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  llvm::Value *PtrBegin = MapperCGF.Builder.CreateBitCast(
      BeginIn, CGM.getTypes().ConvertTypeForMem(PtrTy));
  llvm::Value *PtrEnd = MapperCGF.Builder.CreateGEP(ElemTy, PtrBegin, Size);
  llvm::Value *MapType = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&TypeArg), /*Volatile=*/false,
      C.getPointerType(Int64Ty), Loc);
  llvm::Value *MapName = MapperCGF.EmitLoadOfScalar(
      MapperCGF.GetAddrOfLocalVar(&NameArg),
      /*Volatile=*/false, C.getPointerType(C.VoidPtrTy), Loc);

  // Emit array initiation if this is an array section and \p MapType indicates
  // that memory allocation is required.
  llvm::BasicBlock *HeadBB = MapperCGF.createBasicBlock("omp.arraymap.head");
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, HeadBB, /*IsInit=*/true);

  // Emit a for loop to iterate through SizeArg of elements and map all of
  // them.

  // Emit the loop header block.
  MapperCGF.EmitBlock(HeadBB);
  llvm::BasicBlock *BodyBB = MapperCGF.createBasicBlock("omp.arraymap.body");
  llvm::BasicBlock *DoneBB = MapperCGF.createBasicBlock("omp.done");
  // Evaluate whether the initial condition is satisfied.
  llvm::Value *IsEmpty =
      MapperCGF.Builder.CreateICmpEQ(PtrBegin, PtrEnd, "omp.arraymap.isempty");
  MapperCGF.Builder.CreateCondBr(IsEmpty, DoneBB, BodyBB);
  llvm::BasicBlock *EntryBB = MapperCGF.Builder.GetInsertBlock();

  // Emit the loop body block.
  MapperCGF.EmitBlock(BodyBB);
  llvm::BasicBlock *LastBB = BodyBB;
  llvm::PHINode *PtrPHI = MapperCGF.Builder.CreatePHI(
      PtrBegin->getType(), 2, "omp.arraymap.ptrcurrent");
  PtrPHI->addIncoming(PtrBegin, EntryBB);
  Address PtrCurrent(PtrPHI, ElemTy,
                     MapperCGF.GetAddrOfLocalVar(&BeginArg)
                         .getAlignment()
                         .alignmentOfArrayElement(ElementSize));
  // Privatize the declared variable of mapper to be the current array element.
  CodeGenFunction::OMPPrivateScope Scope(MapperCGF);
  Scope.addPrivate(MapperVarDecl, PtrCurrent);
  (void)Scope.Privatize();
  // Get map clause information. Fill up the arrays with all mapped variables.
  MappableExprsHandler::MapCombinedInfoTy Info;
  MappableExprsHandler MEHandler(*D, MapperCGF);
  MEHandler.generateAllInfoForMapper(Info);

  // Call the runtime API __tgt_mapper_num_components to get the number of
  // pre-existing components.
  llvm::Value *OffloadingArgs[] = {Handle};
  llvm::Value *PreviousSize = MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_mapper_num_components),
      OffloadingArgs);
  llvm::Value *ShiftedPreviousSize = MapperCGF.Builder.CreateShl(
      PreviousSize,
      MapperCGF.Builder.getInt64(MappableExprsHandler::getFlagMemberOffset()));

  // Fill up the runtime mapper handle for all components.
  for (unsigned I = 0; I < Info.BasePointers.size(); ++I) {
    llvm::Value *CurBaseArg = MapperCGF.Builder.CreateBitCast(
        *Info.BasePointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurBeginArg = MapperCGF.Builder.CreateBitCast(
        Info.Pointers[I], CGM.getTypes().ConvertTypeForMem(C.VoidPtrTy));
    llvm::Value *CurSizeArg = Info.Sizes[I];
    llvm::Value *CurNameArg =
        (CGM.getCodeGenOpts().getDebugInfo() == codegenoptions::NoDebugInfo)
            ? llvm::ConstantPointerNull::get(CGM.VoidPtrTy)
            : emitMappingInformation(MapperCGF, OMPBuilder, Info.Exprs[I]);

    // Extract the MEMBER_OF field from the map type.
    llvm::Value *OriMapType = MapperCGF.Builder.getInt64(Info.Types[I]);
    llvm::Value *MemberMapType =
        MapperCGF.Builder.CreateNUWAdd(OriMapType, ShiftedPreviousSize);

    // Combine the map type inherited from user-defined mapper with that
    // specified in the program. According to the OMP_MAP_TO and OMP_MAP_FROM
    // bits of the \a MapType, which is the input argument of the mapper
    // function, the following code will set the OMP_MAP_TO and OMP_MAP_FROM
    // bits of MemberMapType.
    // [OpenMP 5.0], 1.2.6. map-type decay.
    //        | alloc |  to   | from  | tofrom | release | delete
    // ----------------------------------------------------------
    // alloc  | alloc | alloc | alloc | alloc  | release | delete
    // to     | alloc | to    | alloc | to     | release | delete
    // from   | alloc | alloc | from  | from   | release | delete
    // tofrom | alloc | to    | from  | tofrom | release | delete
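    // Worked example: if a member is declared map(to: ...) in the mapper and
    // the mapper is invoked from a map(from: ...) clause, the combination
    // decays to 'alloc' (row 'to', column 'from' above), so the member is
    // neither copied in nor copied out.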
    llvm::Value *LeftToFrom = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM));
    llvm::BasicBlock *AllocBB = MapperCGF.createBasicBlock("omp.type.alloc");
    llvm::BasicBlock *AllocElseBB =
        MapperCGF.createBasicBlock("omp.type.alloc.else");
    llvm::BasicBlock *ToBB = MapperCGF.createBasicBlock("omp.type.to");
    llvm::BasicBlock *ToElseBB = MapperCGF.createBasicBlock("omp.type.to.else");
    llvm::BasicBlock *FromBB = MapperCGF.createBasicBlock("omp.type.from");
    llvm::BasicBlock *EndBB = MapperCGF.createBasicBlock("omp.type.end");
    llvm::Value *IsAlloc = MapperCGF.Builder.CreateIsNull(LeftToFrom);
    MapperCGF.Builder.CreateCondBr(IsAlloc, AllocBB, AllocElseBB);
    // In case of alloc, clear OMP_MAP_TO and OMP_MAP_FROM.
    MapperCGF.EmitBlock(AllocBB);
    llvm::Value *AllocMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                     MappableExprsHandler::OMP_MAP_FROM)));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(AllocElseBB);
    llvm::Value *IsTo = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_TO));
    MapperCGF.Builder.CreateCondBr(IsTo, ToBB, ToElseBB);
    // In case of to, clear OMP_MAP_FROM.
    MapperCGF.EmitBlock(ToBB);
    llvm::Value *ToMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateBr(EndBB);
    MapperCGF.EmitBlock(ToElseBB);
    llvm::Value *IsFrom = MapperCGF.Builder.CreateICmpEQ(
        LeftToFrom,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_FROM));
    MapperCGF.Builder.CreateCondBr(IsFrom, FromBB, EndBB);
    // In case of from, clear OMP_MAP_TO.
    MapperCGF.EmitBlock(FromBB);
    llvm::Value *FromMapType = MapperCGF.Builder.CreateAnd(
        MemberMapType,
        MapperCGF.Builder.getInt64(~MappableExprsHandler::OMP_MAP_TO));
    // In case of tofrom, do nothing.
    MapperCGF.EmitBlock(EndBB);
    LastBB = EndBB;
    llvm::PHINode *CurMapType =
        MapperCGF.Builder.CreatePHI(CGM.Int64Ty, 4, "omp.maptype");
    CurMapType->addIncoming(AllocMapType, AllocBB);
    CurMapType->addIncoming(ToMapType, ToBB);
    CurMapType->addIncoming(FromMapType, FromBB);
    CurMapType->addIncoming(MemberMapType, ToElseBB);
    llvm::Value *OffloadingArgs[] = {Handle,     CurBaseArg, CurBeginArg,
                                     CurSizeArg, CurMapType, CurNameArg};
    if (Info.Mappers[I]) {
      // Call the corresponding mapper function.
      llvm::Function *MapperFunc = getOrCreateUserDefinedMapperFunc(
          cast<OMPDeclareMapperDecl>(Info.Mappers[I]));
      assert(MapperFunc && "Expect a valid mapper function is available.");
      MapperCGF.EmitNounwindRuntimeCall(MapperFunc, OffloadingArgs);
    } else {
      // Call the runtime API __tgt_push_mapper_component to fill up the
      // runtime data structure.
      MapperCGF.EmitRuntimeCall(
          OMPBuilder.getOrCreateRuntimeFunction(
              CGM.getModule(), OMPRTL___tgt_push_mapper_component),
          OffloadingArgs);
    }
  }

  // Update the pointer to point to the next element that needs to be mapped,
  // and check whether we have mapped all elements.
  llvm::Value *PtrNext = MapperCGF.Builder.CreateConstGEP1_32(
      ElemTy, PtrPHI, /*Idx0=*/1, "omp.arraymap.next");
  PtrPHI->addIncoming(PtrNext, LastBB);
  llvm::Value *IsDone =
      MapperCGF.Builder.CreateICmpEQ(PtrNext, PtrEnd, "omp.arraymap.isdone");
  llvm::BasicBlock *ExitBB = MapperCGF.createBasicBlock("omp.arraymap.exit");
  MapperCGF.Builder.CreateCondBr(IsDone, ExitBB, BodyBB);

  MapperCGF.EmitBlock(ExitBB);
  // Emit array deletion if this is an array section and \p MapType indicates
  // that deletion is required.
  emitUDMapperArrayInitOrDel(MapperCGF, Handle, BaseIn, BeginIn, Size, MapType,
                             MapName, ElementSize, DoneBB, /*IsInit=*/false);

  // Emit the function exit block.
  MapperCGF.EmitBlock(DoneBB, /*IsFinished=*/true);
  MapperCGF.FinishFunction();
  UDMMap.try_emplace(D, Fn);
  if (CGF) {
    auto &Decls = FunctionUDMMap.FindAndConstruct(CGF->CurFn);
    Decls.second.push_back(D);
  }
}
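// Usage sketch (hypothetical user code): a declaration like
//
//   struct vec { int len; double *data; };
//   #pragma omp declare mapper(struct vec v) map(v, v.data[0:v.len])
//
// causes the function emitted above to be generated once per mapper; the
// offloading runtime then invokes it for every array element mapped with
// this mapper attached.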
/// Emit the array initialization or deletion portion for user-defined mapper
/// code generation. First, it evaluates whether an array section is mapped and
/// whether the \a MapType instructs to delete this section. If \a IsInit is
/// true, and \a MapType indicates to not delete this array, array
/// initialization code is generated. If \a IsInit is false, and \a MapType
/// indicates to delete this array, array deletion code is generated.
void CGOpenMPRuntime::emitUDMapperArrayInitOrDel(
    CodeGenFunction &MapperCGF, llvm::Value *Handle, llvm::Value *Base,
    llvm::Value *Begin, llvm::Value *Size, llvm::Value *MapType,
    llvm::Value *MapName, CharUnits ElementSize, llvm::BasicBlock *ExitBB,
    bool IsInit) {
  StringRef Prefix = IsInit ? ".init" : ".del";

  // Evaluate if this is an array section.
  llvm::BasicBlock *BodyBB =
      MapperCGF.createBasicBlock(getName({"omp.array", Prefix}));
  llvm::Value *IsArray = MapperCGF.Builder.CreateICmpSGT(
      Size, MapperCGF.Builder.getInt64(1), "omp.arrayinit.isarray");
  llvm::Value *DeleteBit = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_DELETE));
  llvm::Value *DeleteCond;
  llvm::Value *Cond;
  if (IsInit) {
    // base != begin?
    llvm::Value *BaseIsBegin = MapperCGF.Builder.CreateICmpNE(Base, Begin);
    // IsPtrAndObj?
    llvm::Value *PtrAndObjBit = MapperCGF.Builder.CreateAnd(
        MapType,
        MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_PTR_AND_OBJ));
    PtrAndObjBit = MapperCGF.Builder.CreateIsNotNull(PtrAndObjBit);
    BaseIsBegin = MapperCGF.Builder.CreateAnd(BaseIsBegin, PtrAndObjBit);
    Cond = MapperCGF.Builder.CreateOr(IsArray, BaseIsBegin);
    DeleteCond = MapperCGF.Builder.CreateIsNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  } else {
    Cond = IsArray;
    DeleteCond = MapperCGF.Builder.CreateIsNotNull(
        DeleteBit, getName({"omp.array", Prefix, ".delete"}));
  }
  Cond = MapperCGF.Builder.CreateAnd(Cond, DeleteCond);
  MapperCGF.Builder.CreateCondBr(Cond, BodyBB, ExitBB);

  MapperCGF.EmitBlock(BodyBB);
  // Get the array size by multiplying element size and element number (i.e.,
  // \p Size).
  llvm::Value *ArraySize = MapperCGF.Builder.CreateNUWMul(
      Size, MapperCGF.Builder.getInt64(ElementSize.getQuantity()));
  // Remove OMP_MAP_TO and OMP_MAP_FROM from the map type, so that it achieves
  // memory allocation/deletion purpose only.
  llvm::Value *MapTypeArg = MapperCGF.Builder.CreateAnd(
      MapType,
      MapperCGF.Builder.getInt64(~(MappableExprsHandler::OMP_MAP_TO |
                                   MappableExprsHandler::OMP_MAP_FROM)));
  MapTypeArg = MapperCGF.Builder.CreateOr(
      MapTypeArg,
      MapperCGF.Builder.getInt64(MappableExprsHandler::OMP_MAP_IMPLICIT));

  // Call the runtime API __tgt_push_mapper_component to fill up the runtime
  // data structure.
  llvm::Value *OffloadingArgs[] = {Handle,    Base,       Begin,
                                   ArraySize, MapTypeArg, MapName};
  MapperCGF.EmitRuntimeCall(
      OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                            OMPRTL___tgt_push_mapper_component),
      OffloadingArgs);
}
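// For instance, when the runtime invokes a mapper for 'map(delete: a[0:100])',
// the call above, emitted on the IsInit=false exit path, pushes one component
// covering the whole section so the runtime can release it; on entry with the
// delete bit clear, the IsInit=true path allocates the section instead.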
llvm::Function *CGOpenMPRuntime::getOrCreateUserDefinedMapperFunc(
    const OMPDeclareMapperDecl *D) {
  auto I = UDMMap.find(D);
  if (I != UDMMap.end())
    return I->second;
  emitUserDefinedMapper(D);
  return UDMMap.lookup(D);
}
llvm::Value *CGOpenMPRuntime::emitTargetNumIterationsCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  OpenMPDirectiveKind Kind = D.getDirectiveKind();
  const OMPExecutableDirective *TD = &D;
  // Get nested teams distribute kind directive, if any.
  if (!isOpenMPDistributeDirective(Kind) || !isOpenMPTeamsDirective(Kind))
    TD = getNestedDistributeDirective(CGM.getContext(), D);
  if (!TD)
    return llvm::ConstantInt::get(CGF.Int64Ty, 0);

  const auto *LD = cast<OMPLoopDirective>(TD);
  if (llvm::Value *NumIterations = SizeEmitter(CGF, *LD))
    return NumIterations;
  return llvm::ConstantInt::get(CGF.Int64Ty, 0);
}
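// For example, for '#pragma omp target teams distribute parallel for' over a
// loop, SizeEmitter produces the trip count that is later passed to the
// kernel launch so the runtime can size the team grid; 0 is returned when no
// enclosed distribute loop is found.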
void CGOpenMPRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  if (!CGF.HaveInsertPoint())
    return;

  const bool OffloadingMandatory = !CGM.getLangOpts().OpenMPIsDevice &&
                                   CGM.getLangOpts().OpenMPOffloadMandatory;

  assert((OffloadingMandatory || OutlinedFn) && "Invalid outlined function!");

  const bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                                 D.hasClausesOfKind<OMPNowaitClause>() ||
                                 D.hasClausesOfKind<OMPInReductionClause>();
  llvm::SmallVector<llvm::Value *, 16> CapturedVars;
  const CapturedStmt &CS = *D.getCapturedStmt(OMPD_target);
  auto &&ArgsCodegen = [&CS, &CapturedVars](CodeGenFunction &CGF,
                                            PrePostActionTy &) {
    CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
  };
  emitInlinedDirective(CGF, OMPD_unknown, ArgsCodegen);
  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate code for the host fallback function.
  auto &&FallbackGen = [this, OutlinedFn, &D, &CapturedVars, RequiresOuterTask,
                        &CS, OffloadingMandatory](CodeGenFunction &CGF) {
    if (OffloadingMandatory) {
      CGF.Builder.CreateUnreachable();
    } else {
      if (RequiresOuterTask) {
        CapturedVars.clear();
        CGF.GenerateOpenMPCapturedVars(CS, CapturedVars);
      }
      emitOutlinedFunctionCall(CGF, D.getBeginLoc(), OutlinedFn, CapturedVars);
    }
  };
  // Fill up the pointer arrays and transfer execution to the device.
  auto &&ThenGen = [this, Device, OutlinedFnID, &D, &InputInfo, &MapTypesArray,
                    &MapNamesArray, SizeEmitter,
                    FallbackGen](CodeGenFunction &CGF, PrePostActionTy &) {
    if (Device.getInt() == OMPC_DEVICE_ancestor) {
      // Reverse offloading is not supported, so just execute on the host.
      FallbackGen(CGF);
      return;
    }

    // On top of the arrays that were filled up, the target offloading call
    // takes as arguments the device id as well as the host pointer. The host
    // pointer is used by the runtime library to identify the current target
    // region, so it only has to be unique and not necessarily point to
    // anything. It could be the pointer to the outlined function that
    // implements the target region, but we aren't using that so that the
    // compiler doesn't need to keep that, and could therefore inline the host
    // function if proven worthwhile during optimization.

    // From this point on, we need to have an ID of the target region defined.
    assert(OutlinedFnID && "Invalid outlined function ID!");
    (void)OutlinedFnID;

    // Emit device ID if any.
    llvm::Value *DeviceID;
    if (Device.getPointer()) {
      assert((Device.getInt() == OMPC_DEVICE_unknown ||
              Device.getInt() == OMPC_DEVICE_device_num) &&
             "Expected device_num modifier.");
      llvm::Value *DevVal = CGF.EmitScalarExpr(Device.getPointer());
      DeviceID =
          CGF.Builder.CreateIntCast(DevVal, CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Return value of the runtime offloading call.
    llvm::Value *Return;

    llvm::Value *NumTeams = emitNumTeamsForTargetDirective(CGF, D);
    llvm::Value *NumThreads = emitNumThreadsForTargetDirective(CGF, D);

    // Source location for the ident struct.
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    // Get tripcount for the target loop-based directive.
    llvm::Value *NumIterations =
        emitTargetNumIterationsCall(CGF, D, SizeEmitter);

    // Arguments for the target kernel.
    SmallVector<llvm::Value *> KernelArgs{
        CGF.Builder.getInt32(/* Version */ 1),
        PointerNum,
        InputInfo.BasePointersArray.getPointer(),
        InputInfo.PointersArray.getPointer(),
        InputInfo.SizesArray.getPointer(),
        MapTypesArray,
        MapNamesArray,
        InputInfo.MappersArray.getPointer(),
        NumIterations};
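    // Layout note (mirrors the initializer above): version, number of
    // entries, the five offloading arrays (base pointers, pointers, sizes,
    // map types, map names), the mapper array, and finally the loop trip
    // count consumed by the runtime scheduler.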
    // Arguments passed to the 'nowait' variant.
    SmallVector<llvm::Value *> NoWaitKernelArgs{
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
        CGF.Builder.getInt32(0),
        llvm::ConstantPointerNull::get(CGM.VoidPtrTy),
    };

    bool HasNoWait = D.hasClausesOfKind<OMPNowaitClause>();

    // The target region is an outlined function launched by the runtime
    // via calls to __tgt_target_kernel().
    //
    // Note that on the host and CPU targets, the runtime implementation of
    // these calls simply calls the outlined function without forking threads.
    // The outlined functions themselves have runtime calls to
    // __kmpc_fork_teams() and __kmpc_fork() for this purpose, codegen'd by
    // the compiler in emitTeamsCall() and emitParallelCall().
    //
    // In contrast, on the NVPTX target, the implementation of
    // __tgt_target_teams() launches a GPU kernel with the requested number
    // of teams and threads so no additional calls to the runtime are required.
    // Check the error code and execute the host version if required.
    CGF.Builder.restoreIP(
        HasNoWait ? OMPBuilder.emitTargetKernel(
                        CGF.Builder, Return, RTLoc, DeviceID, NumTeams,
                        NumThreads, OutlinedFnID, KernelArgs, NoWaitKernelArgs)
                  : OMPBuilder.emitTargetKernel(CGF.Builder, Return, RTLoc,
                                                DeviceID, NumTeams, NumThreads,
                                                OutlinedFnID, KernelArgs));

    llvm::BasicBlock *OffloadFailedBlock =
        CGF.createBasicBlock("omp_offload.failed");
    llvm::BasicBlock *OffloadContBlock =
        CGF.createBasicBlock("omp_offload.cont");
    llvm::Value *Failed = CGF.Builder.CreateIsNotNull(Return);
    CGF.Builder.CreateCondBr(Failed, OffloadFailedBlock, OffloadContBlock);

    CGF.EmitBlock(OffloadFailedBlock);
    FallbackGen(CGF);

    CGF.EmitBranch(OffloadContBlock);

    CGF.EmitBlock(OffloadContBlock, /*IsFinished=*/true);
  };
9996 auto &&ElseGen
= [FallbackGen
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
10000 auto &&TargetThenGen
= [this, &ThenGen
, &D
, &InputInfo
, &MapTypesArray
,
10001 &MapNamesArray
, &CapturedVars
, RequiresOuterTask
,
10002 &CS
](CodeGenFunction
&CGF
, PrePostActionTy
&) {
10003 // Fill up the arrays with all the captured variables.
10004 MappableExprsHandler::MapCombinedInfoTy CombinedInfo
;
10006 // Get mappable expression information.
10007 MappableExprsHandler
MEHandler(D
, CGF
);
10008 llvm::DenseMap
<llvm::Value
*, llvm::Value
*> LambdaPointers
;
10009 llvm::DenseSet
<CanonicalDeclPtr
<const Decl
>> MappedVarSet
;
10011 auto RI
= CS
.getCapturedRecordDecl()->field_begin();
10012 auto *CV
= CapturedVars
.begin();
10013 for (CapturedStmt::const_capture_iterator CI
= CS
.capture_begin(),
10014 CE
= CS
.capture_end();
10015 CI
!= CE
; ++CI
, ++RI
, ++CV
) {
10016 MappableExprsHandler::MapCombinedInfoTy CurInfo
;
10017 MappableExprsHandler::StructRangeInfoTy PartialStruct
;
10019 // VLA sizes are passed to the outlined region by copy and do not have map
10020 // information associated.
10021 if (CI
->capturesVariableArrayType()) {
10022 CurInfo
.Exprs
.push_back(nullptr);
10023 CurInfo
.BasePointers
.push_back(*CV
);
10024 CurInfo
.Pointers
.push_back(*CV
);
10025 CurInfo
.Sizes
.push_back(CGF
.Builder
.CreateIntCast(
10026 CGF
.getTypeSize(RI
->getType()), CGF
.Int64Ty
, /*isSigned=*/true));
10027 // Copy to the device as an argument. No need to retrieve it.
10028 CurInfo
.Types
.push_back(MappableExprsHandler::OMP_MAP_LITERAL
|
10029 MappableExprsHandler::OMP_MAP_TARGET_PARAM
|
10030 MappableExprsHandler::OMP_MAP_IMPLICIT
);
10031 CurInfo
.Mappers
.push_back(nullptr);
10033 // If we have any information in the map clause, we use it, otherwise we
10034 // just do a default mapping.
10035 MEHandler
.generateInfoForCapture(CI
, *CV
, CurInfo
, PartialStruct
);
10036 if (!CI
->capturesThis())
10037 MappedVarSet
.insert(CI
->getCapturedVar());
10039 MappedVarSet
.insert(nullptr);
10040 if (CurInfo
.BasePointers
.empty() && !PartialStruct
.Base
.isValid())
10041 MEHandler
.generateDefaultMapInfo(*CI
, **RI
, *CV
, CurInfo
);
10042 // Generate correct mapping for variables captured by reference in
10044 if (CI
->capturesVariable())
10045 MEHandler
.generateInfoForLambdaCaptures(CI
->getCapturedVar(), *CV
,
10046 CurInfo
, LambdaPointers
);
10048 // We expect to have at least an element of information for this capture.
10049 assert((!CurInfo
.BasePointers
.empty() || PartialStruct
.Base
.isValid()) &&
10050 "Non-existing map pointer for capture!");
10051 assert(CurInfo
.BasePointers
.size() == CurInfo
.Pointers
.size() &&
10052 CurInfo
.BasePointers
.size() == CurInfo
.Sizes
.size() &&
10053 CurInfo
.BasePointers
.size() == CurInfo
.Types
.size() &&
10054 CurInfo
.BasePointers
.size() == CurInfo
.Mappers
.size() &&
10055 "Inconsistent map information sizes!");
10057 // If there is an entry in PartialStruct it means we have a struct with
10058 // individual members mapped. Emit an extra combined entry.
10059 if (PartialStruct
.Base
.isValid()) {
10060 CombinedInfo
.append(PartialStruct
.PreliminaryMapData
);
10061 MEHandler
.emitCombinedEntry(
10062 CombinedInfo
, CurInfo
.Types
, PartialStruct
, nullptr,
10063 !PartialStruct
.PreliminaryMapData
.BasePointers
.empty());
10066 // We need to append the results of this capture to what we already have.
10067 CombinedInfo
.append(CurInfo
);
10069 // Adjust MEMBER_OF flags for the lambdas captures.
10070 MEHandler
.adjustMemberOfForLambdaCaptures(
10071 LambdaPointers
, CombinedInfo
.BasePointers
, CombinedInfo
.Pointers
,
10072 CombinedInfo
.Types
);
10073 // Map any list items in a map clause that were not captures because they
10074 // weren't referenced within the construct.
10075 MEHandler
.generateAllInfo(CombinedInfo
, MappedVarSet
);
    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder);
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);

    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  auto &&TargetElseGen = [this, &ElseGen, &D, RequiresOuterTask](
                             CodeGenFunction &CGF, PrePostActionTy &) {
    if (RequiresOuterTask) {
      CodeGenFunction::OMPTargetDataInfo InputInfo;
      CGF.EmitOMPTargetTaskBasedDirective(D, ElseGen, InputInfo);
    } else {
      emitInlinedDirective(CGF, D.getDirectiveKind(), ElseGen);
    }
  };

  // If we have a target function ID it means that we need to support
  // offloading, otherwise, just execute on the host. We need to execute on the
  // host regardless of the conditional in the if clause if, e.g., the user
  // does not specify target triples.
  if (OutlinedFnID) {
    if (IfCond) {
      emitIfClause(CGF, IfCond, TargetThenGen, TargetElseGen);
    } else {
      RegionCodeGenTy ThenRCG(TargetThenGen);
      ThenRCG(CGF);
    }
  } else {
    RegionCodeGenTy ElseRCG(TargetElseGen);
    ElseRCG(CGF);
  }
}
*S
,
10131 StringRef ParentName
) {
10135 // Codegen OMP target directives that offload compute to the device.
10136 bool RequiresDeviceCodegen
=
10137 isa
<OMPExecutableDirective
>(S
) &&
10138 isOpenMPTargetExecutionDirective(
10139 cast
<OMPExecutableDirective
>(S
)->getDirectiveKind());
10141 if (RequiresDeviceCodegen
) {
10142 const auto &E
= *cast
<OMPExecutableDirective
>(S
);
10144 getTargetEntryUniqueInfo(CGM
.getContext(), E
.getBeginLoc(), ParentName
);
10146 // Is this a target region that should not be emitted as an entry point? If
10147 // so just signal we are done with this target region.
10148 if (!OffloadEntriesInfoManager
.hasTargetRegionEntryInfo(EntryInfo
))
    switch (E.getDirectiveKind()) {
    case OMPD_target:
      CodeGenFunction::EmitOMPTargetDeviceFunction(CGM, ParentName,
                                                   cast<OMPTargetDirective>(E));
      break;
    case OMPD_target_parallel:
      CodeGenFunction::EmitOMPTargetParallelDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelDirective>(E));
      break;
    case OMPD_target_teams:
      CodeGenFunction::EmitOMPTargetTeamsDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDirective>(E));
      break;
    case OMPD_target_teams_distribute:
      CodeGenFunction::EmitOMPTargetTeamsDistributeDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeDirective>(E));
      break;
    case OMPD_target_teams_distribute_simd:
      CodeGenFunction::EmitOMPTargetTeamsDistributeSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetTeamsDistributeSimdDirective>(E));
      break;
    case OMPD_target_parallel_for:
      CodeGenFunction::EmitOMPTargetParallelForDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForDirective>(E));
      break;
    case OMPD_target_parallel_for_simd:
      CodeGenFunction::EmitOMPTargetParallelForSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetParallelForSimdDirective>(E));
      break;
    case OMPD_target_simd:
      CodeGenFunction::EmitOMPTargetSimdDeviceFunction(
          CGM, ParentName, cast<OMPTargetSimdDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for:
      CodeGenFunction::EmitOMPTargetTeamsDistributeParallelForDeviceFunction(
          CGM, ParentName,
          cast<OMPTargetTeamsDistributeParallelForDirective>(E));
      break;
    case OMPD_target_teams_distribute_parallel_for_simd:
      CodeGenFunction::
          EmitOMPTargetTeamsDistributeParallelForSimdDeviceFunction(
              CGM, ParentName,
              cast<OMPTargetTeamsDistributeParallelForSimdDirective>(E));
      break;
    case OMPD_parallel:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_sections:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_target_exit_data:
    case OMPD_target_enter_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_target_update:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unknown target directive for OpenMP device codegen.");
    }
    return;
  }
  if (const auto *E = dyn_cast<OMPExecutableDirective>(S)) {
    if (!E->hasAssociatedStmt() || !E->getAssociatedStmt())
      return;

    scanForTargetRegionsFunctions(E->getRawStmt(), ParentName);
    return;
  }

  // If this is a lambda function, look into its body.
  if (const auto *L = dyn_cast<LambdaExpr>(S))
    S = L->getBody();

  // Keep looking for target regions recursively.
  for (const Stmt *II : S->children())
    scanForTargetRegionsFunctions(II, ParentName);
}
static bool isAssumedToBeNotEmitted(const ValueDecl *VD, bool IsDevice) {
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (!DevTy)
    return false;
  // Do not emit device_type(nohost) functions for the host.
  if (!IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_NoHost)
    return true;
  // Do not emit device_type(host) functions for the device.
  if (IsDevice && DevTy == OMPDeclareTargetDeclAttr::DT_Host)
    return true;
  return false;
}
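
// Illustrative usage (not from the original source): a function declared as
//   #pragma omp declare target device_type(nohost)
// is skipped by the host compilation and emitted only for the device, which
// is exactly what the two checks above implement.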
bool CGOpenMPRuntime::emitTargetFunctions(GlobalDecl GD) {
  // If emitting code for the host, we do not process FD here. Instead we do
  // the normal code generation.
  if (!CGM.getLangOpts().OpenMPIsDevice) {
    if (const auto *FD = dyn_cast<FunctionDecl>(GD.getDecl()))
      if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                  CGM.getLangOpts().OpenMPIsDevice))
        return true;
    return false;
  }

  const ValueDecl *VD = cast<ValueDecl>(GD.getDecl());
  // Try to detect target regions in the function.
  if (const auto *FD = dyn_cast<FunctionDecl>(VD)) {
    StringRef Name = CGM.getMangledName(GD);
    scanForTargetRegionsFunctions(FD->getBody(), Name);
    if (isAssumedToBeNotEmitted(cast<ValueDecl>(FD),
                                CGM.getLangOpts().OpenMPIsDevice))
      return true;
  }

  // Do not emit the function if it is not marked as declare target.
  return !OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD) &&
         AlreadyEmittedTargetDecls.count(VD) == 0;
}
bool CGOpenMPRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  if (isAssumedToBeNotEmitted(cast<ValueDecl>(GD.getDecl()),
                              CGM.getLangOpts().OpenMPIsDevice))
    return true;

  if (!CGM.getLangOpts().OpenMPIsDevice)
    return false;

  // Check if there are Ctors/Dtors in this declaration and look for target
  // regions in it. We use the complete variant to produce the kernel name
  // mangling.
  QualType RDTy = cast<VarDecl>(GD.getDecl())->getType();
  if (const auto *RD = RDTy->getBaseElementTypeUnsafe()->getAsCXXRecordDecl()) {
    for (const CXXConstructorDecl *Ctor : RD->ctors()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Ctor, Ctor_Complete));
      scanForTargetRegionsFunctions(Ctor->getBody(), ParentName);
    }
    if (const CXXDestructorDecl *Dtor = RD->getDestructor()) {
      StringRef ParentName =
          CGM.getMangledName(GlobalDecl(Dtor, Dtor_Complete));
      scanForTargetRegionsFunctions(Dtor->getBody(), ParentName);
    }
  }

  // Do not emit the variable if it is not marked as declare target.
  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(
          cast<VarDecl>(GD.getDecl()));
  if (!Res || *Res == OMPDeclareTargetDeclAttr::MT_Link ||
      (*Res == OMPDeclareTargetDeclAttr::MT_To &&
       HasRequiresUnifiedSharedMemory)) {
    DeferredGlobalVariables.insert(cast<VarDecl>(GD.getDecl()));
    return true;
  }
  return false;
}
void CGOpenMPRuntime::registerTargetGlobalVariable(const VarDecl *VD,
                                                   llvm::Constant *Addr) {
  if (CGM.getLangOpts().OMPTargetTriples.empty() &&
      !CGM.getLangOpts().OpenMPIsDevice)
    return;

  // If we have host/nohost variables, they do not need to be registered.
  Optional<OMPDeclareTargetDeclAttr::DevTypeTy> DevTy =
      OMPDeclareTargetDeclAttr::getDeviceType(VD);
  if (DevTy && *DevTy != OMPDeclareTargetDeclAttr::DT_Any)
    return;

  llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
      OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
  if (!Res) {
    if (CGM.getLangOpts().OpenMPIsDevice) {
      // Register non-target variables being emitted in device code (debug info
      // may cause this).
      StringRef VarName = CGM.getMangledName(VD);
      EmittedNonTargetVariables.try_emplace(VarName, Addr);
    }
    return;
  }
  // Register declare target variables.
  llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryKind Flags;
  StringRef VarName;
  int64_t VarSize;
  llvm::GlobalValue::LinkageTypes Linkage;

  if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
      !HasRequiresUnifiedSharedMemory) {
    Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;
    VarName = CGM.getMangledName(VD);
    if (VD->hasDefinition(CGM.getContext()) != VarDecl::DeclarationOnly) {
      VarSize =
          CGM.getContext().getTypeSizeInChars(VD->getType()).getQuantity();
      assert(VarSize != 0 && "Expected non-zero size of the variable");
    } else {
      VarSize = 0;
    }
    Linkage = CGM.getLLVMLinkageVarDefinition(VD, /*IsConstant=*/false);
    // Temp solution to prevent optimizations of the internal variables.
    if (CGM.getLangOpts().OpenMPIsDevice && !VD->isExternallyVisible()) {
      // Do not create a "ref-variable" if the original is not also available
      // on the host.
      if (!OffloadEntriesInfoManager.hasDeviceGlobalVarEntryInfo(VarName))
        return;
      std::string RefName = getName({VarName, "ref"});
      if (!CGM.GetGlobalValue(RefName)) {
        llvm::Constant *AddrRef =
            getOrCreateInternalVariable(Addr->getType(), RefName);
        auto *GVAddrRef = cast<llvm::GlobalVariable>(AddrRef);
        GVAddrRef->setConstant(/*Val=*/true);
        GVAddrRef->setLinkage(llvm::GlobalValue::InternalLinkage);
        GVAddrRef->setInitializer(Addr);
        CGM.addCompilerUsedGlobal(GVAddrRef);
      }
    }
  } else {
    assert(((*Res == OMPDeclareTargetDeclAttr::MT_Link) ||
            (*Res == OMPDeclareTargetDeclAttr::MT_To &&
             HasRequiresUnifiedSharedMemory)) &&
           "Declare target attribute must link or to with unified memory.");
    if (*Res == OMPDeclareTargetDeclAttr::MT_Link)
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryLink;
    else
      Flags = llvm::OffloadEntriesInfoManager::OMPTargetGlobalVarEntryTo;

    if (CGM.getLangOpts().OpenMPIsDevice) {
      VarName = Addr->getName();
      Addr = nullptr;
    } else {
      VarName = getAddrOfDeclareTargetVar(VD).getName();
      Addr = cast<llvm::Constant>(getAddrOfDeclareTargetVar(VD).getPointer());
    }
    VarSize = CGM.getPointerSize().getQuantity();
    Linkage = llvm::GlobalValue::WeakAnyLinkage;
  }

  OffloadEntriesInfoManager.registerDeviceGlobalVarEntryInfo(
      VarName, Addr, VarSize, Flags, Linkage,
      CGM.getLangOpts().OpenMPIsDevice);
}
bool CGOpenMPRuntime::emitTargetGlobal(GlobalDecl GD) {
  if (isa<FunctionDecl>(GD.getDecl()) ||
      isa<OMPDeclareReductionDecl>(GD.getDecl()))
    return emitTargetFunctions(GD);

  return emitTargetGlobalVariable(GD);
}
void CGOpenMPRuntime::emitDeferredTargetDecls() const {
  for (const VarDecl *VD : DeferredGlobalVariables) {
    llvm::Optional<OMPDeclareTargetDeclAttr::MapTypeTy> Res =
        OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(VD);
    if (!Res)
      continue;
    if (*Res == OMPDeclareTargetDeclAttr::MT_To &&
        !HasRequiresUnifiedSharedMemory) {
      CGM.EmitGlobal(VD);
    } else {
      assert((*Res == OMPDeclareTargetDeclAttr::MT_Link ||
              (*Res == OMPDeclareTargetDeclAttr::MT_To &&
               HasRequiresUnifiedSharedMemory)) &&
             "Expected link clause or to clause with unified memory.");
      (void)CGM.getOpenMPRuntime().getAddrOfDeclareTargetVar(VD);
    }
  }
}
void CGOpenMPRuntime::adjustTargetSpecificDataForLambdas(
    CodeGenFunction &CGF, const OMPExecutableDirective &D) const {
  assert(isOpenMPTargetExecutionDirective(D.getDirectiveKind()) &&
         "Expected target-based directive.");
}
void CGOpenMPRuntime::processRequiresDirective(const OMPRequiresDecl *D) {
  for (const OMPClause *Clause : D->clauselists()) {
    if (Clause->getClauseKind() == OMPC_unified_shared_memory) {
      HasRequiresUnifiedSharedMemory = true;
    } else if (const auto *AC =
                   dyn_cast<OMPAtomicDefaultMemOrderClause>(Clause)) {
      switch (AC->getAtomicDefaultMemOrderKind()) {
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_acq_rel:
        RequiresAtomicOrdering = llvm::AtomicOrdering::AcquireRelease;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_seq_cst:
        RequiresAtomicOrdering = llvm::AtomicOrdering::SequentiallyConsistent;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_relaxed:
        RequiresAtomicOrdering = llvm::AtomicOrdering::Monotonic;
        break;
      case OMPC_ATOMIC_DEFAULT_MEM_ORDER_unknown:
        break;
      }
    }
  }
}

llvm::AtomicOrdering CGOpenMPRuntime::getDefaultMemoryOrdering() const {
  return RequiresAtomicOrdering;
}
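
// Illustrative trigger (not from the original source): a translation unit
// containing
//   #pragma omp requires atomic_default_mem_order(seq_cst)
// is processed by processRequiresDirective() above, so this getter then
// returns llvm::AtomicOrdering::SequentiallyConsistent for 'omp atomic'
// constructs without an explicit memory-order clause.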
bool CGOpenMPRuntime::hasAllocateAttributeForGlobalVar(const VarDecl *VD,
                                                       LangAS &AS) {
  if (!VD || !VD->hasAttr<OMPAllocateDeclAttr>())
    return false;
  const auto *A = VD->getAttr<OMPAllocateDeclAttr>();
  switch (A->getAllocatorType()) {
  case OMPAllocateDeclAttr::OMPNullMemAlloc:
  case OMPAllocateDeclAttr::OMPDefaultMemAlloc:
  // Not supported, fallback to the default mem space.
  case OMPAllocateDeclAttr::OMPLargeCapMemAlloc:
  case OMPAllocateDeclAttr::OMPCGroupMemAlloc:
  case OMPAllocateDeclAttr::OMPHighBWMemAlloc:
  case OMPAllocateDeclAttr::OMPLowLatMemAlloc:
  case OMPAllocateDeclAttr::OMPThreadMemAlloc:
  case OMPAllocateDeclAttr::OMPConstMemAlloc:
  case OMPAllocateDeclAttr::OMPPTeamMemAlloc:
    AS = LangAS::Default;
    return true;
  case OMPAllocateDeclAttr::OMPUserDefinedMemAlloc:
    llvm_unreachable("Expected predefined allocator for the variables with the "
                     "static storage.");
  }
  return false;
}
bool CGOpenMPRuntime::hasRequiresUnifiedSharedMemory() const {
  return HasRequiresUnifiedSharedMemory;
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::DisableAutoDeclareTargetRAII(
    CodeGenModule &CGM)
    : CGM(CGM) {
  if (CGM.getLangOpts().OpenMPIsDevice) {
    SavedShouldMarkAsGlobal = CGM.getOpenMPRuntime().ShouldMarkAsGlobal;
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = false;
  }
}

CGOpenMPRuntime::DisableAutoDeclareTargetRAII::~DisableAutoDeclareTargetRAII() {
  if (CGM.getLangOpts().OpenMPIsDevice)
    CGM.getOpenMPRuntime().ShouldMarkAsGlobal = SavedShouldMarkAsGlobal;
}
bool CGOpenMPRuntime::markAsGlobalTarget(GlobalDecl GD) {
  if (!CGM.getLangOpts().OpenMPIsDevice || !ShouldMarkAsGlobal)
    return true;

  const auto *D = cast<FunctionDecl>(GD.getDecl());
  // Do not emit the function if it is marked as declare target, as it was
  // already emitted.
  if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(D)) {
    if (D->hasBody() && AlreadyEmittedTargetDecls.count(D) == 0) {
      if (auto *F = dyn_cast_or_null<llvm::Function>(
              CGM.GetGlobalValue(CGM.getMangledName(GD))))
        return !F->isDeclaration();
      return false;
    }
    return true;
  }

  return !AlreadyEmittedTargetDecls.insert(D).second;
}
llvm::Function *CGOpenMPRuntime::emitRequiresDirectiveRegFun() {
  // If we don't have entries or if we are emitting code for the device, we
  // don't need to do anything.
  if (CGM.getLangOpts().OMPTargetTriples.empty() ||
      CGM.getLangOpts().OpenMPSimd || CGM.getLangOpts().OpenMPIsDevice ||
      (OffloadEntriesInfoManager.empty() &&
       !HasEmittedDeclareTargetRegion && !HasEmittedTargetRegion))
    return nullptr;

  // Create and register the function that handles the requires directives.
  ASTContext &C = CGM.getContext();

  llvm::Function *RequiresRegFn;
  {
    CodeGenFunction CGF(CGM);
    const auto &FI = CGM.getTypes().arrangeNullaryFunction();
    llvm::FunctionType *FTy = CGM.getTypes().GetFunctionType(FI);
    std::string ReqName = getName({"omp_offloading", "requires_reg"});
    RequiresRegFn = CGM.CreateGlobalInitOrCleanUpFunction(FTy, ReqName, FI);
    CGF.StartFunction(GlobalDecl(), C.VoidTy, RequiresRegFn, FI, {});
    OpenMPOffloadingRequiresDirFlags Flags = OMP_REQ_NONE;
    // TODO: check for other requires clauses.
    // The requires directive takes effect only when a target region is
    // present in the compilation unit. Otherwise it is ignored and not
    // passed to the runtime. This prevents the runtime from throwing an
    // error for mismatching requires clauses across compilation units that
    // don't contain at least 1 target region.
    assert((HasEmittedTargetRegion || HasEmittedDeclareTargetRegion ||
            !OffloadEntriesInfoManager.empty()) &&
           "Target or declare target region expected.");
    if (HasRequiresUnifiedSharedMemory)
      Flags = OMP_REQ_UNIFIED_SHARED_MEMORY;
    CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                            CGM.getModule(), OMPRTL___tgt_register_requires),
                        llvm::ConstantInt::get(CGM.Int64Ty, Flags));
    CGF.FinishFunction();
  }
  return RequiresRegFn;
}
void CGOpenMPRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                    const OMPExecutableDirective &D,
                                    SourceLocation Loc,
                                    llvm::Function *OutlinedFn,
                                    ArrayRef<llvm::Value *> CapturedVars) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);
  CodeGenFunction::RunCleanupsScope Scope(CGF);

  // Build call __kmpc_fork_teams(loc, n, microtask, var1, .., varn);
  llvm::Value *Args[] = {
      RTLoc,
      CGF.Builder.getInt32(CapturedVars.size()), // Number of captured vars
      CGF.Builder.CreateBitCast(OutlinedFn, getKmpc_MicroPointerTy())};
  llvm::SmallVector<llvm::Value *, 16> RealArgs;
  RealArgs.append(std::begin(Args), std::end(Args));
  RealArgs.append(CapturedVars.begin(), CapturedVars.end());

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_fork_teams);
  CGF.EmitRuntimeCall(RTLFn, RealArgs);
}
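
// For reference, the runtime entry point built above has this shape in the
// LLVM OpenMP runtime (illustrative declaration, not part of this file):
//   void __kmpc_fork_teams(ident_t *loc, kmp_int32 argc,
//                          kmpc_micro microtask, ...);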
void CGOpenMPRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                         const Expr *NumTeams,
                                         const Expr *ThreadLimit,
                                         SourceLocation Loc) {
  if (!CGF.HaveInsertPoint())
    return;

  llvm::Value *RTLoc = emitUpdateLocation(CGF, Loc);

  llvm::Value *NumTeamsVal =
      NumTeams
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(NumTeams),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  llvm::Value *ThreadLimitVal =
      ThreadLimit
          ? CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(ThreadLimit),
                                      CGF.CGM.Int32Ty, /* isSigned = */ true)
          : CGF.Builder.getInt32(0);

  // Build call __kmpc_push_num_teams(&loc, global_tid, num_teams,
  // thread_limit)
  llvm::Value *PushNumTeamsArgs[] = {RTLoc, getThreadID(CGF, Loc), NumTeamsVal,
                                     ThreadLimitVal};
  CGF.EmitRuntimeCall(OMPBuilder.getOrCreateRuntimeFunction(
                          CGM.getModule(), OMPRTL___kmpc_push_num_teams),
                      PushNumTeamsArgs);
}
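
// For reference (illustrative declaration from the LLVM OpenMP runtime):
//   void __kmpc_push_num_teams(ident_t *loc, kmp_int32 global_tid,
//                              kmp_int32 num_teams, kmp_int32 num_threads);
// A zero value for num_teams/thread_limit means "no clause value", matching
// the defaults built above.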
void CGOpenMPRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  if (!CGF.HaveInsertPoint())
    return;

  // Action used to replace the default codegen action and turn privatization
  // off.
  PrePostActionTy NoPrivAction;

  // Generate the code for the opening of the data environment. Capture all
  // the arguments of the runtime call by reference because they are used in
  // the closing of the region.
  auto &&BeginThenGen = [this, &D, Device, &Info,
                         &CodeGen](CodeGenFunction &CGF, PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_begin_mapper),
        OffloadingArgs);

    // If device pointer privatization is required, emit the body of the
    // region here. It will have to be duplicated: with and without
    // privatization.
    if (!Info.CaptureDeviceAddrMap.empty())
      CodeGen(CGF);
  };

  // Generate code for the closing of the data region.
  auto &&EndThenGen = [this, Device, &Info, &D](CodeGenFunction &CGF,
                                                PrePostActionTy &) {
    assert(Info.isValid() && "Invalid data environment closing arguments.");

    llvm::OpenMPIRBuilder::TargetDataRTArgs RTArgs;
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/true);

    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Value *PointerNum = CGF.Builder.getInt32(Info.NumberOfPtrs);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     RTArgs.BasePointersArray,
                                     RTArgs.PointersArray,
                                     RTArgs.SizesArray,
                                     RTArgs.MapTypesArray,
                                     RTArgs.MapNamesArray,
                                     RTArgs.MappersArray};
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(
            CGM.getModule(), OMPRTL___tgt_target_data_end_mapper),
        OffloadingArgs);
  };

  // If we need device pointer privatization, we need to emit the body of the
  // region with no privatization in the 'else' branch of the conditional.
  // Otherwise, we don't have to do anything.
  auto &&BeginElseGen = [&Info, &CodeGen, &NoPrivAction](CodeGenFunction &CGF,
                                                         PrePostActionTy &) {
    if (!Info.CaptureDeviceAddrMap.empty()) {
      CodeGen.setAction(NoPrivAction);
      CodeGen(CGF);
    }
  };

  // We don't have to do anything to close the region if the if clause
  // evaluates to false.
  auto &&EndElseGen = [](CodeGenFunction &CGF, PrePostActionTy &) {};

  if (IfCond) {
    emitIfClause(CGF, IfCond, BeginThenGen, BeginElseGen);
  } else {
    RegionCodeGenTy RCG(BeginThenGen);
    RCG(CGF);
  }

  // If we don't require privatization of device pointers, we emit the body in
  // between the runtime calls. This avoids duplicating the body code.
  if (Info.CaptureDeviceAddrMap.empty()) {
    CodeGen.setAction(NoPrivAction);
    CodeGen(CGF);
  }

  if (IfCond) {
    emitIfClause(CGF, IfCond, EndThenGen, EndElseGen);
  } else {
    RegionCodeGenTy RCG(EndThenGen);
    RCG(CGF);
  }
}
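
// Illustrative shape of the output (assumption, not from the original file):
// for '#pragma omp target data map(tofrom: a)' the region body ends up
// bracketed by __tgt_target_data_begin_mapper and __tgt_target_data_end_mapper
// calls over the same offloading arrays built above.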
void CGOpenMPRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  if (!CGF.HaveInsertPoint())
    return;

  assert((isa<OMPTargetEnterDataDirective>(D) ||
          isa<OMPTargetExitDataDirective>(D) ||
          isa<OMPTargetUpdateDirective>(D)) &&
         "Expecting either target enter, exit data, or update directives.");

  CodeGenFunction::OMPTargetDataInfo InputInfo;
  llvm::Value *MapTypesArray = nullptr;
  llvm::Value *MapNamesArray = nullptr;
  // Generate the code for the opening of the data environment.
  auto &&ThenGen = [this, &D, Device, &InputInfo, &MapTypesArray,
                    &MapNamesArray](CodeGenFunction &CGF, PrePostActionTy &) {
    // Emit device ID if any.
    llvm::Value *DeviceID = nullptr;
    if (Device) {
      DeviceID = CGF.Builder.CreateIntCast(CGF.EmitScalarExpr(Device),
                                           CGF.Int64Ty, /*isSigned=*/true);
    } else {
      DeviceID = CGF.Builder.getInt64(OMP_DEVICEID_UNDEF);
    }

    // Emit the number of elements in the offloading arrays.
    llvm::Constant *PointerNum =
        CGF.Builder.getInt32(InputInfo.NumberOfTargetItems);

    // Source location for the ident struct
    llvm::Value *RTLoc = emitUpdateLocation(CGF, D.getBeginLoc());

    llvm::Value *OffloadingArgs[] = {RTLoc,
                                     DeviceID,
                                     PointerNum,
                                     InputInfo.BasePointersArray.getPointer(),
                                     InputInfo.PointersArray.getPointer(),
                                     InputInfo.SizesArray.getPointer(),
                                     MapTypesArray,
                                     MapNamesArray,
                                     InputInfo.MappersArray.getPointer()};

    // Select the right runtime function call for each standalone
    // directive.
    const bool HasNowait = D.hasClausesOfKind<OMPNowaitClause>();
    RuntimeFunction RTLFn;
    switch (D.getDirectiveKind()) {
    case OMPD_target_enter_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_begin_nowait_mapper
                        : OMPRTL___tgt_target_data_begin_mapper;
      break;
    case OMPD_target_exit_data:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_end_nowait_mapper
                        : OMPRTL___tgt_target_data_end_mapper;
      break;
    case OMPD_target_update:
      RTLFn = HasNowait ? OMPRTL___tgt_target_data_update_nowait_mapper
                        : OMPRTL___tgt_target_data_update_mapper;
      break;
    case OMPD_parallel:
    case OMPD_parallel_for:
    case OMPD_parallel_master:
    case OMPD_parallel_sections:
    case OMPD_for_simd:
    case OMPD_parallel_for_simd:
    case OMPD_cancellation_point:
    case OMPD_threadprivate:
    case OMPD_allocate:
    case OMPD_sections:
    case OMPD_critical:
    case OMPD_taskyield:
    case OMPD_taskwait:
    case OMPD_taskgroup:
    case OMPD_target_data:
    case OMPD_distribute:
    case OMPD_distribute_simd:
    case OMPD_distribute_parallel_for:
    case OMPD_distribute_parallel_for_simd:
    case OMPD_teams_distribute:
    case OMPD_teams_distribute_simd:
    case OMPD_teams_distribute_parallel_for:
    case OMPD_teams_distribute_parallel_for_simd:
    case OMPD_declare_simd:
    case OMPD_declare_variant:
    case OMPD_begin_declare_variant:
    case OMPD_end_declare_variant:
    case OMPD_declare_target:
    case OMPD_end_declare_target:
    case OMPD_declare_reduction:
    case OMPD_declare_mapper:
    case OMPD_taskloop:
    case OMPD_taskloop_simd:
    case OMPD_master_taskloop:
    case OMPD_master_taskloop_simd:
    case OMPD_parallel_master_taskloop:
    case OMPD_parallel_master_taskloop_simd:
    case OMPD_target_simd:
    case OMPD_target_teams_distribute:
    case OMPD_target_teams_distribute_simd:
    case OMPD_target_teams_distribute_parallel_for:
    case OMPD_target_teams_distribute_parallel_for_simd:
    case OMPD_target_teams:
    case OMPD_target_parallel:
    case OMPD_target_parallel_for:
    case OMPD_target_parallel_for_simd:
    case OMPD_requires:
    case OMPD_metadirective:
    default:
      llvm_unreachable("Unexpected standalone target data directive.");
    }
    CGF.EmitRuntimeCall(
        OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(), RTLFn),
        OffloadingArgs);
  };

  auto &&TargetThenGen = [this, &ThenGen, &D, &InputInfo, &MapTypesArray,
                          &MapNamesArray](CodeGenFunction &CGF,
                                          PrePostActionTy &) {
    // Fill up the arrays with all the mapped variables.
    MappableExprsHandler::MapCombinedInfoTy CombinedInfo;

    // Get map clause information.
    MappableExprsHandler MEHandler(D, CGF);
    MEHandler.generateAllInfo(CombinedInfo);

    CGOpenMPRuntime::TargetDataInfo Info;
    // Fill up the arrays and create the arguments.
    emitOffloadingArrays(CGF, CombinedInfo, Info, OMPBuilder,
                         /*IsNonContiguous=*/true);
    bool RequiresOuterTask = D.hasClausesOfKind<OMPDependClause>() ||
                             D.hasClausesOfKind<OMPNowaitClause>();
    bool EmitDebug =
        CGF.CGM.getCodeGenOpts().getDebugInfo() != codegenoptions::NoDebugInfo;
    OMPBuilder.emitOffloadingArraysArgument(CGF.Builder, Info.RTArgs, Info,
                                            EmitDebug,
                                            /*ForEndCall=*/false);
    InputInfo.NumberOfTargetItems = Info.NumberOfPtrs;
    InputInfo.BasePointersArray = Address(Info.RTArgs.BasePointersArray,
                                          CGF.VoidPtrTy, CGM.getPointerAlign());
    InputInfo.PointersArray = Address(Info.RTArgs.PointersArray, CGF.VoidPtrTy,
                                      CGM.getPointerAlign());
    InputInfo.SizesArray =
        Address(Info.RTArgs.SizesArray, CGF.Int64Ty, CGM.getPointerAlign());
    InputInfo.MappersArray =
        Address(Info.RTArgs.MappersArray, CGF.VoidPtrTy, CGM.getPointerAlign());
    MapTypesArray = Info.RTArgs.MapTypesArray;
    MapNamesArray = Info.RTArgs.MapNamesArray;
    if (RequiresOuterTask)
      CGF.EmitOMPTargetTaskBasedDirective(D, ThenGen, InputInfo);
    else
      emitInlinedDirective(CGF, D.getDirectiveKind(), ThenGen);
  };

  if (IfCond) {
    emitIfClause(CGF, IfCond, TargetThenGen,
                 [](CodeGenFunction &CGF, PrePostActionTy &) {});
  } else {
    RegionCodeGenTy ThenRCG(TargetThenGen);
    ThenRCG(CGF);
  }
}
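
// Illustrative mapping (derived from the switch above): 'target enter data'
// lowers to __tgt_target_data_begin_mapper, 'target exit data' to
// __tgt_target_data_end_mapper, and 'target update' to
// __tgt_target_data_update_mapper, each with a _nowait variant when the
// 'nowait' clause is present.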
namespace {
/// Kind of parameter in a function with 'declare simd' directive.
enum ParamKindTy {
  Linear,
  LinearRef,
  LinearUVal,
  LinearVal,
  Uniform,
  Vector,
};
/// Attribute set of the parameter.
struct ParamAttrTy {
  ParamKindTy Kind = Vector;
  llvm::APSInt StrideOrArg;
  llvm::APSInt Alignment;
  bool HasVarStride = false;
};
} // namespace
static unsigned evaluateCDTSize(const FunctionDecl *FD,
                                ArrayRef<ParamAttrTy> ParamAttrs) {
  // Every vector variant of a SIMD-enabled function has a vector length
  // (VLEN). If the OpenMP clause "simdlen" is used, the VLEN is the value of
  // the argument of that clause. The VLEN value must be a power of 2.
  // Otherwise the notion of the function's "characteristic data type" (CDT)
  // is used to compute the vector length.
  // CDT is defined in the following order:
  //   a) For non-void function, the CDT is the return type.
  //   b) If the function has any non-uniform, non-linear parameters, then the
  //      CDT is the type of the first such parameter.
  //   c) If the CDT determined by a) or b) above is struct, union, or class
  //      type which is pass-by-value (except for the type that maps to the
  //      built-in complex data type), the characteristic data type is int.
  //   d) If none of the above three cases is applicable, the CDT is int.
  // The VLEN is then determined based on the CDT and the size of vector
  // register of that ISA for which the current vector version is generated.
  // The VLEN is computed using the formula below:
  //   VLEN = sizeof(vector_register) / sizeof(CDT),
  // where the vector register size is specified in section 3.2.1 Registers
  // and the Stack Frame of the original AMD64 ABI document.
  QualType RetType = FD->getReturnType();
  if (RetType.isNull())
    return 0;
  ASTContext &C = FD->getASTContext();
  QualType CDT;
  if (!RetType.isNull() && !RetType->isVoidType()) {
    CDT = RetType;
  } else {
    unsigned Offset = 0;
    if (const auto *MD = dyn_cast<CXXMethodDecl>(FD)) {
      if (ParamAttrs[Offset].Kind == Vector)
        CDT = C.getPointerType(C.getRecordType(MD->getParent()));
      ++Offset;
    }
    if (CDT.isNull()) {
      for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
        if (ParamAttrs[I + Offset].Kind == Vector) {
          CDT = FD->getParamDecl(I)->getType();
          break;
        }
      }
    }
  }
  if (CDT.isNull())
    CDT = C.IntTy;
  CDT = CDT->getCanonicalTypeUnqualified();
  if (CDT->isRecordType() || CDT->isUnionType())
    CDT = C.IntTy;
  return C.getTypeSize(CDT);
}
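
// Worked example (illustrative): for a function returning 'double' targeting
// a 128-bit vector register, this returns 64 bits, so the formula above gives
// VLEN = 128 / 64 = 2 lanes per vector variant.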
/// Mangle the parameter part of the vector function name according to
/// their OpenMP classification. The mangling function is defined in
/// section 4.5 of the AAVFABI(2021Q1).
static std::string mangleVectorParameters(ArrayRef<ParamAttrTy> ParamAttrs) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  for (const auto &ParamAttr : ParamAttrs) {
    switch (ParamAttr.Kind) {
    case Linear:
      Out << 'l';
      break;
    case LinearRef:
      Out << 'R';
      break;
    case LinearUVal:
      Out << 'U';
      break;
    case LinearVal:
      Out << 'L';
      break;
    case Uniform:
      Out << 'u';
      break;
    case Vector:
      Out << 'v';
      break;
    }
    if (ParamAttr.HasVarStride)
      Out << "s" << ParamAttr.StrideOrArg;
    else if (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef ||
             ParamAttr.Kind == LinearUVal || ParamAttr.Kind == LinearVal) {
      // Don't print the step value if it is not present or if it is
      // equal to 1.
      if (ParamAttr.StrideOrArg < 0)
        Out << 'n' << -ParamAttr.StrideOrArg;
      else if (ParamAttr.StrideOrArg != 1)
        Out << ParamAttr.StrideOrArg;
    }

    if (!!ParamAttr.Alignment)
      Out << 'a' << ParamAttr.Alignment;
  }

  return std::string(Out.str());
}
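
// Worked example (illustrative): parameters classified as {Vector, Uniform}
// mangle to "vu", so a 4-lane unmasked Advanced SIMD variant of a function
// 'foo' would be named "_ZGVnN4vu_foo" by the AArch64 helpers below.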
static void
emitX86DeclareSimdFunction(const FunctionDecl *FD, llvm::Function *Fn,
                           const llvm::APSInt &VLENVal,
                           ArrayRef<ParamAttrTy> ParamAttrs,
                           OMPDeclareSimdDeclAttr::BranchStateTy State) {
  struct ISADataTy {
    char ISA;
    unsigned VecRegSize;
  };
  ISADataTy ISAData[] = {
      {'b', 128}, // SSE
      {'c', 256}, // AVX
      {'d', 256}, // AVX2
      {'e', 512}, // AVX512
  };
  llvm::SmallVector<char, 2> Masked;
  switch (State) {
  case OMPDeclareSimdDeclAttr::BS_Undefined:
    Masked.push_back('N');
    Masked.push_back('M');
    break;
  case OMPDeclareSimdDeclAttr::BS_Notinbranch:
    Masked.push_back('N');
    break;
  case OMPDeclareSimdDeclAttr::BS_Inbranch:
    Masked.push_back('M');
    break;
  }
  for (char Mask : Masked) {
    for (const ISADataTy &Data : ISAData) {
      SmallString<256> Buffer;
      llvm::raw_svector_ostream Out(Buffer);
      Out << "_ZGV" << Data.ISA << Mask;
      if (!VLENVal) {
        unsigned NumElts = evaluateCDTSize(FD, ParamAttrs);
        assert(NumElts && "Non-zero simdlen/cdtsize expected");
        Out << llvm::APSInt::getUnsigned(Data.VecRegSize / NumElts);
      } else {
        Out << VLENVal;
      }
      Out << mangleVectorParameters(ParamAttrs);
      Out << '_' << Fn->getName();
      Fn->addFnAttr(Out.str());
    }
  }
}
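
// Illustrative result (assumption): '#pragma omp declare simd' on
// 'float bar(float x)' with no simdlen yields, for the 128-bit SSE entry,
// VLEN = 128 / 32 = 4 and the attribute "_ZGVbN4v_bar"; the AVX/AVX2/AVX512
// entries follow with ISA letters 'c'/'d'/'e'.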
// These are the functions that are needed to mangle the name of the
// vector functions generated by the compiler, according to the rules
// defined in the "Vector Function ABI specifications for AArch64",
// available at
// https://developer.arm.com/products/software-development-tools/hpc/arm-compiler-for-hpc/vector-function-abi.

/// Maps To Vector (MTV), as defined in 4.1.1 of the AAVFABI (2021Q1).
static bool getAArch64MTV(QualType QT, ParamKindTy Kind) {
  QT = QT.getCanonicalType();

  if (QT->isVoidType())
    return false;

  if (Kind == ParamKindTy::Uniform)
    return false;

  if (Kind == ParamKindTy::LinearUVal || Kind == ParamKindTy::LinearRef)
    return false;

  if ((Kind == ParamKindTy::Linear || Kind == ParamKindTy::LinearVal) &&
      !QT->isReferenceType())
    return false;

  return true;
}
/// Pass By Value (PBV), as defined in 3.1.2 of the AAVFABI.
static bool getAArch64PBV(QualType QT, ASTContext &C) {
  QT = QT.getCanonicalType();
  unsigned Size = C.getTypeSize(QT);

  // Only scalars and complex within 16 bytes wide set PBV to true.
  if (Size != 8 && Size != 16 && Size != 32 && Size != 64 && Size != 128)
    return false;

  if (QT->isFloatingType())
    return true;

  if (QT->isIntegerType())
    return true;

  if (QT->isPointerType())
    return true;

  // TODO: Add support for complex types (section 3.1.2, item 2).

  return false;
}
/// Computes the lane size (LS) of a return type or of an input parameter,
/// as defined by `LS(P)` in 3.2.1 of the AAVFABI.
/// TODO: Add support for references, section 3.2.1, item 1.
static unsigned getAArch64LS(QualType QT, ParamKindTy Kind, ASTContext &C) {
  if (!getAArch64MTV(QT, Kind) && QT.getCanonicalType()->isPointerType()) {
    QualType PTy = QT.getCanonicalType()->getPointeeType();
    if (getAArch64PBV(PTy, C))
      return C.getTypeSize(PTy);
  }
  if (getAArch64PBV(QT, C))
    return C.getTypeSize(QT);

  return C.getTypeSize(C.getUIntPtrType());
}
// Get Narrowest Data Size (NDS) and Widest Data Size (WDS) from the
// signature of the scalar function, as defined in 3.2.2 of the
// AAVFABI.
static std::tuple<unsigned, unsigned, bool>
getNDSWDS(const FunctionDecl *FD, ArrayRef<ParamAttrTy> ParamAttrs) {
  QualType RetType = FD->getReturnType().getCanonicalType();

  ASTContext &C = FD->getASTContext();

  bool OutputBecomesInput = false;

  llvm::SmallVector<unsigned, 8> Sizes;
  if (!RetType->isVoidType()) {
    Sizes.push_back(getAArch64LS(RetType, ParamKindTy::Vector, C));
    if (!getAArch64PBV(RetType, C) && getAArch64MTV(RetType, {}))
      OutputBecomesInput = true;
  }
  for (unsigned I = 0, E = FD->getNumParams(); I < E; ++I) {
    QualType QT = FD->getParamDecl(I)->getType().getCanonicalType();
    Sizes.push_back(getAArch64LS(QT, ParamAttrs[I].Kind, C));
  }

  assert(!Sizes.empty() && "Unable to determine NDS and WDS.");
  // The LS of a function parameter / return value can only be a power
  // of 2, starting from 8 bits, up to 128.
  assert(llvm::all_of(Sizes,
                      [](unsigned Size) {
                        return Size == 8 || Size == 16 || Size == 32 ||
                               Size == 64 || Size == 128;
                      }) &&
         "Invalid size");

  return std::make_tuple(*std::min_element(std::begin(Sizes), std::end(Sizes)),
                         *std::max_element(std::begin(Sizes), std::end(Sizes)),
                         OutputBecomesInput);
}
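
// Worked example (illustrative): for 'double f(float x, short y)' the lane
// sizes are {64, 32, 16}, so NDS = 16 and WDS = 64.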
// Function used to add the attribute. The parameter `VLEN` is
// templated to allow the use of "x" when targeting scalable functions
// for SVE.
template <typename T>
static void addAArch64VectorName(T VLEN, StringRef LMask, StringRef Prefix,
                                 char ISA, StringRef ParSeq,
                                 StringRef MangledName, bool OutputBecomesInput,
                                 llvm::Function *Fn) {
  SmallString<256> Buffer;
  llvm::raw_svector_ostream Out(Buffer);
  Out << Prefix << ISA << LMask << VLEN;
  if (OutputBecomesInput)
    Out << "v";
  Out << ParSeq << "_" << MangledName;
  Fn->addFnAttr(Out.str());
}
// Helper function to generate the Advanced SIMD names depending on
// the value of the NDS when simdlen is not present.
static void addAArch64AdvSIMDNDSNames(unsigned NDS, StringRef Mask,
                                      StringRef Prefix, char ISA,
                                      StringRef ParSeq, StringRef MangledName,
                                      bool OutputBecomesInput,
                                      llvm::Function *Fn) {
  switch (NDS) {
  case 8:
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(16, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 16:
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(8, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 32:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    addAArch64VectorName(4, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  case 64:
  case 128:
    addAArch64VectorName(2, Mask, Prefix, ISA, ParSeq, MangledName,
                         OutputBecomesInput, Fn);
    break;
  default:
    llvm_unreachable("Scalar type is too wide.");
  }
}
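
// Illustrative expansion (assumption): NDS = 32 emits the 64-bit (2-lane)
// and 128-bit (4-lane) Advanced SIMD variants, per section 3.3.1 of the
// AAVFABI.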
/// Emit vector function attributes for AArch64, as defined in the AAVFABI.
static void emitAArch64DeclareSimdFunction(
    CodeGenModule &CGM, const FunctionDecl *FD, unsigned UserVLEN,
    ArrayRef<ParamAttrTy> ParamAttrs,
    OMPDeclareSimdDeclAttr::BranchStateTy State, StringRef MangledName,
    char ISA, unsigned VecRegSize, llvm::Function *Fn, SourceLocation SLoc) {

  // Get basic data for building the vector signature.
  const auto Data = getNDSWDS(FD, ParamAttrs);
  const unsigned NDS = std::get<0>(Data);
  const unsigned WDS = std::get<1>(Data);
  const bool OutputBecomesInput = std::get<2>(Data);

  // Check the values provided via `simdlen` by the user.
  // 1. A `simdlen(1)` doesn't produce vector signatures.
  if (UserVLEN == 1) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning,
        "The clause simdlen(1) has no effect when targeting aarch64.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 2. Section 3.3.1, item 1: user input must be a power of 2 for
  // Advanced SIMD output.
  if (ISA == 'n' && UserVLEN && !llvm::isPowerOf2_32(UserVLEN)) {
    unsigned DiagID = CGM.getDiags().getCustomDiagID(
        DiagnosticsEngine::Warning, "The value specified in simdlen must be a "
                                    "power of 2 when targeting Advanced SIMD.");
    CGM.getDiags().Report(SLoc, DiagID);
    return;
  }

  // 3. Section 3.4.1. SVE fixed lengths must obey the architectural
  // constraints.
  if (ISA == 's' && UserVLEN != 0) {
    if ((UserVLEN * WDS > 2048) || (UserVLEN * WDS % 128 != 0)) {
      unsigned DiagID = CGM.getDiags().getCustomDiagID(
          DiagnosticsEngine::Warning, "The clause simdlen must fit the %0-bit "
                                      "lanes in the architectural constraints "
                                      "for SVE (min is 128-bit, max is "
                                      "2048-bit, by steps of 128-bit)");
      CGM.getDiags().Report(SLoc, DiagID) << WDS;
      return;
    }
  }

  // Sort out parameter sequence.
  const std::string ParSeq = mangleVectorParameters(ParamAttrs);
  StringRef Prefix = "_ZGV";
  // Generate simdlen from user input (if any).
  if (UserVLEN) {
    if (ISA == 's') {
      // SVE generates only a masked function.
      addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD generates one or two functions, depending on
      // the `[not]inbranch` clause.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64VectorName(UserVLEN, "N", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64VectorName(UserVLEN, "M", Prefix, ISA, ParSeq, MangledName,
                             OutputBecomesInput, Fn);
        break;
      }
    }
  } else {
    // If no user simdlen is provided, follow the AAVFABI rules for
    // generating the vector length.
    if (ISA == 's') {
      // SVE, section 3.4.1, item 1.
      addAArch64VectorName("x", "M", Prefix, ISA, ParSeq, MangledName,
                           OutputBecomesInput, Fn);
    } else {
      assert(ISA == 'n' && "Expected ISA either 's' or 'n'.");
      // Advanced SIMD, Section 3.3.1 of the AAVFABI, generates one or
      // two vector names depending on the use of the clause
      // `[not]inbranch`.
      switch (State) {
      case OMPDeclareSimdDeclAttr::BS_Undefined:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Notinbranch:
        addAArch64AdvSIMDNDSNames(NDS, "N", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      case OMPDeclareSimdDeclAttr::BS_Inbranch:
        addAArch64AdvSIMDNDSNames(NDS, "M", Prefix, ISA, ParSeq, MangledName,
                                  OutputBecomesInput, Fn);
        break;
      }
    }
  }
}
void CGOpenMPRuntime::emitDeclareSimdFunction(const FunctionDecl *FD,
                                              llvm::Function *Fn) {
  ASTContext &C = CGM.getContext();
  FD = FD->getMostRecentDecl();
  while (FD) {
    // Map params to their positions in function decl.
    llvm::DenseMap<const Decl *, unsigned> ParamPositions;
    if (isa<CXXMethodDecl>(FD))
      ParamPositions.try_emplace(FD, 0);
    unsigned ParamPos = ParamPositions.size();
    for (const ParmVarDecl *P : FD->parameters()) {
      ParamPositions.try_emplace(P->getCanonicalDecl(), ParamPos);
      ++ParamPos;
    }
    for (const auto *Attr : FD->specific_attrs<OMPDeclareSimdDeclAttr>()) {
      llvm::SmallVector<ParamAttrTy, 8> ParamAttrs(ParamPositions.size());
      // Mark uniform parameters.
      for (const Expr *E : Attr->uniforms()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
        }
        ParamAttrs[Pos].Kind = Uniform;
      }
      // Get alignment info.
      auto *NI = Attr->alignments_begin();
      for (const Expr *E : Attr->aligneds()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        QualType ParmTy;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          ParmTy = E->getType();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          ParmTy = PVD->getType();
        }
        ParamAttrs[Pos].Alignment =
            (*NI)
                ? (*NI)->EvaluateKnownConstInt(C)
                : llvm::APSInt::getUnsigned(
                      C.toCharUnitsFromBits(C.getOpenMPDefaultSimdAlign(ParmTy))
                          .getQuantity());
        ++NI;
      }
      // Mark linear parameters.
      auto *SI = Attr->steps_begin();
      auto *MI = Attr->modifiers_begin();
      for (const Expr *E : Attr->linears()) {
        E = E->IgnoreParenImpCasts();
        unsigned Pos;
        bool IsReferenceType = false;
        // Rescaling factor needed to compute the linear parameter
        // value in the mangled name.
        unsigned PtrRescalingFactor = 1;
        if (isa<CXXThisExpr>(E)) {
          Pos = ParamPositions[FD];
          auto *P = cast<PointerType>(E->getType());
          PtrRescalingFactor = CGM.getContext()
                                   .getTypeSizeInChars(P->getPointeeType())
                                   .getQuantity();
        } else {
          const auto *PVD = cast<ParmVarDecl>(cast<DeclRefExpr>(E)->getDecl())
                                ->getCanonicalDecl();
          auto It = ParamPositions.find(PVD);
          assert(It != ParamPositions.end() && "Function parameter not found");
          Pos = It->second;
          if (auto *P = dyn_cast<PointerType>(PVD->getType()))
            PtrRescalingFactor = CGM.getContext()
                                     .getTypeSizeInChars(P->getPointeeType())
                                     .getQuantity();
          else if (PVD->getType()->isReferenceType()) {
            IsReferenceType = true;
            PtrRescalingFactor =
                CGM.getContext()
                    .getTypeSizeInChars(PVD->getType().getNonReferenceType())
                    .getQuantity();
          }
        }
        ParamAttrTy &ParamAttr = ParamAttrs[Pos];
        if (*MI == OMPC_LINEAR_ref)
          ParamAttr.Kind = LinearRef;
        else if (*MI == OMPC_LINEAR_uval)
          ParamAttr.Kind = LinearUVal;
        else if (IsReferenceType)
          ParamAttr.Kind = LinearVal;
        else
          ParamAttr.Kind = Linear;
        // Assuming a stride of 1, for `linear` without modifiers.
        ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(1);
        if (*SI) {
          Expr::EvalResult Result;
          if (!(*SI)->EvaluateAsInt(Result, C, Expr::SE_AllowSideEffects)) {
            if (const auto *DRE =
                    cast<DeclRefExpr>((*SI)->IgnoreParenImpCasts())) {
              if (const auto *StridePVD =
                      dyn_cast<ParmVarDecl>(DRE->getDecl())) {
                ParamAttr.HasVarStride = true;
                auto It = ParamPositions.find(StridePVD->getCanonicalDecl());
                assert(It != ParamPositions.end() &&
                       "Function parameter not found");
                ParamAttr.StrideOrArg = llvm::APSInt::getUnsigned(It->second);
              }
            }
          } else {
            ParamAttr.StrideOrArg = Result.Val.getInt();
          }
        }
        // If we are using a linear clause on a pointer, we need to
        // rescale the value of linear_step with the byte size of the
        // pointee type.
        if (!ParamAttr.HasVarStride &&
            (ParamAttr.Kind == Linear || ParamAttr.Kind == LinearRef))
          ParamAttr.StrideOrArg = ParamAttr.StrideOrArg * PtrRescalingFactor;
        ++SI;
        ++MI;
      }
      llvm::APSInt VLENVal;
      SourceLocation ExprLoc;
      const Expr *VLENExpr = Attr->getSimdlen();
      if (VLENExpr) {
        VLENVal = VLENExpr->EvaluateKnownConstInt(C);
        ExprLoc = VLENExpr->getExprLoc();
      }
      OMPDeclareSimdDeclAttr::BranchStateTy State = Attr->getBranchState();
      if (CGM.getTriple().isX86()) {
        emitX86DeclareSimdFunction(FD, Fn, VLENVal, ParamAttrs, State);
      } else if (CGM.getTriple().getArch() == llvm::Triple::aarch64) {
        unsigned VLEN = VLENVal.getExtValue();
        StringRef MangledName = Fn->getName();
        if (CGM.getTarget().hasFeature("sve"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 's', 128, Fn, ExprLoc);
        if (CGM.getTarget().hasFeature("neon"))
          emitAArch64DeclareSimdFunction(CGM, FD, VLEN, ParamAttrs, State,
                                         MangledName, 'n', 128, Fn, ExprLoc);
      }
    }
    FD = FD->getPreviousDecl();
  }
}
namespace {
/// Cleanup action for doacross support.
class DoacrossCleanupTy final : public EHScopeStack::Cleanup {
public:
  static const int DoacrossFinArgs = 2;

private:
  llvm::FunctionCallee RTLFn;
  llvm::Value *Args[DoacrossFinArgs];

public:
  DoacrossCleanupTy(llvm::FunctionCallee RTLFn,
                    ArrayRef<llvm::Value *> CallArgs)
      : RTLFn(RTLFn) {
    assert(CallArgs.size() == DoacrossFinArgs);
    std::copy(CallArgs.begin(), CallArgs.end(), std::begin(Args));
  }
  void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
    if (!CGF.HaveInsertPoint())
      return;
    CGF.EmitRuntimeCall(RTLFn, Args);
  }
};
} // namespace
void CGOpenMPRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                       const OMPLoopDirective &D,
                                       ArrayRef<Expr *> NumIterations) {
  if (!CGF.HaveInsertPoint())
    return;

  ASTContext &C = CGM.getContext();
  QualType Int64Ty = C.getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/true);
  RecordDecl *RD;
  if (KmpDimTy.isNull()) {
    // Build struct kmp_dim { // loop bounds info casted to kmp_int64
    //  kmp_int64 lo; // lower
    //  kmp_int64 up; // upper
    //  kmp_int64 st; // stride
    // };
    RD = C.buildImplicitRecord("kmp_dim");
    RD->startDefinition();
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    addFieldToRecordDecl(C, RD, Int64Ty);
    RD->completeDefinition();
    KmpDimTy = C.getRecordType(RD);
  } else {
    RD = cast<RecordDecl>(KmpDimTy->getAsTagDecl());
  }
  llvm::APInt Size(/*numBits=*/32, NumIterations.size());
  QualType ArrayTy =
      C.getConstantArrayType(KmpDimTy, Size, nullptr, ArrayType::Normal, 0);

  Address DimsAddr = CGF.CreateMemTemp(ArrayTy, "dims");
  CGF.EmitNullInitialization(DimsAddr, ArrayTy);
  enum { LowerFD = 0, UpperFD, StrideFD };
  // Fill dims with data.
  for (unsigned I = 0, E = NumIterations.size(); I < E; ++I) {
    LValue DimsLVal = CGF.MakeAddrLValue(
        CGF.Builder.CreateConstArrayGEP(DimsAddr, I), KmpDimTy);
    // dims.upper = num_iterations;
    LValue UpperLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), UpperFD));
    llvm::Value *NumIterVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(NumIterations[I]), NumIterations[I]->getType(),
        Int64Ty, NumIterations[I]->getExprLoc());
    CGF.EmitStoreOfScalar(NumIterVal, UpperLVal);
    // dims.stride = 1;
    LValue StrideLVal = CGF.EmitLValueForField(
        DimsLVal, *std::next(RD->field_begin(), StrideFD));
    CGF.EmitStoreOfScalar(llvm::ConstantInt::getSigned(CGM.Int64Ty, /*V=*/1),
                          StrideLVal);
  }

  // Build call void __kmpc_doacross_init(ident_t *loc, kmp_int32 gtid,
  // kmp_int32 num_dims, struct kmp_dim * dims);
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, D.getBeginLoc()),
      getThreadID(CGF, D.getBeginLoc()),
      llvm::ConstantInt::getSigned(CGM.Int32Ty, NumIterations.size()),
      CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
          CGF.Builder.CreateConstArrayGEP(DimsAddr, 0).getPointer(),
          CGM.VoidPtrTy)};

  llvm::FunctionCallee RTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_init);
  CGF.EmitRuntimeCall(RTLFn, Args);
  llvm::Value *FiniArgs[DoacrossCleanupTy::DoacrossFinArgs] = {
      emitUpdateLocation(CGF, D.getEndLoc()), getThreadID(CGF, D.getEndLoc())};
  llvm::FunctionCallee FiniRTLFn = OMPBuilder.getOrCreateRuntimeFunction(
      CGM.getModule(), OMPRTL___kmpc_doacross_fini);
  CGF.EHStack.pushCleanup<DoacrossCleanupTy>(NormalAndEHCleanup, FiniRTLFn,
                                             llvm::makeArrayRef(FiniArgs));
}
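
// Illustrative source-level trigger (assumption): a doacross loop nest like
//   #pragma omp for ordered(1)
//   for (int i = 0; i < n; ++i) { ... }
// reaches here with NumIterations holding one expression, producing a
// one-element 'dims' array for __kmpc_doacross_init.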
void CGOpenMPRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                          const OMPDependClause *C) {
  QualType Int64Ty =
      CGM.getContext().getIntTypeForBitwidth(/*DestWidth=*/64, /*Signed=*/1);
  llvm::APInt Size(/*numBits=*/32, C->getNumLoops());
  QualType ArrayTy = CGM.getContext().getConstantArrayType(
      Int64Ty, Size, nullptr, ArrayType::Normal, 0);
  Address CntAddr = CGF.CreateMemTemp(ArrayTy, ".cnt.addr");
  for (unsigned I = 0, E = C->getNumLoops(); I < E; ++I) {
    const Expr *CounterVal = C->getLoopData(I);
    assert(CounterVal);
    llvm::Value *CntVal = CGF.EmitScalarConversion(
        CGF.EmitScalarExpr(CounterVal), CounterVal->getType(), Int64Ty,
        CounterVal->getExprLoc());
    CGF.EmitStoreOfScalar(CntVal, CGF.Builder.CreateConstArrayGEP(CntAddr, I),
                          /*Volatile=*/false, Int64Ty);
  }
  llvm::Value *Args[] = {
      emitUpdateLocation(CGF, C->getBeginLoc()),
      getThreadID(CGF, C->getBeginLoc()),
      CGF.Builder.CreateConstArrayGEP(CntAddr, 0).getPointer()};
  llvm::FunctionCallee RTLFn;
  if (C->getDependencyKind() == OMPC_DEPEND_source) {
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_post);
  } else {
    assert(C->getDependencyKind() == OMPC_DEPEND_sink);
    RTLFn = OMPBuilder.getOrCreateRuntimeFunction(CGM.getModule(),
                                                  OMPRTL___kmpc_doacross_wait);
  }
  CGF.EmitRuntimeCall(RTLFn, Args);
}
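
// Illustrative mapping (assumption): '#pragma omp ordered depend(source)'
// emits __kmpc_doacross_post with the current iteration vector, while
// '#pragma omp ordered depend(sink: i-1)' emits __kmpc_doacross_wait on the
// sink vector.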
void CGOpenMPRuntime::emitCall(CodeGenFunction &CGF, SourceLocation Loc,
                               llvm::FunctionCallee Callee,
                               ArrayRef<llvm::Value *> Args) const {
  assert(Loc.isValid() && "Outlined function call location must be valid.");
  auto DL = ApplyDebugLocation::CreateDefaultArtificial(CGF, Loc);

  if (auto *Fn = dyn_cast<llvm::Function>(Callee.getCallee())) {
    if (Fn->doesNotThrow()) {
      CGF.EmitNounwindRuntimeCall(Fn, Args);
      return;
    }
  }
  CGF.EmitRuntimeCall(Callee, Args);
}

void CGOpenMPRuntime::emitOutlinedFunctionCall(
    CodeGenFunction &CGF, SourceLocation Loc, llvm::FunctionCallee OutlinedFn,
    ArrayRef<llvm::Value *> Args) const {
  emitCall(CGF, Loc, OutlinedFn, Args);
}
void CGOpenMPRuntime::emitFunctionProlog(CodeGenFunction &CGF, const Decl *D) {
  if (const auto *FD = dyn_cast<FunctionDecl>(D))
    if (OMPDeclareTargetDeclAttr::isDeclareTargetDeclaration(FD))
      HasEmittedDeclareTargetRegion = true;
}

Address CGOpenMPRuntime::getParameterAddress(CodeGenFunction &CGF,
                                             const VarDecl *NativeParam,
                                             const VarDecl *TargetParam) const {
  return CGF.GetAddrOfLocalVar(NativeParam);
}
/// Return allocator value from expression, or return a null allocator (default
/// when no allocator specified).
static llvm::Value *getAllocatorVal(CodeGenFunction &CGF,
                                    const Expr *Allocator) {
  llvm::Value *AllocVal;
  if (Allocator) {
    AllocVal = CGF.EmitScalarExpr(Allocator);
    // According to the standard, the original allocator type is an enum
    // (integer). Convert to pointer type, if required.
    AllocVal = CGF.EmitScalarConversion(AllocVal, Allocator->getType(),
                                        CGF.getContext().VoidPtrTy,
                                        Allocator->getExprLoc());
  } else {
    // If no allocator is specified, it defaults to the null allocator.
    AllocVal = llvm::Constant::getNullValue(
        CGF.CGM.getTypes().ConvertType(CGF.getContext().VoidPtrTy));
  }
  return AllocVal;
}
/// Return the alignment from an allocate directive if present.
static llvm::Value *getAlignmentValue(CodeGenModule &CGM, const VarDecl *VD) {
  llvm::Optional<CharUnits> AllocateAlignment = CGM.getOMPAllocateAlignment(VD);

  if (!AllocateAlignment)
    return nullptr;

  return llvm::ConstantInt::get(CGM.SizeTy, AllocateAlignment->getQuantity());
}
Address CGOpenMPRuntime::getAddressOfLocalVariable(CodeGenFunction &CGF,
                                                   const VarDecl *VD) {
  if (!VD)
    return Address::invalid();
  Address UntiedAddr = Address::invalid();
  Address UntiedRealAddr = Address::invalid();
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It != FunctionToUntiedTaskStackMap.end()) {
    const UntiedLocalVarsAddressesMap &UntiedData =
        UntiedLocalVarsStack[It->second];
    auto I = UntiedData.find(VD);
    if (I != UntiedData.end()) {
      UntiedAddr = I->second.first;
      UntiedRealAddr = I->second.second;
    }
  }
  const VarDecl *CVD = VD->getCanonicalDecl();
  if (CVD->hasAttr<OMPAllocateDeclAttr>()) {
    // Use the default allocation.
    if (!isAllocatableDecl(VD))
      return UntiedAddr;
    llvm::Value *Size;
    CharUnits Align = CGM.getContext().getDeclAlign(CVD);
    if (CVD->getType()->isVariablyModifiedType()) {
      Size = CGF.getTypeSize(CVD->getType());
      // Align the size: ((size + align - 1) / align) * align
      Size = CGF.Builder.CreateNUWAdd(
          Size, CGM.getSize(Align - CharUnits::fromQuantity(1)));
      Size = CGF.Builder.CreateUDiv(Size, CGM.getSize(Align));
      Size = CGF.Builder.CreateNUWMul(Size, CGM.getSize(Align));
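      // E.g. (illustrative): a runtime size of 10 with align = 8 rounds up to
      // (10 + 7) / 8 * 8 = 16 bytes.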
11797 CharUnits Sz
= CGM
.getContext().getTypeSizeInChars(CVD
->getType());
11798 Size
= CGM
.getSize(Sz
.alignTo(Align
));
11800 llvm::Value
*ThreadID
= getThreadID(CGF
, CVD
->getBeginLoc());
11801 const auto *AA
= CVD
->getAttr
<OMPAllocateDeclAttr
>();
11802 const Expr
*Allocator
= AA
->getAllocator();
11803 llvm::Value
*AllocVal
= getAllocatorVal(CGF
, Allocator
);
11804 llvm::Value
*Alignment
= getAlignmentValue(CGM
, CVD
);
11805 SmallVector
<llvm::Value
*, 4> Args
;
11806 Args
.push_back(ThreadID
);
11808 Args
.push_back(Alignment
);
11809 Args
.push_back(Size
);
11810 Args
.push_back(AllocVal
);
11811 llvm::omp::RuntimeFunction FnID
=
11812 Alignment
? OMPRTL___kmpc_aligned_alloc
: OMPRTL___kmpc_alloc
;
11813 llvm::Value
*Addr
= CGF
.EmitRuntimeCall(
11814 OMPBuilder
.getOrCreateRuntimeFunction(CGM
.getModule(), FnID
), Args
,
11815 getName({CVD
->getName(), ".void.addr"}));
11816 llvm::FunctionCallee FiniRTLFn
= OMPBuilder
.getOrCreateRuntimeFunction(
11817 CGM
.getModule(), OMPRTL___kmpc_free
);
11818 QualType Ty
= CGM
.getContext().getPointerType(CVD
->getType());
11819 Addr
= CGF
.Builder
.CreatePointerBitCastOrAddrSpaceCast(
11820 Addr
, CGF
.ConvertTypeForMem(Ty
), getName({CVD
->getName(), ".addr"}));
11821 if (UntiedAddr
.isValid())
11822 CGF
.EmitStoreOfScalar(Addr
, UntiedAddr
, /*Volatile=*/false, Ty
);
    // Cleanup action for allocate support.
    class OMPAllocateCleanupTy final : public EHScopeStack::Cleanup {
      llvm::FunctionCallee RTLFn;
      SourceLocation::UIntTy LocEncoding;
      Address Addr;
      const Expr *AllocExpr;

    public:
      OMPAllocateCleanupTy(llvm::FunctionCallee RTLFn,
                           SourceLocation::UIntTy LocEncoding, Address Addr,
                           const Expr *AllocExpr)
          : RTLFn(RTLFn), LocEncoding(LocEncoding), Addr(Addr),
            AllocExpr(AllocExpr) {}
      void Emit(CodeGenFunction &CGF, Flags /*flags*/) override {
        if (!CGF.HaveInsertPoint())
          return;
        llvm::Value *Args[3];
        Args[0] = CGF.CGM.getOpenMPRuntime().getThreadID(
            CGF, SourceLocation::getFromRawEncoding(LocEncoding));
        Args[1] = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
            Addr.getPointer(), CGF.VoidPtrTy);
        llvm::Value *AllocVal = getAllocatorVal(CGF, AllocExpr);
        Args[2] = AllocVal;
        CGF.EmitRuntimeCall(RTLFn, Args);
      }
    };
    Address VDAddr =
        UntiedRealAddr.isValid()
            ? UntiedRealAddr
            : Address(Addr, CGF.ConvertTypeForMem(CVD->getType()), Align);
    CGF.EHStack.pushCleanup<OMPAllocateCleanupTy>(
        NormalAndEHCleanup, FiniRTLFn, CVD->getLocation().getRawEncoding(),
        VDAddr, Allocator);
    if (UntiedRealAddr.isValid())
      if (auto *Region =
              dyn_cast_or_null<CGOpenMPRegionInfo>(CGF.CapturedStmtInfo))
        Region->emitUntiedSwitch(CGF);
    return VDAddr;
  }
  return UntiedAddr;
}
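
// Net effect (illustrative sketch, names approximate): for
//   int a;
//   #pragma omp allocate(a) allocator(omp_default_mem_alloc)
// the local is lowered roughly as
//   void *a.void.addr = __kmpc_alloc(gtid, sizeof(int), omp_default_mem_alloc);
//   int *a.addr = (int *)a.void.addr;  // all uses of 'a' go through this
//   ...
//   __kmpc_free(gtid, a.void.addr, omp_default_mem_alloc); // on scope exit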
bool CGOpenMPRuntime::isLocalVarInUntiedTask(CodeGenFunction &CGF,
                                             const VarDecl *VD) const {
  auto It = FunctionToUntiedTaskStackMap.find(CGF.CurFn);
  if (It == FunctionToUntiedTaskStackMap.end())
    return false;
  return UntiedLocalVarsStack[It->second].count(VD) > 0;
}
CGOpenMPRuntime::NontemporalDeclsRAII::NontemporalDeclsRAII(
    CodeGenModule &CGM, const OMPLoopDirective &S)
    : CGM(CGM), NeedToPush(S.hasClausesOfKind<OMPNontemporalClause>()) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (!NeedToPush)
    return;
  NontemporalDeclsSet &DS =
      CGM.getOpenMPRuntime().NontemporalDeclsStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPNontemporalClause>()) {
    for (const Stmt *Ref : C->private_refs()) {
      const auto *SimpleRefExpr = cast<Expr>(Ref)->IgnoreParenImpCasts();
      const ValueDecl *VD;
      if (const auto *DRE = dyn_cast<DeclRefExpr>(SimpleRefExpr)) {
        VD = DRE->getDecl();
      } else {
        const auto *ME = cast<MemberExpr>(SimpleRefExpr);
        assert((ME->isImplicitCXXThis() ||
                isa<CXXThisExpr>(ME->getBase()->IgnoreParenImpCasts())) &&
               "Expected member of current class.");
        VD = ME->getMemberDecl();
      }
      DS.insert(VD);
    }
  }
}

CGOpenMPRuntime::NontemporalDeclsRAII::~NontemporalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().NontemporalDeclsStack.pop_back();
}
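
// Example (illustrative): for
//   #pragma omp simd nontemporal(a, b)
// this RAII pushes {a, b} for the duration of the loop so that
// isNontemporalDecl() below reports them and the loads/stores of 'a' and 'b'
// emitted inside the region are marked with !nontemporal metadata.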
CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::UntiedTaskLocalDeclsRAII(
    CodeGenFunction &CGF,
    const llvm::MapVector<CanonicalDeclPtr<const VarDecl>,
                          std::pair<Address, Address>> &LocalVars)
    : CGM(CGF.CGM), NeedToPush(!LocalVars.empty()) {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().FunctionToUntiedTaskStackMap.try_emplace(
      CGF.CurFn, CGM.getOpenMPRuntime().UntiedLocalVarsStack.size());
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.push_back(LocalVars);
}

CGOpenMPRuntime::UntiedTaskLocalDeclsRAII::~UntiedTaskLocalDeclsRAII() {
  if (!NeedToPush)
    return;
  CGM.getOpenMPRuntime().UntiedLocalVarsStack.pop_back();
}
bool CGOpenMPRuntime::isNontemporalDecl(const ValueDecl *VD) const {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");

  return llvm::any_of(
      CGM.getOpenMPRuntime().NontemporalDeclsStack,
      [VD](const NontemporalDeclsSet &Set) { return Set.contains(VD); });
}
void CGOpenMPRuntime::LastprivateConditionalRAII::tryToDisableInnerAnalysis(
    const OMPExecutableDirective &S,
    llvm::DenseSet<CanonicalDeclPtr<const Decl>> &NeedToAddForLPCsAsDisabled)
    const {
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToCheckForLPCs;
  // Vars in target/task regions must be excluded completely.
  if (isOpenMPTargetExecutionDirective(S.getDirectiveKind()) ||
      isOpenMPTaskingDirective(S.getDirectiveKind())) {
    SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
    getOpenMPCaptureRegions(CaptureRegions, S.getDirectiveKind());
    const CapturedStmt *CS = S.getCapturedStmt(CaptureRegions.front());
    for (const CapturedStmt::Capture &Cap : CS->captures()) {
      if (Cap.capturesVariable() || Cap.capturesVariableByCopy())
        NeedToCheckForLPCs.insert(Cap.getCapturedVar());
    }
  }
  // Exclude vars in private clauses.
  for (const auto *C : S.getClausesOfKind<OMPPrivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPFirstprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPReductionClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const auto *C : S.getClausesOfKind<OMPLinearClause>()) {
    for (const Expr *Ref : C->varlists()) {
      if (!Ref->getType()->isScalarType())
        continue;
      const auto *DRE = dyn_cast<DeclRefExpr>(Ref->IgnoreParenImpCasts());
      if (!DRE)
        continue;
      NeedToCheckForLPCs.insert(DRE->getDecl());
    }
  }
  for (const Decl *VD : NeedToCheckForLPCs) {
    for (const LastprivateConditionalData &Data :
         llvm::reverse(CGM.getOpenMPRuntime().LastprivateConditionalStack)) {
      if (Data.DeclToUniqueName.count(VD) > 0) {
        if (!Data.Disabled)
          NeedToAddForLPCsAsDisabled.insert(VD);
        break;
      }
    }
  }
}
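
// Example (illustrative): in
//   #pragma omp for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i) {
//     #pragma omp task firstprivate(a)
//     { ... }
//   }
// the inner task gets its own copy of 'a', so assignments inside the task
// must not be tracked; the constructor below pushes 'a' as Disabled for the
// duration of the inner region.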
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S, LValue IVLVal)
    : CGM(CGF.CGM),
      Action((CGM.getLangOpts().OpenMP >= 50 &&
              llvm::any_of(S.getClausesOfKind<OMPLastprivateClause>(),
                           [](const OMPLastprivateClause *C) {
                             return C->getKind() ==
                                    OMPC_LASTPRIVATE_conditional;
                           }))
                 ? ActionToDo::PushAsLastprivateConditional
                 : ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50 || Action == ActionToDo::DoNotPush)
    return;
  assert(Action == ActionToDo::PushAsLastprivateConditional &&
         "Expected a push action.");
  LastprivateConditionalData &Data =
      CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
  for (const auto *C : S.getClausesOfKind<OMPLastprivateClause>()) {
    if (C->getKind() != OMPC_LASTPRIVATE_conditional)
      continue;

    for (const Expr *Ref : C->varlists()) {
      Data.DeclToUniqueName.insert(std::make_pair(
          cast<DeclRefExpr>(Ref->IgnoreParenImpCasts())->getDecl(),
          SmallString<16>(generateUniqueName(CGM, "pl_cond", Ref))));
    }
  }
  Data.IVLVal = IVLVal;
  Data.Fn = CGF.CurFn;
}
CGOpenMPRuntime::LastprivateConditionalRAII::LastprivateConditionalRAII(
    CodeGenFunction &CGF, const OMPExecutableDirective &S)
    : CGM(CGF.CGM), Action(ActionToDo::DoNotPush) {
  assert(CGM.getLangOpts().OpenMP && "Not in OpenMP mode.");
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  llvm::DenseSet<CanonicalDeclPtr<const Decl>> NeedToAddForLPCsAsDisabled;
  tryToDisableInnerAnalysis(S, NeedToAddForLPCsAsDisabled);
  if (!NeedToAddForLPCsAsDisabled.empty()) {
    Action = ActionToDo::DisableLastprivateConditional;
    LastprivateConditionalData &Data =
        CGM.getOpenMPRuntime().LastprivateConditionalStack.emplace_back();
    for (const Decl *VD : NeedToAddForLPCsAsDisabled)
      Data.DeclToUniqueName.insert(std::make_pair(VD, SmallString<16>()));
    Data.Fn = CGF.CurFn;
    Data.Disabled = true;
  }
}
CGOpenMPRuntime::LastprivateConditionalRAII
CGOpenMPRuntime::LastprivateConditionalRAII::disable(
    CodeGenFunction &CGF, const OMPExecutableDirective &S) {
  return LastprivateConditionalRAII(CGF, S);
}
CGOpenMPRuntime::LastprivateConditionalRAII::~LastprivateConditionalRAII() {
  if (CGM.getLangOpts().OpenMP < 50)
    return;
  if (Action == ActionToDo::DisableLastprivateConditional) {
    assert(CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
           "Expected list of disabled private vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
  if (Action == ActionToDo::PushAsLastprivateConditional) {
    assert(
        !CGM.getOpenMPRuntime().LastprivateConditionalStack.back().Disabled &&
        "Expected list of lastprivate conditional vars.");
    CGM.getOpenMPRuntime().LastprivateConditionalStack.pop_back();
  }
}
Address CGOpenMPRuntime::emitLastprivateConditionalInit(CodeGenFunction &CGF,
                                                        const VarDecl *VD) {
  ASTContext &C = CGM.getContext();
  auto I = LastprivateConditionalToTypes.find(CGF.CurFn);
  if (I == LastprivateConditionalToTypes.end())
    I = LastprivateConditionalToTypes.try_emplace(CGF.CurFn).first;
  QualType NewType;
  const FieldDecl *VDField;
  const FieldDecl *FiredField;
  LValue BaseLVal;
  auto VI = I->getSecond().find(VD);
  if (VI == I->getSecond().end()) {
    RecordDecl *RD = C.buildImplicitRecord("lastprivate.conditional");
    RD->startDefinition();
    VDField = addFieldToRecordDecl(C, RD, VD->getType().getNonReferenceType());
    FiredField = addFieldToRecordDecl(C, RD, C.CharTy);
    RD->completeDefinition();
    NewType = C.getRecordType(RD);
    Address Addr = CGF.CreateMemTemp(NewType, C.getDeclAlign(VD), VD->getName());
    BaseLVal = CGF.MakeAddrLValue(Addr, NewType, AlignmentSource::Decl);
    I->getSecond().try_emplace(VD, NewType, VDField, FiredField, BaseLVal);
  } else {
    NewType = std::get<0>(VI->getSecond());
    VDField = std::get<1>(VI->getSecond());
    FiredField = std::get<2>(VI->getSecond());
    BaseLVal = std::get<3>(VI->getSecond());
  }
  LValue FiredLVal =
      CGF.EmitLValueForField(BaseLVal, FiredField);
  CGF.EmitStoreOfScalar(
      llvm::ConstantInt::getNullValue(CGF.ConvertTypeForMem(C.CharTy)),
      FiredLVal);
  return CGF.EmitLValueForField(BaseLVal, VDField).getAddress(CGF);
}
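
// Layout of the implicit record built above (conceptually):
//   struct lastprivate.conditional {
//     <decltype(a)> a; // private value storage (VDField)
//     char Fired;      // non-zero once the variable has been assigned
//   };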
namespace {
/// Checks if the lastprivate conditional variable is referenced in LHS.
class LastprivateConditionalRefChecker final
    : public ConstStmtVisitor<LastprivateConditionalRefChecker, bool> {
  ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM;
  const Expr *FoundE = nullptr;
  const Decl *FoundD = nullptr;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn = nullptr;
  SourceLocation Loc;

public:
  bool VisitDeclRefExpr(const DeclRefExpr *E) {
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitMemberExpr(const MemberExpr *E) {
    if (!CodeGenFunction::IsWrappedCXXThis(E->getBase()))
      return false;
    for (const CGOpenMPRuntime::LastprivateConditionalData &D :
         llvm::reverse(LPM)) {
      auto It = D.DeclToUniqueName.find(E->getMemberDecl());
      if (It == D.DeclToUniqueName.end())
        continue;
      if (D.Disabled)
        return false;
      FoundE = E;
      FoundD = E->getMemberDecl()->getCanonicalDecl();
      UniqueDeclName = It->second;
      IVLVal = D.IVLVal;
      FoundFn = D.Fn;
      break;
    }
    return FoundE == E;
  }
  bool VisitStmt(const Stmt *S) {
    for (const Stmt *Child : S->children()) {
      if (!Child)
        continue;
      if (const auto *E = dyn_cast<Expr>(Child))
        if (!E->isGLValue())
          continue;
      if (Visit(Child))
        return true;
    }
    return false;
  }
  explicit LastprivateConditionalRefChecker(
      ArrayRef<CGOpenMPRuntime::LastprivateConditionalData> LPM)
      : LPM(LPM) {}
  std::tuple<const Expr *, const Decl *, StringRef, LValue, llvm::Function *>
  getFoundData() const {
    return std::make_tuple(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn);
  }
};
} // namespace
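
// Example (illustrative): for
//   #pragma omp for lastprivate(conditional: a)
//   for (int i = 0; i < n; ++i)
//     if (p[i] > 0) a = i;  // 'a' appears on the LHS
// the checker matches the DeclRefExpr for 'a' against the innermost enabled
// entry on the stack and reports its unique name, IV lvalue, and owning
// function via getFoundData().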
void CGOpenMPRuntime::emitLastprivateConditionalUpdate(CodeGenFunction &CGF,
                                                       LValue IVLVal,
                                                       StringRef UniqueDeclName,
                                                       LValue LVal,
                                                       SourceLocation Loc) {
  // Last updated loop counter for the lastprivate conditional var.
  // int<xx> last_iv = 0;
  llvm::Type *LLIVTy = CGF.ConvertTypeForMem(IVLVal.getType());
  llvm::Constant *LastIV =
      getOrCreateInternalVariable(LLIVTy, getName({UniqueDeclName, "iv"}));
  cast<llvm::GlobalVariable>(LastIV)->setAlignment(
      IVLVal.getAlignment().getAsAlign());
  LValue LastIVLVal = CGF.MakeNaturalAlignAddrLValue(LastIV, IVLVal.getType());

  // Last value of the lastprivate conditional.
  // decltype(priv_a) last_a;
  llvm::GlobalVariable *Last = getOrCreateInternalVariable(
      CGF.ConvertTypeForMem(LVal.getType()), UniqueDeclName);
  Last->setAlignment(LVal.getAlignment().getAsAlign());
  LValue LastLVal = CGF.MakeAddrLValue(
      Address(Last, Last->getValueType(), LVal.getAlignment()), LVal.getType());

  // Global loop counter. Required to handle inner parallel-for regions.
  // iv
  llvm::Value *IVVal = CGF.EmitLoadOfScalar(IVLVal, Loc);

  // #pragma omp critical(a)
  // if (last_iv <= iv) {
  //   last_iv = iv;
  //   last_a = priv_a;
  // }
  auto &&CodeGen = [&LastIVLVal, &IVLVal, IVVal, &LVal, &LastLVal,
                    Loc](CodeGenFunction &CGF, PrePostActionTy &Action) {
    Action.Enter(CGF);
    llvm::Value *LastIVVal = CGF.EmitLoadOfScalar(LastIVLVal, Loc);
    // (last_iv <= iv) ? Check if the variable is updated and store new
    // value in global var.
    llvm::Value *CmpRes;
    if (IVLVal.getType()->isSignedIntegerType()) {
      CmpRes = CGF.Builder.CreateICmpSLE(LastIVVal, IVVal);
    } else {
      assert(IVLVal.getType()->isUnsignedIntegerType() &&
             "Loop iteration variable must be integer.");
      CmpRes = CGF.Builder.CreateICmpULE(LastIVVal, IVVal);
    }
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lp_cond_then");
    llvm::BasicBlock *ExitBB = CGF.createBasicBlock("lp_cond_exit");
    CGF.Builder.CreateCondBr(CmpRes, ThenBB, ExitBB);
    // {
    CGF.EmitBlock(ThenBB);

    // last_iv = iv;
    CGF.EmitStoreOfScalar(IVVal, LastIVLVal);

    // last_a = priv_a;
    switch (CGF.getEvaluationKind(LVal.getType())) {
    case TEK_Scalar: {
      llvm::Value *PrivVal = CGF.EmitLoadOfScalar(LVal, Loc);
      CGF.EmitStoreOfScalar(PrivVal, LastLVal);
      break;
    }
    case TEK_Complex: {
      CodeGenFunction::ComplexPairTy PrivVal = CGF.EmitLoadOfComplex(LVal, Loc);
      CGF.EmitStoreOfComplex(PrivVal, LastLVal, /*isInit=*/false);
      break;
    }
    case TEK_Aggregate:
      llvm_unreachable(
          "Aggregates are not supported in lastprivate conditional.");
    }
    // }
    CGF.EmitBranch(ExitBB);
    // There is no need to emit line number for unconditional branch.
    (void)ApplyDebugLocation::CreateEmpty(CGF);
    CGF.EmitBlock(ExitBB, /*IsFinished=*/true);
  };

  if (CGM.getLangOpts().OpenMPSimd) {
    // Do not emit as a critical region as no parallel region could be emitted.
    RegionCodeGenTy ThenRCG(CodeGen);
    ThenRCG(CGF);
  } else {
    emitCriticalRegion(CGF, UniqueDeclName, CodeGen, Loc);
  }
}
void CGOpenMPRuntime::checkAndEmitLastprivateConditional(CodeGenFunction &CGF,
                                                         const Expr *LHS) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  LastprivateConditionalRefChecker Checker(LastprivateConditionalStack);
  if (!Checker.Visit(LHS))
    return;
  const Expr *FoundE;
  const Decl *FoundD;
  StringRef UniqueDeclName;
  LValue IVLVal;
  llvm::Function *FoundFn;
  std::tie(FoundE, FoundD, UniqueDeclName, IVLVal, FoundFn) =
      Checker.getFoundData();
  if (FoundFn != CGF.CurFn) {
    // Special codegen for inner parallel regions.
    // ((struct.lastprivate.conditional*)&priv_a)->Fired = 1;
    auto It = LastprivateConditionalToTypes[FoundFn].find(FoundD);
    assert(It != LastprivateConditionalToTypes[FoundFn].end() &&
           "Lastprivate conditional is not found in outer region.");
    QualType StructTy = std::get<0>(It->getSecond());
    const FieldDecl *FiredDecl = std::get<2>(It->getSecond());
    LValue PrivLVal = CGF.EmitLValue(FoundE);
    Address StructAddr = CGF.Builder.CreatePointerBitCastOrAddrSpaceCast(
        PrivLVal.getAddress(CGF),
        CGF.ConvertTypeForMem(CGF.getContext().getPointerType(StructTy)),
        CGF.ConvertTypeForMem(StructTy));
    LValue BaseLVal =
        CGF.MakeAddrLValue(StructAddr, StructTy, AlignmentSource::Decl);
    LValue FiredLVal = CGF.EmitLValueForField(BaseLVal, FiredDecl);
    CGF.EmitAtomicStore(RValue::get(llvm::ConstantInt::get(
                            CGF.ConvertTypeForMem(FiredDecl->getType()), 1)),
                        FiredLVal, llvm::AtomicOrdering::Unordered,
                        /*IsVolatile=*/true, /*isInit=*/false);
    return;
  }

  // Private address of the lastprivate conditional in the current context.
  // priv_a
  LValue LVal = CGF.EmitLValue(FoundE);
  emitLastprivateConditionalUpdate(CGF, IVLVal, UniqueDeclName, LVal,
                                   FoundE->getExprLoc());
}
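
// Example (illustrative): when the assignment happens in an inner outlined
// region, e.g.
//   #pragma omp for lastprivate(conditional: a)
//   for (...) {
//     #pragma omp parallel
//     { a = compute(); }
//   }
// the inner function cannot perform the compare-and-copy itself (the IV
// lvalue lives in the outer function), so it only marks priv_a.Fired = 1;
// the outer region then does the real update in
// checkAndEmitSharedLastprivateConditional() below.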
void CGOpenMPRuntime::checkAndEmitSharedLastprivateConditional(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    const llvm::DenseSet<CanonicalDeclPtr<const VarDecl>> &IgnoredDecls) {
  if (CGF.getLangOpts().OpenMP < 50 || LastprivateConditionalStack.empty())
    return;
  auto Range = llvm::reverse(LastprivateConditionalStack);
  auto It = llvm::find_if(
      Range, [](const LastprivateConditionalData &D) { return !D.Disabled; });
  if (It == Range.end() || It->Fn != CGF.CurFn)
    return;
  auto LPCI = LastprivateConditionalToTypes.find(It->Fn);
  assert(LPCI != LastprivateConditionalToTypes.end() &&
         "Lastprivates must be registered already.");
  SmallVector<OpenMPDirectiveKind, 4> CaptureRegions;
  getOpenMPCaptureRegions(CaptureRegions, D.getDirectiveKind());
  const CapturedStmt *CS = D.getCapturedStmt(CaptureRegions.back());
  for (const auto &Pair : It->DeclToUniqueName) {
    const auto *VD = cast<VarDecl>(Pair.first->getCanonicalDecl());
    if (!CS->capturesVariable(VD) || IgnoredDecls.contains(VD))
      continue;
    auto I = LPCI->getSecond().find(Pair.first);
    assert(I != LPCI->getSecond().end() &&
           "Lastprivate must be registered already.");
    // bool Cmp = priv_a.Fired != 0;
    LValue BaseLVal = std::get<3>(I->getSecond());
    LValue FiredLVal =
        CGF.EmitLValueForField(BaseLVal, std::get<2>(I->getSecond()));
    llvm::Value *Res = CGF.EmitLoadOfScalar(FiredLVal, D.getBeginLoc());
    llvm::Value *Cmp = CGF.Builder.CreateIsNotNull(Res);
    llvm::BasicBlock *ThenBB = CGF.createBasicBlock("lpc.then");
    llvm::BasicBlock *DoneBB = CGF.createBasicBlock("lpc.done");
    // if (Cmp) {
    CGF.Builder.CreateCondBr(Cmp, ThenBB, DoneBB);
    CGF.EmitBlock(ThenBB);
    Address Addr = CGF.GetAddrOfLocalVar(VD);
    LValue LVal;
    if (VD->getType()->isReferenceType())
      LVal = CGF.EmitLoadOfReferenceLValue(Addr, VD->getType(),
                                           AlignmentSource::Decl);
    else
      LVal = CGF.MakeAddrLValue(Addr, VD->getType().getNonReferenceType(),
                                AlignmentSource::Decl);
    emitLastprivateConditionalUpdate(CGF, It->IVLVal, Pair.second, LVal,
                                     D.getBeginLoc());
    auto AL = ApplyDebugLocation::CreateArtificial(CGF);
    CGF.EmitBlock(DoneBB, /*IsFinal=*/true);
    // }
  }
}
void CGOpenMPRuntime::emitLastprivateConditionalFinalUpdate(
    CodeGenFunction &CGF, LValue PrivLVal, const VarDecl *VD,
    SourceLocation Loc) {
  if (CGF.getLangOpts().OpenMP < 50)
    return;
  auto It = LastprivateConditionalStack.back().DeclToUniqueName.find(VD);
  assert(It != LastprivateConditionalStack.back().DeclToUniqueName.end() &&
         "Unknown lastprivate conditional variable.");
  StringRef UniqueName = It->second;
  llvm::GlobalVariable *GV = CGM.getModule().getNamedGlobal(UniqueName);
  // The variable was not updated in the region - exit.
  if (!GV)
    return;
  LValue LPLVal = CGF.MakeAddrLValue(
      Address(GV, GV->getValueType(), PrivLVal.getAlignment()),
      PrivLVal.getType().getNonReferenceType());
  llvm::Value *Res = CGF.EmitLoadOfScalar(LPLVal, Loc);
  CGF.EmitStoreOfScalar(Res, PrivLVal);
}
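
// CGOpenMPSIMDRuntime: with -fopenmp-simd, only 'simd' constructs are
// honored and Sema never builds the other directives, so the entry points
// below are expected to be unreachable from codegen in that mode.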
llvm::Function *CGOpenMPSIMDRuntime::emitParallelOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTeamsOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitTaskOutlinedFunction(
    const OMPExecutableDirective &D, const VarDecl *ThreadIDVar,
    const VarDecl *PartIDVar, const VarDecl *TaskTVar,
    OpenMPDirectiveKind InnermostKind, const RegionCodeGenTy &CodeGen,
    bool Tied, unsigned &NumberOfParts) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitParallelCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           llvm::Function *OutlinedFn,
                                           ArrayRef<llvm::Value *> CapturedVars,
                                           const Expr *IfCond,
                                           llvm::Value *NumThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCriticalRegion(
    CodeGenFunction &CGF, StringRef CriticalName,
    const RegionCodeGenTy &CriticalOpGen, SourceLocation Loc,
    const Expr *Hint) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMasterRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitMaskedRegion(CodeGenFunction &CGF,
                                           const RegionCodeGenTy &MasterOpGen,
                                           SourceLocation Loc,
                                           const Expr *Filter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskyieldCall(CodeGenFunction &CGF,
                                            SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskgroupRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &TaskgroupOpGen,
    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitSingleRegion(
    CodeGenFunction &CGF, const RegionCodeGenTy &SingleOpGen,
    SourceLocation Loc, ArrayRef<const Expr *> CopyprivateVars,
    ArrayRef<const Expr *> DestExprs, ArrayRef<const Expr *> SrcExprs,
    ArrayRef<const Expr *> AssignmentOps) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitOrderedRegion(CodeGenFunction &CGF,
                                            const RegionCodeGenTy &OrderedOpGen,
                                            SourceLocation Loc,
                                            bool IsThreads) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitBarrierCall(CodeGenFunction &CGF,
                                          SourceLocation Loc,
                                          OpenMPDirectiveKind Kind,
                                          bool EmitChecks,
                                          bool ForceSimpleCall) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForDispatchInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    const OpenMPScheduleTy &ScheduleKind, unsigned IVSize, bool IVSigned,
    bool Ordered, const DispatchRTInput &DispatchValues) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc, OpenMPDirectiveKind DKind,
    const OpenMPScheduleTy &ScheduleKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDistributeStaticInit(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDistScheduleClauseKind SchedKind, const StaticRTInput &Values) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForOrderedIterationEnd(CodeGenFunction &CGF,
                                                     SourceLocation Loc,
                                                     unsigned IVSize,
                                                     bool IVSigned) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitForStaticFinish(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              OpenMPDirectiveKind DKind) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Value *CGOpenMPSIMDRuntime::emitForNext(CodeGenFunction &CGF,
                                              SourceLocation Loc,
                                              unsigned IVSize, bool IVSigned,
                                              Address IL, Address LB,
                                              Address UB, Address ST) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumThreadsClause(CodeGenFunction &CGF,
                                               llvm::Value *NumThreads,
                                               SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitProcBindClause(CodeGenFunction &CGF,
                                             ProcBindKind ProcBind,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfThreadPrivate(CodeGenFunction &CGF,
                                                    const VarDecl *VD,
                                                    Address VDAddr,
                                                    SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

llvm::Function *CGOpenMPSIMDRuntime::emitThreadPrivateVarDefinition(
    const VarDecl *VD, Address VDAddr, SourceLocation Loc, bool PerformInit,
    CodeGenFunction *CGF) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getAddrOfArtificialThreadPrivate(
    CodeGenFunction &CGF, QualType VarType, StringRef Name) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitFlush(CodeGenFunction &CGF,
                                    ArrayRef<const Expr *> Vars,
                                    SourceLocation Loc,
                                    llvm::AtomicOrdering AO) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskCall(CodeGenFunction &CGF, SourceLocation Loc,
                                       const OMPExecutableDirective &D,
                                       llvm::Function *TaskFunction,
                                       QualType SharedsTy, Address Shareds,
                                       const Expr *IfCond,
                                       const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskLoopCall(
    CodeGenFunction &CGF, SourceLocation Loc, const OMPLoopDirective &D,
    llvm::Function *TaskFunction, QualType SharedsTy, Address Shareds,
    const Expr *IfCond, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitReduction(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> Privates,
    ArrayRef<const Expr *> LHSExprs, ArrayRef<const Expr *> RHSExprs,
    ArrayRef<const Expr *> ReductionOps, ReductionOptionsTy Options) {
  assert(Options.SimpleReduction && "Only simple reduction is expected.");
  CGOpenMPRuntime::emitReduction(CGF, Loc, Privates, LHSExprs, RHSExprs,
                                 ReductionOps, Options);
}
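
// Note: unlike the stubs above, emitReduction is reachable in SIMD-only mode.
// With Options.SimpleReduction set, the base CGOpenMPRuntime implementation
// folds the private copies back into the original variables directly, without
// runtime library calls, so delegating to it is safe here.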
llvm::Value *CGOpenMPSIMDRuntime::emitTaskReductionInit(
    CodeGenFunction &CGF, SourceLocation Loc, ArrayRef<const Expr *> LHSExprs,
    ArrayRef<const Expr *> RHSExprs, const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFini(CodeGenFunction &CGF,
                                                SourceLocation Loc,
                                                bool IsWorksharingReduction) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskReductionFixups(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  ReductionCodeGen &RCG,
                                                  unsigned N) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address CGOpenMPSIMDRuntime::getTaskReductionItem(CodeGenFunction &CGF,
                                                  SourceLocation Loc,
                                                  llvm::Value *ReductionsPtr,
                                                  LValue SharedLVal) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTaskwaitCall(CodeGenFunction &CGF,
                                           SourceLocation Loc,
                                           const OMPTaskDataTy &Data) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancellationPointCall(
    CodeGenFunction &CGF, SourceLocation Loc,
    OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitCancelCall(CodeGenFunction &CGF,
                                         SourceLocation Loc, const Expr *IfCond,
                                         OpenMPDirectiveKind CancelRegion) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetOutlinedFunction(
    const OMPExecutableDirective &D, StringRef ParentName,
    llvm::Function *&OutlinedFn, llvm::Constant *&OutlinedFnID,
    bool IsOffloadEntry, const RegionCodeGenTy &CodeGen) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D,
    llvm::Function *OutlinedFn, llvm::Value *OutlinedFnID, const Expr *IfCond,
    llvm::PointerIntPair<const Expr *, 2, OpenMPDeviceClauseModifier> Device,
    llvm::function_ref<llvm::Value *(CodeGenFunction &CGF,
                                     const OMPLoopDirective &D)>
        SizeEmitter) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetFunctions(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobalVariable(GlobalDecl GD) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

bool CGOpenMPSIMDRuntime::emitTargetGlobal(GlobalDecl GD) {
  return false;
}

void CGOpenMPSIMDRuntime::emitTeamsCall(CodeGenFunction &CGF,
                                        const OMPExecutableDirective &D,
                                        SourceLocation Loc,
                                        llvm::Function *OutlinedFn,
                                        ArrayRef<llvm::Value *> CapturedVars) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitNumTeamsClause(CodeGenFunction &CGF,
                                             const Expr *NumTeams,
                                             const Expr *ThreadLimit,
                                             SourceLocation Loc) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataCalls(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device, const RegionCodeGenTy &CodeGen,
    CGOpenMPRuntime::TargetDataInfo &Info) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitTargetDataStandAloneCall(
    CodeGenFunction &CGF, const OMPExecutableDirective &D, const Expr *IfCond,
    const Expr *Device) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossInit(CodeGenFunction &CGF,
                                           const OMPLoopDirective &D,
                                           ArrayRef<Expr *> NumIterations) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

void CGOpenMPSIMDRuntime::emitDoacrossOrdered(CodeGenFunction &CGF,
                                              const OMPDependClause *C) {
  llvm_unreachable("Not supported in SIMD-only mode");
}

const VarDecl *
CGOpenMPSIMDRuntime::translateParameter(const FieldDecl *FD,
                                        const VarDecl *NativeParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}

Address
CGOpenMPSIMDRuntime::getParameterAddress(CodeGenFunction &CGF,
                                         const VarDecl *NativeParam,
                                         const VarDecl *TargetParam) const {
  llvm_unreachable("Not supported in SIMD-only mode");
}